From 20e9fc55feb58dd1f766a494c530684011291ff3 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 24 May 2020 22:19:22 -0700 Subject: [PATCH 001/770] [MCDwarf] Delete unneeded DW_AT_prototyped for DW_TAG_label --- llvm/lib/MC/MCDwarf.cpp | 4 ---- llvm/test/MC/MachO/gen-dwarf.s | 4 ---- 2 files changed, 8 deletions(-) diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp index 0b7fc45540189..71b8f0e28e1cd 100644 --- a/llvm/lib/MC/MCDwarf.cpp +++ b/llvm/lib/MC/MCDwarf.cpp @@ -846,7 +846,6 @@ static void EmitGenDwarfAbbrev(MCStreamer *MCOS) { EmitAbbrev(MCOS, dwarf::DW_AT_decl_file, dwarf::DW_FORM_data4); EmitAbbrev(MCOS, dwarf::DW_AT_decl_line, dwarf::DW_FORM_data4); EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr); - EmitAbbrev(MCOS, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag); EmitAbbrev(MCOS, 0, 0); // DW_TAG_unspecified_parameters DIE abbrev (3). @@ -1087,9 +1086,6 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, MCSymbolRefExpr::VK_None, context); MCOS->emitValue(AT_low_pc, AddrSize); - // DW_AT_prototyped, a one byte flag value of 0 saying we have no prototype. - MCOS->emitInt8(0); - // The DW_TAG_unspecified_parameters DIE abbrev (3). 
MCOS->emitULEB128IntValue(3); diff --git a/llvm/test/MC/MachO/gen-dwarf.s b/llvm/test/MC/MachO/gen-dwarf.s index 6d39d278e8184..58f8a7ccf8994 100644 --- a/llvm/test/MC/MachO/gen-dwarf.s +++ b/llvm/test/MC/MachO/gen-dwarf.s @@ -30,7 +30,6 @@ _x: .long 1 // CHECK: DW_AT_decl_file DW_FORM_data4 // CHECK: DW_AT_decl_line DW_FORM_data4 // CHECK: DW_AT_low_pc DW_FORM_addr -// CHECK: DW_AT_prototyped DW_FORM_flag // CHECK: [3] DW_TAG_unspecified_parameters DW_CHILDREN_no @@ -53,7 +52,6 @@ _x: .long 1 // CHECK: DW_AT_decl_file ([[FILE:".*gen-dwarf.s"]]) // CHECK: DW_AT_decl_line (5) // CHECK: DW_AT_low_pc (0x0000000000000000) -// CHECK: DW_AT_prototyped (0x00) // CHECK: DW_TAG_unspecified_parameters @@ -64,7 +62,6 @@ _x: .long 1 // CHECK: DW_AT_decl_file ([[FILE]]) // CHECK: DW_AT_decl_line (9) // CHECK: DW_AT_low_pc (0x0000000000000007) -// CHECK: DW_AT_prototyped (0x00) // CHECK: DW_TAG_unspecified_parameters @@ -75,7 +72,6 @@ _x: .long 1 // CHECK: DW_AT_decl_file ([[FILE]]) // CHECK: DW_AT_decl_line (10) // CHECK: DW_AT_low_pc (0x0000000000000007) -// CHECK: DW_AT_prototyped (0x00) // CHECK: DW_TAG_unspecified_parameters From 1b79509f97b6c9595027b53d3d67f174d0ae1c78 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 24 May 2020 22:30:59 -0700 Subject: [PATCH 002/770] [MCDwarf] Delete unneeded DW_AT_unspecified_parameters --- llvm/lib/MC/MCDwarf.cpp | 14 +------------- llvm/test/MC/ARM/dwarf-asm-multiple-sections.s | 4 ++-- llvm/test/MC/MachO/gen-dwarf.s | 16 +--------------- 3 files changed, 4 insertions(+), 30 deletions(-) diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp index 71b8f0e28e1cd..d75b55c6f8d26 100644 --- a/llvm/lib/MC/MCDwarf.cpp +++ b/llvm/lib/MC/MCDwarf.cpp @@ -841,19 +841,13 @@ static void EmitGenDwarfAbbrev(MCStreamer *MCOS) { // DW_TAG_label DIE abbrev (2). 
MCOS->emitULEB128IntValue(2); MCOS->emitULEB128IntValue(dwarf::DW_TAG_label); - MCOS->emitInt8(dwarf::DW_CHILDREN_yes); + MCOS->emitInt8(dwarf::DW_CHILDREN_no); EmitAbbrev(MCOS, dwarf::DW_AT_name, dwarf::DW_FORM_string); EmitAbbrev(MCOS, dwarf::DW_AT_decl_file, dwarf::DW_FORM_data4); EmitAbbrev(MCOS, dwarf::DW_AT_decl_line, dwarf::DW_FORM_data4); EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr); EmitAbbrev(MCOS, 0, 0); - // DW_TAG_unspecified_parameters DIE abbrev (3). - MCOS->emitULEB128IntValue(3); - MCOS->emitULEB128IntValue(dwarf::DW_TAG_unspecified_parameters); - MCOS->emitInt8(dwarf::DW_CHILDREN_no); - EmitAbbrev(MCOS, 0, 0); - // Terminate the abbreviations for this compilation unit. MCOS->emitInt8(0); } @@ -1085,12 +1079,6 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, const MCExpr *AT_low_pc = MCSymbolRefExpr::create(Entry.getLabel(), MCSymbolRefExpr::VK_None, context); MCOS->emitValue(AT_low_pc, AddrSize); - - // The DW_TAG_unspecified_parameters DIE abbrev (3). - MCOS->emitULEB128IntValue(3); - - // Add the NULL DIE terminating the DW_TAG_unspecified_parameters DIE's. - MCOS->emitInt8(0); } // Add the NULL DIE terminating the Compile Unit DIE's. 
diff --git a/llvm/test/MC/ARM/dwarf-asm-multiple-sections.s b/llvm/test/MC/ARM/dwarf-asm-multiple-sections.s index ffcdfda397524..2f32681b36271 100644 --- a/llvm/test/MC/ARM/dwarf-asm-multiple-sections.s +++ b/llvm/test/MC/ARM/dwarf-asm-multiple-sections.s @@ -41,10 +41,10 @@ b: // DWARF4: DW_AT_ranges [DW_FORM_sec_offset] (0x00000000 // DWARF5: DW_AT_ranges [DW_FORM_sec_offset] (0x0000000c -// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_label [2] * +// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_label [2] // DWARF-NEXT: DW_AT_name [DW_FORM_string] ("a") -// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_label [2] * +// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_label [2] // DWARF-NEXT: DW_AT_name [DW_FORM_string] ("b") diff --git a/llvm/test/MC/MachO/gen-dwarf.s b/llvm/test/MC/MachO/gen-dwarf.s index 58f8a7ccf8994..5bf6cac3428e8 100644 --- a/llvm/test/MC/MachO/gen-dwarf.s +++ b/llvm/test/MC/MachO/gen-dwarf.s @@ -25,14 +25,12 @@ _x: .long 1 // CHECK: DW_AT_producer DW_FORM_string // CHECK: DW_AT_language DW_FORM_data2 -// CHECK: [2] DW_TAG_label DW_CHILDREN_yes +// CHECK: [2] DW_TAG_label DW_CHILDREN_no // CHECK: DW_AT_name DW_FORM_string // CHECK: DW_AT_decl_file DW_FORM_data4 // CHECK: DW_AT_decl_line DW_FORM_data4 // CHECK: DW_AT_low_pc DW_FORM_addr -// CHECK: [3] DW_TAG_unspecified_parameters DW_CHILDREN_no - // CHECK: .debug_info contents: @@ -53,30 +51,18 @@ _x: .long 1 // CHECK: DW_AT_decl_line (5) // CHECK: DW_AT_low_pc (0x0000000000000000) -// CHECK: DW_TAG_unspecified_parameters - -// CHECK: NULL - // CHECK: DW_TAG_label // CHECK: DW_AT_name ("foo") // CHECK: DW_AT_decl_file ([[FILE]]) // CHECK: DW_AT_decl_line (9) // CHECK: DW_AT_low_pc (0x0000000000000007) -// CHECK: DW_TAG_unspecified_parameters - -// CHECK: NULL - // CHECK: DW_TAG_label // CHECK: DW_AT_name ("baz") // CHECK: DW_AT_decl_file ([[FILE]]) // CHECK: DW_AT_decl_line (10) // CHECK: DW_AT_low_pc (0x0000000000000007) -// CHECK: DW_TAG_unspecified_parameters - -// CHECK: NULL - // CHECK: NULL // CHECK: .debug_aranges contents: From 
760f45eacadbabf9634fb81d7ccaa16c269cf19e Mon Sep 17 00:00:00 2001 From: Tobias Hieta Date: Mon, 25 May 2020 10:28:17 +0300 Subject: [PATCH 003/770] [CMake] Properly handle the LTO cache arguments for MinGW We want to make sure that LINKER_IS_LLD_LINK is properly set - in this case it shouldn't be set when building for MinGW. Then we want to make the test for it correct and finally include the option to build with thinlto cache since the MinGW driver now supports that. Differential Revision: https://reviews.llvm.org/D80493 --- llvm/cmake/modules/HandleLLVMOptions.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake index b50100f4d63ad..d5c924ca1a2c8 100644 --- a/llvm/cmake/modules/HandleLLVMOptions.cmake +++ b/llvm/cmake/modules/HandleLLVMOptions.cmake @@ -13,7 +13,7 @@ include(CheckCXXCompilerFlag) include(CheckSymbolExists) include(CMakeDependentOption) -if(CMAKE_LINKER MATCHES "lld-link" OR (WIN32 AND LLVM_USE_LINKER STREQUAL "lld") OR LLVM_ENABLE_LLD) +if(CMAKE_LINKER MATCHES "lld-link" OR (MSVC AND (LLVM_USE_LINKER STREQUAL "lld" OR LLVM_ENABLE_LLD))) set(LINKER_IS_LLD_LINK TRUE) else() set(LINKER_IS_LLD_LINK FALSE) @@ -941,7 +941,7 @@ if (LLVM_BUILD_INSTRUMENTED AND LLVM_BUILD_INSTRUMENTED_COVERAGE) message(FATAL_ERROR "LLVM_BUILD_INSTRUMENTED and LLVM_BUILD_INSTRUMENTED_COVERAGE cannot both be specified") endif() -if(LLVM_ENABLE_LTO AND LLVM_ON_WIN32 AND NOT LINKER_IS_LLD_LINK) +if(LLVM_ENABLE_LTO AND LLVM_ON_WIN32 AND NOT LINKER_IS_LLD_LINK AND NOT MINGW) message(FATAL_ERROR "When compiling for Windows, LLVM_ENABLE_LTO requires using lld as the linker (point CMAKE_LINKER at lld-link.exe)") endif() if(uppercase_LLVM_ENABLE_LTO STREQUAL "THIN") @@ -956,7 +956,7 @@ if(uppercase_LLVM_ENABLE_LTO STREQUAL "THIN") if(APPLE) append("-Wl,-cache_path_lto,${PROJECT_BINARY_DIR}/lto.cache" CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS) - elseif(UNIX AND 
LLVM_USE_LINKER STREQUAL "lld") + elseif((UNIX OR MINGW) AND LLVM_USE_LINKER STREQUAL "lld") append("-Wl,--thinlto-cache-dir=${PROJECT_BINARY_DIR}/lto.cache" CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS) elseif(LLVM_USE_LINKER STREQUAL "gold") From 5b7ff6f07ffbcbcfad24f39faad5858cc379fad0 Mon Sep 17 00:00:00 2001 From: "Kazushi (Jam) Marukawa" Date: Mon, 25 May 2020 09:48:51 +0200 Subject: [PATCH 004/770] [VE][NFC] Correct sjlj_exception test Summary: '|&' works with bash only, so it should not be used in regression tests. Differential Revision: https://reviews.llvm.org/D80501 --- llvm/test/CodeGen/VE/sjlj_except.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/VE/sjlj_except.ll b/llvm/test/CodeGen/VE/sjlj_except.ll index 582ee6eb1e6a8..4d2558571bf4f 100644 --- a/llvm/test/CodeGen/VE/sjlj_except.ll +++ b/llvm/test/CodeGen/VE/sjlj_except.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=x86_64-unknown-unknown --exception-model=sjlj --print-after=sjljehprepare < %s |& FileCheck --check-prefix=CHECK-X86 %s -; RUN: (llc -mtriple=ve-unknown-unknown --exception-model=sjlj --print-after=sjljehprepare < %s || true) |& FileCheck --check-prefix=CHECK-VE %s +; RUN: llc -mtriple=x86_64-unknown-unknown --exception-model=sjlj --print-after=sjljehprepare < %s 2>&1 | FileCheck --check-prefix=CHECK-X86 %s +; RUN: (llc -mtriple=ve-unknown-unknown --exception-model=sjlj --print-after=sjljehprepare < %s || true) 2>&1 | FileCheck --check-prefix=CHECK-VE %s @SomeGlobal = external dso_local global i8 From b752a2743ab0d24d8da5d97c07fbdb996df78b1f Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Mon, 25 May 2020 10:19:34 +0200 Subject: [PATCH 005/770] [clangd] Log use of heuristic go-to-def. 
NFC Generally: - found results using this method -> log - no results using this method -> vlog - method wasn't applied because ineligible -> no log --- clang-tools-extra/clangd/XRefs.cpp | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp index 1d82763b6a3cf..1fc0e0348d093 100644 --- a/clang-tools-extra/clangd/XRefs.cpp +++ b/clang-tools-extra/clangd/XRefs.cpp @@ -438,8 +438,11 @@ locateSymbolTextually(const SpelledWord &Word, ParsedAST &AST, ScoredResults.push_back({Score, std::move(Located)}); }); - if (TooMany) + if (TooMany) { + vlog("Heuristic index lookup for {0} returned too many candidates, ignored", + Word.Text); return {}; + } llvm::sort(ScoredResults, [](const ScoredLocatedSymbol &A, const ScoredLocatedSymbol &B) { @@ -448,6 +451,10 @@ locateSymbolTextually(const SpelledWord &Word, ParsedAST &AST, std::vector Results; for (auto &Res : std::move(ScoredResults)) Results.push_back(std::move(Res.second)); + if (Results.empty()) + vlog("No heuristic index definition for {0}", Word.Text); + else + log("Found definition heuristically in index for {0}", Word.Text); return Results; } @@ -570,13 +577,22 @@ std::vector locateSymbolAt(ParsedAST &AST, Position Pos, // Is the same word nearby a real identifier that might refer to something? 
if (const syntax::Token *NearbyIdent = findNearbyIdentifier(*Word, AST.getTokens())) { - if (auto Macro = locateMacroReferent(*NearbyIdent, AST, *MainFilePath)) + if (auto Macro = locateMacroReferent(*NearbyIdent, AST, *MainFilePath)) { + log("Found macro definition heuristically using nearby identifier {0}", + Word->Text); return {*std::move(Macro)}; + } ASTResults = locateASTReferent(NearbyIdent->location(), NearbyIdent, AST, *MainFilePath, Index, /*NodeKind=*/nullptr); - if (!ASTResults.empty()) + if (!ASTResults.empty()) { + log("Found definition heuristically using nearby identifier {0}", + NearbyIdent->text(SM)); return ASTResults; + } else { + vlog("No definition found using nearby identifier {0} at {1}", + Word->Text, Word->Location.printToString(SM)); + } } // No nearby word, or it didn't refer to anything either. Try the index. auto TextualResults = From 3895148d7cd8ff76220f8f8209ec06369a8e816f Mon Sep 17 00:00:00 2001 From: Joachim Protze Date: Mon, 25 May 2020 10:19:35 +0200 Subject: [PATCH 006/770] [OpenMP] Fix a race in task queue reallocation __kmp_realloc_task_deque implicitly assumes that the task queue is full (ntasks == size), therefore tail = size in line 319. An assertion is added to document this assumption. The first check for a full queue happens before the lock is taken and might no longer hold once the lock is held. So, we need to check again for this condition when we have the lock. 
Reviewed By: AndreyChurbanov Differential Revision: https://reviews.llvm.org/D80480 --- openmp/runtime/src/kmp_tasking.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index c928517410608..6e584731a85fe 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -298,6 +298,7 @@ static bool __kmp_task_is_allowed(int gtid, const kmp_int32 is_constrained, static void __kmp_realloc_task_deque(kmp_info_t *thread, kmp_thread_data_t *thread_data) { kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td); + KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == size); kmp_int32 new_size = 2 * size; KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to " @@ -381,8 +382,11 @@ static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) { } else { __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock); locked = 1; - // expand deque to push the task which is not allowed to execute - __kmp_realloc_task_deque(thread, thread_data); + if (TCR_4(thread_data->td.td_deque_ntasks) >= + TASK_DEQUE_SIZE(thread_data->td)) { + // expand deque to push the task which is not allowed to execute + __kmp_realloc_task_deque(thread, thread_data); + } } } // Lock the deque for the task push operation @@ -3659,7 +3663,11 @@ static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task, return result; __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock); - __kmp_realloc_task_deque(thread, thread_data); + if (TCR_4(thread_data->td.td_deque_ntasks) >= + TASK_DEQUE_SIZE(thread_data->td)) { + // expand deque to push the task which is not allowed to execute + __kmp_realloc_task_deque(thread, thread_data); + } } else { From 840450549c9199150cbdee29acef756c19660ca1 Mon Sep 17 00:00:00 2001 From: Ayal Zaks Date: Sun, 24 May 2020 16:22:39 +0300 Subject: [PATCH 007/770] [LV] Clamp MaxVF to power of 2. 
If a loop has a constant trip count known to be a multiple of MaxVF (times user UF), LV infers that no tail will be generated for any chosen VF. This relies on the chosen VFs being powers of 2 bounded by MaxVF, and assumes MaxVF is a power of 2. Make sure the latter holds, in particular when MaxVF is set by a memory dependence distance which may not be a power of 2. Differential Revision: https://reviews.llvm.org/D80491 --- .../Transforms/Vectorize/LoopVectorize.cpp | 4 + .../LoopVectorize/memdep-fold-tail.ll | 108 ++++++++++++++++++ 2 files changed, 112 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 6c2a3e42de48c..df1529a2f7b9e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5003,6 +5003,7 @@ Optional LoopVectorizationCostModel::computeMaxVF(unsigned UserVF, } unsigned MaxVF = UserVF ? UserVF : computeFeasibleMaxVF(TC); + assert((UserVF || isPowerOf2_32(MaxVF)) && "MaxVF must be a power of 2"); unsigned MaxVFtimesIC = UserIC ? MaxVF * UserIC : MaxVF; if (TC > 0 && TC % MaxVFtimesIC == 0) { // Accept MaxVF if we do not have a tail. @@ -5051,6 +5052,9 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount) { WidestRegister = std::min(WidestRegister, MaxSafeRegisterWidth); + // Ensure MaxVF is a power of 2; the dependence distance bound may not be. 
+ WidestRegister = PowerOf2Floor(WidestRegister); + unsigned MaxVectorSize = WidestRegister / WidestType; LLVM_DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType diff --git a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll new file mode 100644 index 0000000000000..4fe0d12253506 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll @@ -0,0 +1,108 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -loop-vectorize -vectorize-num-stores-pred=2 -prefer-predicate-over-epilog -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; Vectorization with dependence checks. + +; Check that a non-power-of-2 MaxVF, calculated based on maximum safe distance, +; does not lead fold-tail to think that no tail will be generated for any chosen +; (power of 2) VF. +; Dependence distance here is 3 iterations. +; Tiny trip count of 15 is divisible by 3, but any (even) VF will have a tail. 
+ +;unsigned char a [15+3]; +;void maxvf3(){ +; for (int j = 0; j < 15; ++j) { +; a[j] = 69; +; a[j+3] = 7; +; } +;} + +@a = common local_unnamed_addr global [18 x i8] zeroinitializer, align 16 + +define void @maxvf3() { +; CHECK-LABEL: @maxvf3( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; CHECK: pred.store.if: +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [18 x i8], [18 x i8]* @a, i32 0, i32 [[TMP2]] +; CHECK-NEXT: store i8 69, i8* [[TMP3]], align 8 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] +; CHECK: pred.store.continue: +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 +; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]] +; CHECK: pred.store.if1: +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [18 x i8], [18 x i8]* @a, i32 0, i32 [[TMP5]] +; CHECK-NEXT: store i8 69, i8* [[TMP6]], align 8 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] +; CHECK: pred.store.continue2: +; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw <2 x i32> , [[VEC_IND]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 +; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] +; CHECK: pred.store.if3: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP7]], i32 0 
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [18 x i8], [18 x i8]* @a, i32 0, i32 [[TMP9]] +; CHECK-NEXT: store i8 7, i8* [[TMP10]], align 8 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] +; CHECK: pred.store.continue4: +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 +; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]] +; CHECK: pred.store.if5: +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i32> [[TMP7]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [18 x i8], [18 x i8]* @a, i32 0, i32 [[TMP12]] +; CHECK-NEXT: store i8 7, i8* [[TMP13]], align 8 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] +; CHECK: pred.store.continue6: +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[J_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[AJ:%.*]] = getelementptr inbounds [18 x i8], [18 x i8]* @a, i32 0, i32 [[J]] +; CHECK-NEXT: store i8 69, i8* [[AJ]], align 8 +; CHECK-NEXT: [[JP3:%.*]] = add nuw nsw i32 3, [[J]] +; CHECK-NEXT: [[AJP3:%.*]] = getelementptr inbounds [18 x i8], [18 x i8]* @a, i32 0, i32 [[JP3]] +; CHECK-NEXT: store i8 7, i8* [[AJP3]], align 8 +; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i32 [[J]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[J_NEXT]], 15 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !2 +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + 
+for.body: + %j = phi i32 [ 0, %entry ], [ %j.next, %for.body ] + %aj = getelementptr inbounds [18 x i8], [18 x i8]* @a, i32 0, i32 %j + store i8 69, i8* %aj, align 8 + %jp3 = add nuw nsw i32 3, %j + %ajp3 = getelementptr inbounds [18 x i8], [18 x i8]* @a, i32 0, i32 %jp3 + store i8 7, i8* %ajp3, align 8 + %j.next = add nuw nsw i32 %j, 1 + %exitcond = icmp eq i32 %j.next, 15 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0 + +for.end: + ret void +} + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.vectorize.enable", i1 true} From 447ea9b4f5f562c8fab7d11ecbb10ecd33155d5b Mon Sep 17 00:00:00 2001 From: mydeveloperday Date: Wed, 20 May 2020 07:22:01 +0100 Subject: [PATCH 008/770] [AST] default implementation is possible for non-member functions in C++20. Summary: Make RAV not visit the default function decl by default. Also update some stale comments on FunctionDecl::isDefault. Fixes https://github.com/clangd/clangd/issues/383 Reviewers: sammccall, rsmith Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D80288 --- clang/include/clang/AST/Decl.h | 10 ++++------ clang/include/clang/AST/RecursiveASTVisitor.h | 10 +++++----- .../RecursiveASTVisitorTests/CXXMethodDecl.cpp | 18 ++++++++++++++++++ 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index d7136a4cd420b..2e1630827cce3 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -2125,19 +2125,17 @@ class FunctionDecl : public DeclaratorDecl, bool isTrivialForCall() const { return FunctionDeclBits.IsTrivialForCall; } void setTrivialForCall(bool IT) { FunctionDeclBits.IsTrivialForCall = IT; } - /// Whether this function is defaulted per C++0x. Only valid for - /// special member functions. + /// Whether this function is defaulted. Valid for e.g. + /// special member functions, defaulted comparisions (not methods!). 
bool isDefaulted() const { return FunctionDeclBits.IsDefaulted; } void setDefaulted(bool D = true) { FunctionDeclBits.IsDefaulted = D; } - /// Whether this function is explicitly defaulted per C++0x. Only valid - /// for special member functions. + /// Whether this function is explicitly defaulted. bool isExplicitlyDefaulted() const { return FunctionDeclBits.IsExplicitlyDefaulted; } - /// State that this function is explicitly defaulted per C++0x. Only valid - /// for special member functions. + /// State that this function is explicitly defaulted. void setExplicitlyDefaulted(bool ED = true) { FunctionDeclBits.IsExplicitlyDefaulted = ED; } diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index a264d1cf24b23..b30d456bd24a8 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -2103,11 +2103,11 @@ bool RecursiveASTVisitor::TraverseFunctionHelper(FunctionDecl *D) { } } - bool VisitBody = D->isThisDeclarationADefinition(); - // If a method is set to default outside the class definition the compiler - // generates the method body and adds it to the AST. - if (const auto *MD = dyn_cast(D)) - VisitBody &= !MD->isDefaulted() || getDerived().shouldVisitImplicitCode(); + bool VisitBody = + D->isThisDeclarationADefinition() && + // Don't visit the function body if the function definition is generated + // by clang. + (!D->isDefaulted() || getDerived().shouldVisitImplicitCode()); if (VisitBody) { TRY_TO(TraverseStmt(D->getBody())); // Function body. 
diff --git a/clang/unittests/Tooling/RecursiveASTVisitorTests/CXXMethodDecl.cpp b/clang/unittests/Tooling/RecursiveASTVisitorTests/CXXMethodDecl.cpp index 6441ea99dd2cc..90fa84bd44812 100644 --- a/clang/unittests/Tooling/RecursiveASTVisitorTests/CXXMethodDecl.cpp +++ b/clang/unittests/Tooling/RecursiveASTVisitorTests/CXXMethodDecl.cpp @@ -55,4 +55,22 @@ TEST(RecursiveASTVisitor, CXXMethodDeclNoDefaultBodyVisited) { EXPECT_TRUE(Visitor.runOver(Code, CXXMethodDeclVisitor::Lang_CXX11)); } } + +TEST(RecursiveASTVisitor, FunctionDeclNoDefaultBodyVisited) { + for (bool VisitImplCode : {false, true}) { + CXXMethodDeclVisitor Visitor(VisitImplCode); + if (VisitImplCode) + Visitor.ExpectMatch("declref", 4, 58, /*Times=*/2); + else + Visitor.DisallowMatch("declref", 4, 58); + llvm::StringRef Code = R"cpp( + struct s { + int x; + friend auto operator==(s a, s b) -> bool = default; + }; + bool k = s() == s(); // make sure clang generates the "==" definition. + )cpp"; + EXPECT_TRUE(Visitor.runOver(Code, CXXMethodDeclVisitor::Lang_CXX2a)); + } +} } // end anonymous namespace From 72c5ea1d73bb89af6f82c14ddb0b7f4c2510bab7 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Tue, 5 May 2020 17:11:30 +0200 Subject: [PATCH 009/770] [clangd] Enable cross-file-rename by default. Summary: The cross-file rename feature is stable enough to enable it (has been rolled out internally for a few weeks). 
Reviewers: sammccall Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D80507 --- clang-tools-extra/clangd/tool/ClangdMain.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index 031f57f954cb2..cab6c97cf121e 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -274,11 +274,8 @@ list TweakList{ opt CrossFileRename{ "cross-file-rename", cat(Features), - desc("Enable cross-file rename feature. Note that this feature is " - "experimental and may lead to broken code or incomplete rename " - "results"), - init(false), - Hidden, + desc("Enable cross-file rename feature."), + init(true), }; opt RecoveryAST{ From 83bd2c4a06803fa9af7f92a474b1d37cb70397cc Mon Sep 17 00:00:00 2001 From: Jaroslav Sevcik Date: Mon, 25 May 2020 11:17:48 +0200 Subject: [PATCH 010/770] Prevent GetNumChildren from transitively walking pointer chains Summary: This is an attempt to fix https://bugs.llvm.org/show_bug.cgi?id=45988, where SBValue::GetNumChildren returns 2, but SBValue::GetChildAtIndex(1) returns an invalid value sentinel. The root cause of this seems to be that GetNumChildren can return the number of children of the wrong value. In particular, for pointers GetNumChildren just recursively calls itself on the pointee type, so it effectively walks chains of pointers. This is different from the logic of GetChildAtIndex, which only recurses if pointee.IsAggregateType() returns true (IsAggregateType is false for pointers and references), so it never follows chains of pointers. This patch aims to make GetNumChildren (more) consistent with GetChildAtIndex by only recursively calling GetNumChildren for aggregate types. 
Ideally, GetNumChildren and GetChildAtIndex would share the code that decides which pointers/references are followed, but that is a bit more invasive change. Reviewers: teemperor, jingham, clayborg Reviewed By: teemperor, clayborg Subscribers: clayborg, labath, shafik, lldb-commits Tags: #lldb Differential Revision: https://reviews.llvm.org/D80254 --- .../TypeSystem/Clang/TypeSystemClang.cpp | 34 +++++++------------ .../pointer_num_children/Makefile | 3 ++ .../TestPointerNumChildren.py | 28 +++++++++++++++ .../pointer_num_children/main.cpp | 16 +++++++++ 4 files changed, 60 insertions(+), 21 deletions(-) create mode 100644 lldb/test/API/functionalities/pointer_num_children/Makefile create mode 100644 lldb/test/API/functionalities/pointer_num_children/TestPointerNumChildren.py create mode 100644 lldb/test/API/functionalities/pointer_num_children/main.cpp diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 6e8946e23104f..c687251ed5dcb 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -5172,12 +5172,15 @@ uint32_t TypeSystemClang::GetNumChildren(lldb::opaque_compiler_type_t type, } break; + case clang::Type::LValueReference: + case clang::Type::RValueReference: case clang::Type::ObjCObjectPointer: { - const clang::ObjCObjectPointerType *pointer_type = - llvm::cast(qual_type.getTypePtr()); - clang::QualType pointee_type = pointer_type->getPointeeType(); - uint32_t num_pointee_children = - GetType(pointee_type).GetNumChildren(omit_empty_base_classes, exe_ctx); + CompilerType pointee_clang_type(GetPointeeType(type)); + + uint32_t num_pointee_children = 0; + if (pointee_clang_type.IsAggregateType()) + num_pointee_children = + pointee_clang_type.GetNumChildren(omit_empty_base_classes, exe_ctx); // If this type points to a simple type, then it has 1 child if (num_pointee_children == 0) num_children = 1; @@ 
-5209,8 +5212,11 @@ uint32_t TypeSystemClang::GetNumChildren(lldb::opaque_compiler_type_t type, const clang::PointerType *pointer_type = llvm::cast(qual_type.getTypePtr()); clang::QualType pointee_type(pointer_type->getPointeeType()); - uint32_t num_pointee_children = - GetType(pointee_type).GetNumChildren(omit_empty_base_classes, exe_ctx); + CompilerType pointee_clang_type(GetType(pointee_type)); + uint32_t num_pointee_children = 0; + if (pointee_clang_type.IsAggregateType()) + num_pointee_children = + pointee_clang_type.GetNumChildren(omit_empty_base_classes, exe_ctx); if (num_pointee_children == 0) { // We have a pointer to a pointee type that claims it has no children. We // will want to look at @@ -5219,20 +5225,6 @@ uint32_t TypeSystemClang::GetNumChildren(lldb::opaque_compiler_type_t type, num_children = num_pointee_children; } break; - case clang::Type::LValueReference: - case clang::Type::RValueReference: { - const clang::ReferenceType *reference_type = - llvm::cast(qual_type.getTypePtr()); - clang::QualType pointee_type = reference_type->getPointeeType(); - uint32_t num_pointee_children = - GetType(pointee_type).GetNumChildren(omit_empty_base_classes, exe_ctx); - // If this type points to a simple type, then it has 1 child - if (num_pointee_children == 0) - num_children = 1; - else - num_children = num_pointee_children; - } break; - default: break; } diff --git a/lldb/test/API/functionalities/pointer_num_children/Makefile b/lldb/test/API/functionalities/pointer_num_children/Makefile new file mode 100644 index 0000000000000..99998b20bcb05 --- /dev/null +++ b/lldb/test/API/functionalities/pointer_num_children/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/functionalities/pointer_num_children/TestPointerNumChildren.py b/lldb/test/API/functionalities/pointer_num_children/TestPointerNumChildren.py new file mode 100644 index 0000000000000..aaeaef75810cb --- /dev/null +++ 
b/lldb/test/API/functionalities/pointer_num_children/TestPointerNumChildren.py @@ -0,0 +1,28 @@ +""" +Test children counts of pointer values. +""" + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class TestPointerNumChilden(TestBase): + mydir = TestBase.compute_mydir(__file__) + + def test_pointer_num_children(self): + self.build() + lldbutil.run_to_source_breakpoint(self, "// break here", lldb.SBFileSpec("main.cpp")) + + result = self.frame().FindVariable("Ref") + self.assertEqual(1, result.GetNumChildren()) + self.assertEqual(2, result.GetChildAtIndex(0).GetNumChildren()) + self.assertEqual("42", result.GetChildAtIndex(0).GetChildAtIndex(0).GetValue()) + self.assertEqual("56", result.GetChildAtIndex(0).GetChildAtIndex(1).GetValue()) + + result = self.frame().FindVariable("Ptr") + self.assertEqual(1, result.GetNumChildren()) + self.assertEqual(2, result.GetChildAtIndex(0).GetNumChildren()) + self.assertEqual("42", result.GetChildAtIndex(0).GetChildAtIndex(0).GetValue()) + self.assertEqual("56", result.GetChildAtIndex(0).GetChildAtIndex(1).GetValue()) diff --git a/lldb/test/API/functionalities/pointer_num_children/main.cpp b/lldb/test/API/functionalities/pointer_num_children/main.cpp new file mode 100644 index 0000000000000..a17182092a676 --- /dev/null +++ b/lldb/test/API/functionalities/pointer_num_children/main.cpp @@ -0,0 +1,16 @@ +struct Inner { + int a; + int b; +}; + +struct Outer { + Inner *inner; +}; + +int main() { + Inner inner{42, 56}; + Outer outer{&inner}; + Inner **Ptr = &(outer.inner); + Inner *&Ref = outer.inner; + return 0; // break here +} From fe22e5689e94370b8eadef4b7267201cc9fcb2e3 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Mon, 25 May 2020 12:14:21 +0200 Subject: [PATCH 011/770] [lldb][NFC] Pass DeclarationName to NameSearchContext by value DeclarationName is usually passed around by value as it's just a pointer. 
--- .../source/Plugins/ExpressionParser/Clang/NameSearchContext.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lldb/source/Plugins/ExpressionParser/Clang/NameSearchContext.h b/lldb/source/Plugins/ExpressionParser/Clang/NameSearchContext.h index 52d2a19a404b3..dc8621dd6aba5 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/NameSearchContext.h +++ b/lldb/source/Plugins/ExpressionParser/Clang/NameSearchContext.h @@ -32,7 +32,7 @@ struct NameSearchContext { /// modules. ClangASTImporter::NamespaceMapSP m_namespace_map; /// The name being looked for. - const clang::DeclarationName &m_decl_name; + const clang::DeclarationName m_decl_name; /// The DeclContext to put declarations into. const clang::DeclContext *m_decl_context; /// All the types of functions that have been reported, so we don't @@ -63,7 +63,7 @@ struct NameSearchContext { /// The DeclContext to register Decls in. NameSearchContext(TypeSystemClang &clang_ts, llvm::SmallVectorImpl &decls, - clang::DeclarationName &name, const clang::DeclContext *dc) + clang::DeclarationName name, const clang::DeclContext *dc) : m_clang_ts(clang_ts), m_decls(decls), m_namespace_map(std::make_shared()), m_decl_name(name), m_decl_context(dc) { From b087b91c917087bc53d47282a16ee4af78bfe286 Mon Sep 17 00:00:00 2001 From: Dmitry Preobrazhensky Date: Mon, 25 May 2020 14:19:22 +0300 Subject: [PATCH 012/770] [AMDGPU][CODEGEN] Added 'A' constraint for inline assembler Summary: 'A' constraint requires an immediate int or fp constant that can be inlined in an instruction encoding. 
Reviewers: arsenm, rampitec Differential Revision: https://reviews.llvm.org/D78494 --- llvm/docs/LangRef.rst | 2 +- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 13 +- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 58 ++++ llvm/lib/Target/AMDGPU/SIISelLowering.h | 7 + .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 10 +- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 3 + .../test/CodeGen/AMDGPU/inline-constraints.ll | 277 +++++++++++++++++- 7 files changed, 363 insertions(+), 7 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index a1f3297d6454f..bf0627e441960 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -4128,7 +4128,7 @@ AMDGPU: - ``[0-9]v``: The 32-bit VGPR register, number 0-9. - ``[0-9]s``: The 32-bit SGPR register, number 0-9. - ``[0-9]a``: The 32-bit AGPR register, number 0-9. - +- ``A``: An integer or a floating-point inline constant. All ARM modes: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 36cc0ea20052e..81676d63643df 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -1339,7 +1339,18 @@ bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, AMDGPUInstPrinter::printRegOperand(MO.getReg(), O, *MF->getSubtarget().getRegisterInfo()); return false; + } else if (MO.isImm()) { + int64_t Val = MO.getImm(); + if (AMDGPU::isInlinableIntLiteral(Val)) { + O << Val; + } else if (isUInt<16>(Val)) { + O << format("0x%" PRIx64, static_cast(Val)); + } else if (isUInt<32>(Val)) { + O << format("0x%" PRIx64, static_cast(Val)); + } else { + O << format("0x%" PRIx64, static_cast(Val)); + } + return false; } - return true; } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 3ddf4ae70397d..2c147fa8947c1 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -10886,11 +10886,69 @@ 
SITargetLowering::getConstraintType(StringRef Constraint) const { case 'v': case 'a': return C_RegisterClass; + case 'A': + return C_Other; } } return TargetLowering::getConstraintType(Constraint); } +void SITargetLowering::LowerAsmOperandForConstraint(SDValue Op, + std::string &Constraint, + std::vector &Ops, + SelectionDAG &DAG) const { + if (Constraint.length() == 1 && Constraint[0] == 'A') { + LowerAsmOperandForConstraintA(Op, Ops, DAG); + } else { + TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); + } +} + +void SITargetLowering::LowerAsmOperandForConstraintA(SDValue Op, + std::vector &Ops, + SelectionDAG &DAG) const { + unsigned Size = Op.getScalarValueSizeInBits(); + if (Size > 64) + return; + + uint64_t Val; + bool IsConst = false; + if (ConstantSDNode *C = dyn_cast(Op)) { + Val = C->getSExtValue(); + IsConst = true; + } else if (ConstantFPSDNode *C = dyn_cast(Op)) { + Val = C->getValueAPF().bitcastToAPInt().getSExtValue(); + IsConst = true; + } else if (BuildVectorSDNode *V = dyn_cast(Op)) { + if (Size != 16 || Op.getNumOperands() != 2) + return; + if (Op.getOperand(0).isUndef() || Op.getOperand(1).isUndef()) + return; + if (ConstantSDNode *C = V->getConstantSplatNode()) { + Val = C->getSExtValue(); + IsConst = true; + } else if (ConstantFPSDNode *C = V->getConstantFPSplatNode()) { + Val = C->getValueAPF().bitcastToAPInt().getSExtValue(); + IsConst = true; + } + } + + if (IsConst) { + bool HasInv2Pi = Subtarget->hasInv2PiInlineImm(); + if ((Size == 16 && AMDGPU::isInlinableLiteral16(Val, HasInv2Pi)) || + (Size == 32 && AMDGPU::isInlinableLiteral32(Val, HasInv2Pi)) || + (Size == 64 && AMDGPU::isInlinableLiteral64(Val, HasInv2Pi))) { + // Clear unused bits of fp constants + if (!AMDGPU::isInlinableIntLiteral(Val)) { + unsigned UnusedBits = 64 - Size; + Val = (Val << UnusedBits) >> UnusedBits; + } + auto Res = DAG.getTargetConstant(Val, SDLoc(Op), MVT::i64); + Ops.push_back(Res); + } + } +} + // Figure out which registers should be 
reserved for stack access. Only after // the function is legalized do we know all of the non-spill stack objects or if // calls are present. diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 226003423889c..7ef11eba4f9ce 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -383,6 +383,13 @@ class SITargetLowering final : public AMDGPUTargetLowering { getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override; ConstraintType getConstraintType(StringRef Constraint) const override; + void LowerAsmOperandForConstraint(SDValue Op, + std::string &Constraint, + std::vector &Ops, + SelectionDAG &DAG) const override; + void LowerAsmOperandForConstraintA(SDValue Op, + std::vector &Ops, + SelectionDAG &DAG) const; SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, const SDLoc &DL, SDValue V) const; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 409bef0065e29..cba9857e4d158 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1169,8 +1169,12 @@ unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, return getRegBitWidth(MRI->getRegClass(RCID)) / 8; } +bool isInlinableIntLiteral(int64_t Literal) { + return Literal >= -16 && Literal <= 64; +} + bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) { - if (Literal >= -16 && Literal <= 64) + if (isInlinableIntLiteral(Literal)) return true; uint64_t Val = static_cast(Literal); @@ -1187,7 +1191,7 @@ bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) { } bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) { - if (Literal >= -16 && Literal <= 64) + if (isInlinableIntLiteral(Literal)) return true; // The actual type of the operand does not seem to matter as long @@ -1216,7 +1220,7 @@ bool isInlinableLiteral16(int16_t 
Literal, bool HasInv2Pi) { if (!HasInv2Pi) return false; - if (Literal >= -16 && Literal <= 64) + if (isInlinableIntLiteral(Literal)) return true; uint16_t Val = static_cast(Literal); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index ef7b224138841..224f797b3ef84 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -776,6 +776,9 @@ struct SIModeRegisterDefaults { } }; +LLVM_READNONE +bool isInlinableIntLiteral(int64_t Literal); + } // end namespace AMDGPU } // end namespace llvm diff --git a/llvm/test/CodeGen/AMDGPU/inline-constraints.ll b/llvm/test/CodeGen/AMDGPU/inline-constraints.ll index 6f1d35519f2fa..63585ebc9553f 100644 --- a/llvm/test/CodeGen/AMDGPU/inline-constraints.ll +++ b/llvm/test/CodeGen/AMDGPU/inline-constraints.ll @@ -1,5 +1,8 @@ -; RUN: llc < %s -march=amdgcn -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=GCN %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s +; RUN: not llc < %s -march=amdgcn -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=GCN %s +; RUN: not llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN --check-prefix=VI %s + +; RUN: not llc < %s -march=amdgcn -mcpu=bonaire -verify-machineinstrs 2>&1 | FileCheck --check-prefix=NOGCN --check-prefix=NOSI %s +; RUN: not llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs 2>&1 | FileCheck --check-prefix=NOGCN %s ; GCN-LABEL: {{^}}inline_reg_constraints: ; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] @@ -74,3 +77,273 @@ define amdgpu_kernel void @inline_sreg_constraint_imm_f64() { tail call void asm sideeffect "; use $0", "s"(double 1.0) ret void } + +;============================================================================== +; 'A' constraint, 16-bit operand +;============================================================================== + 
+; NOSI: error: invalid operand for inline asm constraint 'A' +; VI-LABEL: {{^}}inline_A_constraint_H0: +; VI: v_mov_b32 {{v[0-9]+}}, 64 +define i32 @inline_A_constraint_H0() { + %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i16 64) + ret i32 %v0 +} + +; NOSI: error: invalid operand for inline asm constraint 'A' +; VI-LABEL: {{^}}inline_A_constraint_H1: +; VI: v_mov_b32 {{v[0-9]+}}, -16 +define i32 @inline_A_constraint_H1() { + %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i16 -16) + ret i32 %v0 +} + +; NOSI: error: invalid operand for inline asm constraint 'A' +; VI-LABEL: {{^}}inline_A_constraint_H2: +; VI: v_mov_b32 {{v[0-9]+}}, 0x3c00 +define i32 @inline_A_constraint_H2() { + %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i16 bitcast (half 1.0 to i16)) + ret i32 %v0 +} + +; NOSI: error: invalid operand for inline asm constraint 'A' +; VI-LABEL: {{^}}inline_A_constraint_H3: +; VI: v_mov_b32 {{v[0-9]+}}, 0xbc00 +define i32 @inline_A_constraint_H3() { + %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i16 bitcast (half -1.0 to i16)) + ret i32 %v0 +} + +; NOSI: error: invalid operand for inline asm constraint 'A' +; VI-LABEL: {{^}}inline_A_constraint_H4: +; VI: v_mov_b32 {{v[0-9]+}}, 0x3118 +define i32 @inline_A_constraint_H4() { + %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(half 0xH3118) + ret i32 %v0 +} + +; NOSI: error: invalid operand for inline asm constraint 'A' +; VI-LABEL: {{^}}inline_A_constraint_H5: +; VI: v_mov_b32 {{v[0-9]+}}, 0x3118 +define i32 @inline_A_constraint_H5() { + %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i16 bitcast (half 0xH3118 to i16)) + ret i32 %v0 +} + +; NOSI: error: invalid operand for inline asm constraint 'A' +; VI-LABEL: {{^}}inline_A_constraint_H6: +; VI: v_mov_b32 {{v[0-9]+}}, 0xb800 +define i32 @inline_A_constraint_H6() { + %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(half -0.5) + ret i32 %v0 +} + +; NOGCN: error: invalid operand for inline asm constraint 'A' +define i32 
@inline_A_constraint_H7() { + %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i16 bitcast (half 0xH3119 to i16)) + ret i32 %v0 +} + +; NOGCN: error: invalid operand for inline asm constraint 'A' +define i32 @inline_A_constraint_H8() { + %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i16 bitcast (half 0xH3117 to i16)) + ret i32 %v0 +} + +; NOGCN: error: invalid operand for inline asm constraint 'A' +define i32 @inline_A_constraint_H9() { + %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i16 65) + ret i32 %v0 +} + +;============================================================================== +; 'A' constraint, 32-bit operand +;============================================================================== + +; GCN-LABEL: {{^}}inline_A_constraint_F0: +; GCN: v_mov_b32 {{v[0-9]+}}, -16 +define i32 @inline_A_constraint_F0() { + %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i32 -16) + ret i32 %v0 +} + +; GCN-LABEL: {{^}}inline_A_constraint_F1: +; GCN: v_mov_b32 {{v[0-9]+}}, 1 +define i32 @inline_A_constraint_F1() { + %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i32 1) + ret i32 %v0 +} + +; GCN-LABEL: {{^}}inline_A_constraint_F2: +; GCN: v_mov_b32 {{v[0-9]+}}, 0xbf000000 +define i32 @inline_A_constraint_F2() { + %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i32 bitcast (float -0.5 to i32)) + ret i32 %v0 +} + +; GCN-LABEL: {{^}}inline_A_constraint_F3: +; GCN: v_mov_b32 {{v[0-9]+}}, 0x40000000 +define i32 @inline_A_constraint_F3() { + %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i32 bitcast (float 2.0 to i32)) + ret i32 %v0 +} + +; GCN-LABEL: {{^}}inline_A_constraint_F4: +; GCN: v_mov_b32 {{v[0-9]+}}, 0xc0800000 +define i32 @inline_A_constraint_F4() { + %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(float -4.0) + ret i32 %v0 +} + +; NOSI: error: invalid operand for inline asm constraint 'A' +; VI-LABEL: {{^}}inline_A_constraint_F5: +; VI: v_mov_b32 {{v[0-9]+}}, 0x3e22f983 +define i32 @inline_A_constraint_F5() { + %v0 = tail call i32 asm 
"v_mov_b32 $0, $1", "=v,A"(i32 1042479491) + ret i32 %v0 +} + +; GCN-LABEL: {{^}}inline_A_constraint_F6: +; GCN: v_mov_b32 {{v[0-9]+}}, 0x3f000000 +define i32 @inline_A_constraint_F6() { + %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(float 0.5) + ret i32 %v0 +} + +; NOGCN: error: invalid operand for inline asm constraint 'A' +define i32 @inline_A_constraint_F7() { + %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i32 1042479490) + ret i32 %v0 +} + +; NOGCN: error: invalid operand for inline asm constraint 'A' +define i32 @inline_A_constraint_F8() { + %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i32 -17) + ret i32 %v0 +} + +;============================================================================== +; 'A' constraint, 64-bit operand +;============================================================================== + +; GCN-LABEL: {{^}}inline_A_constraint_D0: +; GCN: v_mov_b32 {{v[0-9]+}}, -16 +define i32 @inline_A_constraint_D0() { + %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i64 -16) + ret i32 %v0 +} + +; GCN-LABEL: {{^}}inline_A_constraint_D1: +; GCN: v_cvt_f32_f64 {{v[0-9]+}}, 0xc000000000000000 +define i32 @inline_A_constraint_D1() { + %v0 = tail call i32 asm "v_cvt_f32_f64 $0, $1", "=v,A"(i64 bitcast (double -2.0 to i64)) + ret i32 %v0 +} + +; GCN-LABEL: {{^}}inline_A_constraint_D2: +; GCN: v_cvt_f32_f64 {{v[0-9]+}}, 0x3fe0000000000000 +define i32 @inline_A_constraint_D2() { + %v0 = tail call i32 asm "v_cvt_f32_f64 $0, $1", "=v,A"(double 0.5) + ret i32 %v0 +} + +; NOSI: error: invalid operand for inline asm constraint 'A' +; VI-LABEL: {{^}}inline_A_constraint_D3: +; VI: v_cvt_f32_f64 {{v[0-9]+}}, 0x3fc45f306dc9c882 +define i32 @inline_A_constraint_D3() { + %v0 = tail call i32 asm "v_cvt_f32_f64 $0, $1", "=v,A"(double 0.15915494309189532) + ret i32 %v0 +} + +; NOSI: error: invalid operand for inline asm constraint 'A' +; VI-LABEL: {{^}}inline_A_constraint_D4: +; VI: v_cvt_f32_f64 {{v[0-9]+}}, 0x3fc45f306dc9c882 +define i32 
@inline_A_constraint_D4() { + %v0 = tail call i32 asm "v_cvt_f32_f64 $0, $1", "=v,A"(i64 bitcast (double 0.15915494309189532 to i64)) + ret i32 %v0 +} + +; GCN-LABEL: {{^}}inline_A_constraint_D5: +; GCN: v_cvt_f32_f64 {{v[0-9]+}}, 0xc000000000000000 +define i32 @inline_A_constraint_D5() { + %v0 = tail call i32 asm "v_cvt_f32_f64 $0, $1", "=v,A"(double -2.0) + ret i32 %v0 +} + +; NOGCN: error: invalid operand for inline asm constraint 'A' +define i32 @inline_A_constraint_D8() { + %v0 = tail call i32 asm "v_cvt_f32_f64 $0, $1", "=v,A"(double 1.1) + ret i32 %v0 +} + +; NOGCN: error: invalid operand for inline asm constraint 'A' +define i32 @inline_A_constraint_D9() { + %v0 = tail call i32 asm "v_cvt_f32_f64 $0, $1", "=v,A"(i64 bitcast (double 0.1 to i64)) + ret i32 %v0 +} + +;============================================================================== +; 'A' constraint, v2x16 operand +;============================================================================== + +; NOSI: error: invalid operand for inline asm constraint 'A' +; VI-LABEL: {{^}}inline_A_constraint_V0: +; VI: v_mov_b32 {{v[0-9]+}}, -4 +define i32 @inline_A_constraint_V0() { + %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(<2 x i16> ) + ret i32 %v0 +} + +; NOSI: error: invalid operand for inline asm constraint 'A' +; VI-LABEL: {{^}}inline_A_constraint_V1: +; VI: v_mov_b32 {{v[0-9]+}}, 0xb800 +define i32 @inline_A_constraint_V1() { + %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(<2 x half> ) + ret i32 %v0 +} + +; NOGCN: error: invalid operand for inline asm constraint 'A' +define i32 @inline_A_constraint_V2() { + %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(<2 x i16> ) + ret i32 %v0 +} + +; NOGCN: error: invalid operand for inline asm constraint 'A' +define i32 @inline_A_constraint_V3() { + %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(<2 x half> ) + ret i32 %v0 +} + +; NOGCN: error: invalid operand for inline asm constraint 'A' +define i32 @inline_A_constraint_V4() { + %v0 = tail 
call i32 asm "v_mov_b32 $0, $1", "=v,A"(<2 x i16> ) + ret i32 %v0 +} + +; NOGCN: error: invalid operand for inline asm constraint 'A' +define i32 @inline_A_constraint_V5() { + %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(<4 x i16> ) + ret i32 %v0 +} + +; NOGCN: error: invalid operand for inline asm constraint 'A' +define i32 @inline_A_constraint_V6() { + %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(<2 x i32> ) + ret i32 %v0 +} + +;============================================================================== +; 'A' constraint, type errors +;============================================================================== + +; NOGCN: error: invalid operand for inline asm constraint 'A' +define i32 @inline_A_constraint_E1(i32 %x) { + %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i32 %x) + ret i32 %v0 +} + +; NOGCN: error: invalid operand for inline asm constraint 'A' +define i32 @inline_A_constraint_E2() { + %v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i128 100000000000000000000) + ret i32 %v0 +} From 8e62f3b658cc85bf0a42dec1326c5e87e848485c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 25 May 2020 12:11:53 +0100 Subject: [PATCH 013/770] TargetInstrInfo.h - remove unnecessary includes. NFC. 
--- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 30ab1039bef00..8c6d845215948 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -17,7 +17,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/None.h" -#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MIRFormatter.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineCombinerPattern.h" @@ -26,7 +25,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOutliner.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/BranchProbability.h" From 0e83e67cd359aef475e5c3b86c1a5c932cfb1aba Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 25 May 2020 12:20:58 +0100 Subject: [PATCH 014/770] SystemZInstrBuilder.h - remove unnecessary PseudoSourceValue.h include. NFC. 
--- llvm/lib/Target/SystemZ/SystemZInstrBuilder.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h b/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h index 57c10648612dc..9fc786f92635f 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h +++ b/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h @@ -17,7 +17,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/PseudoSourceValue.h" namespace llvm { From 9fa58d1bf2f83a556c109f701aacfb92e2184c23 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 25 May 2020 12:41:08 +0100 Subject: [PATCH 015/770] [DAG] Add SimplifyDemandedVectorElts binop SimplifyMultipleUseDemandedBits handling For the supported binops (basic arithmetic, logicals + shifts), if we fail to simplify the demanded vector elts, then call SimplifyMultipleUseDemandedBits and try to peek through ops to remove unnecessary dependencies. This helps with PR40502. 
Differential Revision: https://reviews.llvm.org/D79003 --- .../CodeGen/SelectionDAG/TargetLowering.cpp | 75 ++++++++--- llvm/test/CodeGen/AArch64/mul_by_elt.ll | 2 +- llvm/test/CodeGen/X86/combine-pmuldq.ll | 8 +- llvm/test/CodeGen/X86/combine-sdiv.ll | 44 ++++--- llvm/test/CodeGen/X86/oddsubvector.ll | 116 +++++++----------- llvm/test/CodeGen/X86/vector-fshl-rot-128.ll | 37 ++---- llvm/test/CodeGen/X86/vector-fshl-rot-256.ll | 7 +- llvm/test/CodeGen/X86/vector-fshr-rot-128.ll | 37 ++---- llvm/test/CodeGen/X86/vector-fshr-rot-256.ll | 7 +- llvm/test/CodeGen/X86/vector-narrow-binop.ll | 9 +- 10 files changed, 166 insertions(+), 176 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index eabfd7fc85cbb..b6fdddc46ede5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2225,6 +2225,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth, bool AssumeSingleUse) const { EVT VT = Op.getValueType(); + unsigned Opcode = Op.getOpcode(); APInt DemandedElts = OriginalDemandedElts; unsigned NumElts = DemandedElts.getBitWidth(); assert(VT.isVector() && "Expected vector op"); @@ -2256,7 +2257,26 @@ bool TargetLowering::SimplifyDemandedVectorElts( SDLoc DL(Op); unsigned EltSizeInBits = VT.getScalarSizeInBits(); - switch (Op.getOpcode()) { + // Helper for demanding the specified elements and all the bits of both binary + // operands. 
+ auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) { + unsigned NumBits0 = Op0.getScalarValueSizeInBits(); + unsigned NumBits1 = Op1.getScalarValueSizeInBits(); + APInt DemandedBits0 = APInt::getAllOnesValue(NumBits0); + APInt DemandedBits1 = APInt::getAllOnesValue(NumBits1); + SDValue NewOp0 = SimplifyMultipleUseDemandedBits( + Op0, DemandedBits0, DemandedElts, TLO.DAG, Depth + 1); + SDValue NewOp1 = SimplifyMultipleUseDemandedBits( + Op1, DemandedBits1, DemandedElts, TLO.DAG, Depth + 1); + if (NewOp0 || NewOp1) { + SDValue NewOp = TLO.DAG.getNode( + Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1); + return TLO.CombineTo(Op, NewOp); + } + return false; + }; + + switch (Opcode) { case ISD::SCALAR_TO_VECTOR: { if (!DemandedElts[0]) { KnownUndef.setAllBits(); @@ -2635,7 +2655,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( break; } - // TODO: There are more binop opcodes that could be handled here - MUL, MIN, + // TODO: There are more binop opcodes that could be handled here - MIN, // MAX, saturated math, etc. case ISD::OR: case ISD::XOR: @@ -2646,17 +2666,26 @@ bool TargetLowering::SimplifyDemandedVectorElts( case ISD::FMUL: case ISD::FDIV: case ISD::FREM: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + APInt UndefRHS, ZeroRHS; - if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS, - ZeroRHS, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO, + Depth + 1)) return true; APInt UndefLHS, ZeroLHS; - if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS, - ZeroLHS, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO, + Depth + 1)) return true; KnownZero = ZeroLHS & ZeroRHS; KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS); + + // Attempt to avoid multi-use ops if we don't need anything from them. + // TODO - use KnownUndef to relax the demandedelts? 
+ if (!DemandedElts.isAllOnesValue()) + if (SimplifyDemandedVectorEltsBinOp(Op0, Op1)) + return true; break; } case ISD::SHL: @@ -2664,27 +2693,39 @@ bool TargetLowering::SimplifyDemandedVectorElts( case ISD::SRA: case ISD::ROTL: case ISD::ROTR: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + APInt UndefRHS, ZeroRHS; - if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS, - ZeroRHS, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO, + Depth + 1)) return true; APInt UndefLHS, ZeroLHS; - if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS, - ZeroLHS, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO, + Depth + 1)) return true; KnownZero = ZeroLHS; KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop? + + // Attempt to avoid multi-use ops if we don't need anything from them. + // TODO - use KnownUndef to relax the demandedelts? + if (!DemandedElts.isAllOnesValue()) + if (SimplifyDemandedVectorEltsBinOp(Op0, Op1)) + return true; break; } case ISD::MUL: case ISD::AND: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + APInt SrcUndef, SrcZero; - if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef, - SrcZero, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO, + Depth + 1)) return true; - if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef, - KnownZero, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero, + TLO, Depth + 1)) return true; // If either side has a zero element, then the result element is zero, even @@ -2694,6 +2735,12 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownZero |= SrcZero; KnownUndef &= SrcUndef; KnownUndef &= ~KnownZero; + + // Attempt to avoid multi-use ops if we don't need anything from them. 
+ // TODO - use KnownUndef to relax the demandedelts? + if (!DemandedElts.isAllOnesValue()) + if (SimplifyDemandedVectorEltsBinOp(Op0, Op1)) + return true; break; } case ISD::TRUNCATE: diff --git a/llvm/test/CodeGen/AArch64/mul_by_elt.ll b/llvm/test/CodeGen/AArch64/mul_by_elt.ll index c51ef8c379ccc..c9fc2dea28eba 100644 --- a/llvm/test/CodeGen/AArch64/mul_by_elt.ll +++ b/llvm/test/CodeGen/AArch64/mul_by_elt.ll @@ -133,7 +133,7 @@ define <4 x float> @splat0_before_fmul_fmul_constant(<4 x float> %a) { ; CHECK-LABEL: splat0_before_fmul_fmul_constant: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov v1.4s, #3.00000000 -; CHECK-NEXT: fmul v0.4s, v1.4s, v0.s[0] +; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: fmov v1.4s, #6.00000000 ; CHECK-NEXT: fmul v0.4s, v1.4s, v0.s[0] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/X86/combine-pmuldq.ll b/llvm/test/CodeGen/X86/combine-pmuldq.ll index 82387c936233c..0e448f3f3be06 100644 --- a/llvm/test/CodeGen/X86/combine-pmuldq.ll +++ b/llvm/test/CodeGen/X86/combine-pmuldq.ll @@ -187,7 +187,7 @@ define i32 @PR43159(<4 x i32>* %a0) { ; SSE-NEXT: pmuludq %xmm1, %xmm2 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] ; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] -; SSE-NEXT: psubd %xmm1, %xmm0 +; SSE-NEXT: psubd %xmm3, %xmm0 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; SSE-NEXT: pmuludq {{.*}}(%rip), %xmm0 ; SSE-NEXT: pxor %xmm2, %xmm2 @@ -213,7 +213,7 @@ define i32 @PR43159(<4 x i32>* %a0) { ; AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] ; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] -; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpsubd %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] ; AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm0 @@ -238,7 +238,7 @@ define i32 @PR43159(<4 x i32>* %a0) { ; AVX512VL-NEXT: vpmuludq %xmm1, %xmm3, 
%xmm1 ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] -; AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpsubd %xmm2, %xmm0, %xmm0 ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] ; AVX512VL-NEXT: vpmuludq %xmm2, %xmm0, %xmm0 @@ -263,7 +263,7 @@ define i32 @PR43159(<4 x i32>* %a0) { ; AVX512DQVL-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 ; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] ; AVX512DQVL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] -; AVX512DQVL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 +; AVX512DQVL-NEXT: vpsubd %xmm2, %xmm0, %xmm0 ; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX512DQVL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] ; AVX512DQVL-NEXT: vpmuludq %xmm2, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/combine-sdiv.ll b/llvm/test/CodeGen/X86/combine-sdiv.ll index 235a6f0f33421..618b0a8d26067 100644 --- a/llvm/test/CodeGen/X86/combine-sdiv.ll +++ b/llvm/test/CodeGen/X86/combine-sdiv.ll @@ -1997,12 +1997,12 @@ define <4 x i32> @combine_vec_sdiv_by_pow2b_PosAndNeg(<4 x i32> %x) { ; SSE2-NEXT: punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm2[1] ; SSE2-NEXT: psrad $2, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm3[0,3] -; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] -; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: psubd %xmm0, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm2 +; SSE2-NEXT: psubd %xmm0, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; SSE2-NEXT: retq ; ; SSE41-LABEL: 
combine_vec_sdiv_by_pow2b_PosAndNeg: @@ -2022,12 +2022,11 @@ define <4 x i32> @combine_vec_sdiv_by_pow2b_PosAndNeg(<4 x i32> %x) { ; SSE41-NEXT: movdqa %xmm1, %xmm3 ; SSE41-NEXT: psrad $2, %xmm3 ; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7] +; SSE41-NEXT: pxor %xmm2, %xmm2 +; SSE41-NEXT: psubd %xmm3, %xmm2 ; SSE41-NEXT: psrad $3, %xmm1 -; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] -; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7] -; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: psubd %xmm1, %xmm0 -; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7] +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7] +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; @@ -2043,12 +2042,11 @@ define <4 x i32> @combine_vec_sdiv_by_pow2b_PosAndNeg(<4 x i32> %x) { ; AVX1-NEXT: vpsrad $4, %xmm1, %xmm2 ; AVX1-NEXT: vpsrad $2, %xmm1, %xmm3 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7] +; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vpsubd %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpsrad $3, %xmm1, %xmm1 -; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm1 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7] ; AVX1-NEXT: retq ; ; AVX2ORLATER-LABEL: combine_vec_sdiv_by_pow2b_PosAndNeg: @@ -2057,10 +2055,10 @@ define <4 x i32> @combine_vec_sdiv_by_pow2b_PosAndNeg(<4 x i32> %x) { ; AVX2ORLATER-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 ; AVX2ORLATER-NEXT: vpaddd %xmm1, %xmm0, %xmm1 ; AVX2ORLATER-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1 -; 
AVX2ORLATER-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] -; AVX2ORLATER-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2ORLATER-NEXT: vpsubd %xmm0, %xmm1, %xmm1 -; AVX2ORLATER-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; AVX2ORLATER-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2ORLATER-NEXT: vpsubd %xmm1, %xmm2, %xmm2 +; AVX2ORLATER-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] +; AVX2ORLATER-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] ; AVX2ORLATER-NEXT: retq ; ; XOP-LABEL: combine_vec_sdiv_by_pow2b_PosAndNeg: @@ -2069,10 +2067,10 @@ define <4 x i32> @combine_vec_sdiv_by_pow2b_PosAndNeg(<4 x i32> %x) { ; XOP-NEXT: vpshld {{.*}}(%rip), %xmm1, %xmm1 ; XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm1 ; XOP-NEXT: vpshad {{.*}}(%rip), %xmm1, %xmm1 -; XOP-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] -; XOP-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; XOP-NEXT: vpsubd %xmm0, %xmm1, %xmm1 -; XOP-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] +; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; XOP-NEXT: vpsubd %xmm1, %xmm2, %xmm2 +; XOP-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] +; XOP-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7] ; XOP-NEXT: retq %1 = sdiv <4 x i32> %x, ret <4 x i32> %1 diff --git a/llvm/test/CodeGen/X86/oddsubvector.ll b/llvm/test/CodeGen/X86/oddsubvector.ll index b42578eafdfd8..8d3e01f86def6 100644 --- a/llvm/test/CodeGen/X86/oddsubvector.ll +++ b/llvm/test/CodeGen/X86/oddsubvector.ll @@ -192,82 +192,67 @@ define void @PR42833() { ; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: addl .Lb${{.*}}(%rip), %eax ; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: movaps {{.*#+}} xmm3 = -; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm2[0],xmm3[1,2,3] -; SSE2-NEXT: movdqa %xmm0, %xmm4 -; SSE2-NEXT: paddd %xmm3, %xmm4 -; SSE2-NEXT: pslld $23, %xmm3 -; SSE2-NEXT: paddd {{.*}}(%rip), %xmm3 -; SSE2-NEXT: cvttps2dq %xmm3, %xmm3 -; SSE2-NEXT: movdqa %xmm0, %xmm5 -; SSE2-NEXT: pmuludq %xmm3, %xmm5 -; 
SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3] -; SSE2-NEXT: pmuludq %xmm3, %xmm6 -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[0,2,2,3] -; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] -; SSE2-NEXT: movss {{.*#+}} xmm5 = xmm4[0],xmm5[1,2,3] -; SSE2-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm3 -; SSE2-NEXT: psubd %xmm1, %xmm3 +; SSE2-NEXT: movd %eax, %xmm3 +; SSE2-NEXT: paddd %xmm0, %xmm3 +; SSE2-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm4 +; SSE2-NEXT: psubd %xmm1, %xmm4 ; SSE2-NEXT: paddd %xmm1, %xmm1 +; SSE2-NEXT: movdqa %xmm0, %xmm5 +; SSE2-NEXT: paddd %xmm0, %xmm5 +; SSE2-NEXT: movss {{.*#+}} xmm5 = xmm3[0],xmm5[1,2,3] ; SSE2-NEXT: movdqa %xmm1, .Lc$local+{{.*}}(%rip) ; SSE2-NEXT: movaps %xmm5, .Lc$local+{{.*}}(%rip) ; SSE2-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm1 -; SSE2-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm4 +; SSE2-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm3 ; SSE2-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm5 ; SSE2-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm6 ; SSE2-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm7 ; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3] ; SSE2-NEXT: psubd %xmm0, %xmm7 -; SSE2-NEXT: psubd %xmm4, %xmm6 +; SSE2-NEXT: psubd %xmm3, %xmm6 ; SSE2-NEXT: psubd %xmm1, %xmm5 ; SSE2-NEXT: movdqa %xmm5, .Ld$local+{{.*}}(%rip) ; SSE2-NEXT: movdqa %xmm6, .Ld$local+{{.*}}(%rip) -; SSE2-NEXT: movdqa %xmm3, .Ld$local+{{.*}}(%rip) +; SSE2-NEXT: movdqa %xmm4, .Ld$local+{{.*}}(%rip) ; SSE2-NEXT: movdqa %xmm7, .Ld$local+{{.*}}(%rip) -; SSE2-NEXT: paddd %xmm4, %xmm4 +; SSE2-NEXT: paddd %xmm3, %xmm3 ; SSE2-NEXT: paddd %xmm1, %xmm1 ; SSE2-NEXT: movdqa %xmm1, .Lc$local+{{.*}}(%rip) -; SSE2-NEXT: movdqa %xmm4, .Lc$local+{{.*}}(%rip) +; SSE2-NEXT: movdqa %xmm3, .Lc$local+{{.*}}(%rip) ; SSE2-NEXT: retq ; ; SSE42-LABEL: PR42833: ; SSE42: # %bb.0: -; SSE42-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm1 ; SSE42-NEXT: movdqa .Lc$local+{{.*}}(%rip), 
%xmm0 -; SSE42-NEXT: movd %xmm0, %eax +; SSE42-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm1 +; SSE42-NEXT: movd %xmm1, %eax ; SSE42-NEXT: addl .Lb${{.*}}(%rip), %eax -; SSE42-NEXT: movdqa {{.*#+}} xmm2 = -; SSE42-NEXT: pinsrd $0, %eax, %xmm2 -; SSE42-NEXT: movdqa %xmm0, %xmm3 -; SSE42-NEXT: paddd %xmm2, %xmm3 -; SSE42-NEXT: pslld $23, %xmm2 -; SSE42-NEXT: paddd {{.*}}(%rip), %xmm2 -; SSE42-NEXT: cvttps2dq %xmm2, %xmm2 -; SSE42-NEXT: pmulld %xmm0, %xmm2 -; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3,4,5,6,7] +; SSE42-NEXT: movd %eax, %xmm2 +; SSE42-NEXT: paddd %xmm1, %xmm2 ; SSE42-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm3 -; SSE42-NEXT: psubd %xmm1, %xmm3 -; SSE42-NEXT: paddd %xmm1, %xmm1 -; SSE42-NEXT: movdqa %xmm1, .Lc$local+{{.*}}(%rip) -; SSE42-NEXT: movdqa %xmm2, .Lc$local+{{.*}}(%rip) -; SSE42-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm1 +; SSE42-NEXT: psubd %xmm0, %xmm3 +; SSE42-NEXT: paddd %xmm0, %xmm0 +; SSE42-NEXT: movdqa %xmm1, %xmm4 +; SSE42-NEXT: paddd %xmm1, %xmm4 +; SSE42-NEXT: pblendw {{.*#+}} xmm4 = xmm2[0,1],xmm4[2,3,4,5,6,7] +; SSE42-NEXT: movdqa %xmm0, .Lc$local+{{.*}}(%rip) +; SSE42-NEXT: movdqa %xmm4, .Lc$local+{{.*}}(%rip) +; SSE42-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm0 ; SSE42-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm2 ; SSE42-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm4 ; SSE42-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm5 ; SSE42-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm6 -; SSE42-NEXT: pinsrd $0, %eax, %xmm0 -; SSE42-NEXT: psubd %xmm0, %xmm6 +; SSE42-NEXT: pinsrd $0, %eax, %xmm1 +; SSE42-NEXT: psubd %xmm1, %xmm6 ; SSE42-NEXT: psubd %xmm2, %xmm5 -; SSE42-NEXT: psubd %xmm1, %xmm4 +; SSE42-NEXT: psubd %xmm0, %xmm4 ; SSE42-NEXT: movdqa %xmm4, .Ld$local+{{.*}}(%rip) ; SSE42-NEXT: movdqa %xmm5, .Ld$local+{{.*}}(%rip) ; SSE42-NEXT: movdqa %xmm3, .Ld$local+{{.*}}(%rip) ; SSE42-NEXT: movdqa %xmm6, .Ld$local+{{.*}}(%rip) ; SSE42-NEXT: paddd %xmm2, %xmm2 -; SSE42-NEXT: paddd %xmm1, %xmm1 -; SSE42-NEXT: movdqa %xmm1, .Lc$local+{{.*}}(%rip) +; 
SSE42-NEXT: paddd %xmm0, %xmm0 +; SSE42-NEXT: movdqa %xmm0, .Lc$local+{{.*}}(%rip) ; SSE42-NEXT: movdqa %xmm2, .Lc$local+{{.*}}(%rip) ; SSE42-NEXT: retq ; @@ -276,17 +261,13 @@ define void @PR42833() { ; AVX1-NEXT: vmovdqa .Lc$local+{{.*}}(%rip), %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: addl .Lb${{.*}}(%rip), %eax -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = -; AVX1-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1 -; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vmovd %eax, %xmm1 +; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm2 ; AVX1-NEXT: vmovdqa .Lc$local+{{.*}}(%rip), %xmm3 -; AVX1-NEXT: vpslld $23, %xmm1, %xmm1 -; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1 -; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm1 -; AVX1-NEXT: vpslld $1, %xmm3, %xmm3 -; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 -; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0],ymm1[1,2,3,4,5,6,7] +; AVX1-NEXT: vpaddd %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 +; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7] ; AVX1-NEXT: vmovdqa .Ld$local+{{.*}}(%rip), %xmm2 ; AVX1-NEXT: vpsubd .Lc$local+{{.*}}(%rip), %xmm2, %xmm2 ; AVX1-NEXT: vmovups %ymm1, .Lc$local+{{.*}}(%rip) @@ -316,10 +297,9 @@ define void @PR42833() { ; AVX2-NEXT: vmovdqu .Lc$local+{{.*}}(%rip), %ymm0 ; AVX2-NEXT: addl .Lc$local+{{.*}}(%rip), %eax ; AVX2-NEXT: vmovd %eax, %xmm1 -; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm1[0],mem[1,2,3,4,5,6,7] -; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm3 -; AVX2-NEXT: vpsllvd %ymm2, %ymm0, %ymm2 -; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0],ymm2[1,2,3,4,5,6,7] +; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm2 +; AVX2-NEXT: vpaddd %ymm0, %ymm0, %ymm3 +; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0],ymm3[1,2,3,4,5,6,7] ; AVX2-NEXT: vmovdqu %ymm2, .Lc$local+{{.*}}(%rip) ; AVX2-NEXT: vmovdqu .Lc$local+{{.*}}(%rip), %ymm2 ; AVX2-NEXT: vmovdqu .Ld$local+{{.*}}(%rip), %ymm3 @@ -341,10 +321,9 @@ define void @PR42833() { ; 
AVX512-NEXT: vmovdqu64 .Lc$local+{{.*}}(%rip), %zmm1 ; AVX512-NEXT: addl .Lc$local+{{.*}}(%rip), %eax ; AVX512-NEXT: vmovd %eax, %xmm2 -; AVX512-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0],mem[1,2,3,4,5,6,7] -; AVX512-NEXT: vpaddd %ymm2, %ymm0, %ymm3 -; AVX512-NEXT: vpsllvd %ymm2, %ymm0, %ymm0 -; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0],ymm0[1,2,3,4,5,6,7] +; AVX512-NEXT: vpaddd %ymm2, %ymm0, %ymm2 +; AVX512-NEXT: vpaddd %ymm0, %ymm0, %ymm0 +; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0],ymm0[1,2,3,4,5,6,7] ; AVX512-NEXT: vmovdqa .Lc$local+{{.*}}(%rip), %xmm2 ; AVX512-NEXT: vmovdqu %ymm0, .Lc$local+{{.*}}(%rip) ; AVX512-NEXT: vmovdqu .Lc$local+{{.*}}(%rip), %ymm0 @@ -364,14 +343,13 @@ define void @PR42833() { ; XOP-NEXT: vmovdqa .Lc$local+{{.*}}(%rip), %xmm0 ; XOP-NEXT: vmovd %xmm0, %eax ; XOP-NEXT: addl .Lb${{.*}}(%rip), %eax -; XOP-NEXT: vmovdqa {{.*#+}} xmm1 = -; XOP-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1 -; XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm2 +; XOP-NEXT: vmovd %eax, %xmm1 +; XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm1 +; XOP-NEXT: vpaddd %xmm0, %xmm0, %xmm2 ; XOP-NEXT: vmovdqa .Lc$local+{{.*}}(%rip), %xmm3 -; XOP-NEXT: vpshld %xmm1, %xmm0, %xmm1 -; XOP-NEXT: vpslld $1, %xmm3, %xmm3 -; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 -; XOP-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0],ymm1[1,2,3,4,5,6,7] +; XOP-NEXT: vpaddd %xmm3, %xmm3, %xmm3 +; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 +; XOP-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7] ; XOP-NEXT: vmovdqa .Ld$local+{{.*}}(%rip), %xmm2 ; XOP-NEXT: vpsubd .Lc$local+{{.*}}(%rip), %xmm2, %xmm2 ; XOP-NEXT: vmovups %ymm1, .Lc$local+{{.*}}(%rip) diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll index 43d02fca5d500..4d5b148b362e8 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll @@ -671,7 +671,6 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind { define <2 x i64> 
@splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind { ; SSE-LABEL: splatvar_funnnel_v2i64: ; SSE: # %bb.0: -; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [63,63] ; SSE-NEXT: pxor %xmm3, %xmm3 ; SSE-NEXT: psubq %xmm1, %xmm3 @@ -683,31 +682,17 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind ; SSE-NEXT: por %xmm4, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: splatvar_funnnel_v2i64: -; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] -; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63] -; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX1-NEXT: vpsllq %xmm3, %xmm0, %xmm3 -; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX1-NEXT: vpsubq %xmm1, %xmm4, %xmm1 -; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: splatvar_funnnel_v2i64: -; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63] -; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX2-NEXT: vpsllq %xmm3, %xmm0, %xmm3 -; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1 -; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpor %xmm0, %xmm3, %xmm0 -; AVX2-NEXT: retq +; AVX-LABEL: splatvar_funnnel_v2i64: +; AVX: # %bb.0: +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63] +; AVX-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX-NEXT: vpsllq %xmm3, %xmm0, %xmm3 +; AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX-NEXT: vpsubq %xmm1, %xmm4, %xmm1 +; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm0, %xmm3, %xmm0 +; AVX-NEXT: retq ; ; AVX512F-LABEL: splatvar_funnnel_v2i64: ; AVX512F: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll index e4c76d59f3339..e8fb824076f2f 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll +++ 
b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll @@ -514,9 +514,9 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind { define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind { ; AVX1-LABEL: splatvar_funnnel_v4i64: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm2 +; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1] +; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] ; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 @@ -537,7 +537,6 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind ; ; AVX2-LABEL: splatvar_funnnel_v4i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63] ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX2-NEXT: vpsllq %xmm3, %ymm0, %ymm3 diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll index 406f9b614a43a..e923df1c01423 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll @@ -711,7 +711,6 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind { define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind { ; SSE-LABEL: splatvar_funnnel_v2i64: ; SSE: # %bb.0: -; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [63,63] ; SSE-NEXT: pxor %xmm3, %xmm3 ; SSE-NEXT: psubq %xmm1, %xmm3 @@ -723,31 +722,17 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind ; SSE-NEXT: por %xmm4, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: splatvar_funnnel_v2i64: -; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] -; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63] -; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX1-NEXT: vpsrlq 
%xmm3, %xmm0, %xmm3 -; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX1-NEXT: vpsubq %xmm1, %xmm4, %xmm1 -; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: splatvar_funnnel_v2i64: -; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63] -; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX2-NEXT: vpsrlq %xmm3, %xmm0, %xmm3 -; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1 -; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpor %xmm3, %xmm0, %xmm0 -; AVX2-NEXT: retq +; AVX-LABEL: splatvar_funnnel_v2i64: +; AVX: # %bb.0: +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63] +; AVX-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX-NEXT: vpsrlq %xmm3, %xmm0, %xmm3 +; AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX-NEXT: vpsubq %xmm1, %xmm4, %xmm1 +; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm3, %xmm0, %xmm0 +; AVX-NEXT: retq ; ; AVX512F-LABEL: splatvar_funnnel_v2i64: ; AVX512F: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll index ec18f8948771d..723a9dc51bc82 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll @@ -560,9 +560,9 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind { define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind { ; AVX1-LABEL: splatvar_funnnel_v4i64: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm2 +; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1] +; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] ; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: 
vextractf128 $1, %ymm0, %xmm4 @@ -583,7 +583,6 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind { ; ; AVX2-LABEL: splatvar_funnnel_v4i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63] ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX2-NEXT: vpsrlq %xmm3, %ymm0, %ymm3 diff --git a/llvm/test/CodeGen/X86/vector-narrow-binop.ll b/llvm/test/CodeGen/X86/vector-narrow-binop.ll index d76f0dc544589..383fde7038d04 100644 --- a/llvm/test/CodeGen/X86/vector-narrow-binop.ll +++ b/llvm/test/CodeGen/X86/vector-narrow-binop.ll @@ -151,12 +151,11 @@ define <4 x double> @fmul_v2f64(<2 x double> %x, <2 x double> %y) { ; SSE: # %bb.0: ; SSE-NEXT: movapd %xmm1, %xmm2 ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0] -; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] -; SSE-NEXT: mulpd %xmm0, %xmm0 ; SSE-NEXT: mulpd %xmm2, %xmm2 -; SSE-NEXT: addpd %xmm0, %xmm2 -; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] -; SSE-NEXT: movapd %xmm2, %xmm0 +; SSE-NEXT: mulpd %xmm1, %xmm1 +; SSE-NEXT: addpd %xmm2, %xmm1 +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] +; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX1-LABEL: fmul_v2f64: From 7b15dc1e0e8dfaf3efb608734421eac4e2399d6a Mon Sep 17 00:00:00 2001 From: Xing GUO Date: Mon, 25 May 2020 19:42:15 +0800 Subject: [PATCH 016/770] [ObjectYAML][DWARF] Remove unimplemented function. ``` StringMap<std::unique_ptr<MemoryBuffer>> EmitDebugSections(llvm::DWARFYAML::Data &DI, bool ApplyFixups); ``` is unimplemented and unused. 
--- llvm/include/llvm/ObjectYAML/DWARFEmitter.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/llvm/include/llvm/ObjectYAML/DWARFEmitter.h b/llvm/include/llvm/ObjectYAML/DWARFEmitter.h index 092aa0040f95b..b6613265c7782 100644 --- a/llvm/include/llvm/ObjectYAML/DWARFEmitter.h +++ b/llvm/include/llvm/ObjectYAML/DWARFEmitter.h @@ -41,9 +41,6 @@ void EmitDebugLine(raw_ostream &OS, const Data &DI); Expected<StringMap<std::unique_ptr<MemoryBuffer>>> EmitDebugSections(StringRef YAMLString, bool ApplyFixups = false, bool IsLittleEndian = sys::IsLittleEndianHost); -StringMap<std::unique_ptr<MemoryBuffer>> -EmitDebugSections(llvm::DWARFYAML::Data &DI, bool ApplyFixups); - } // end namespace DWARFYAML } // end namespace llvm From 9ff361b099f16ce27c8af61806447df5bca52228 Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 25 May 2020 12:25:03 +0100 Subject: [PATCH 017/770] [ARM] VMULH tests for when other parts are working. NFC --- llvm/test/CodeGen/Thumb2/mve-vmulh.ll | 529 ++++++++++++++++++++++++++ 1 file changed, 529 insertions(+) create mode 100644 llvm/test/CodeGen/Thumb2/mve-vmulh.ll diff --git a/llvm/test/CodeGen/Thumb2/mve-vmulh.ll b/llvm/test/CodeGen/Thumb2/mve-vmulh.ll new file mode 100644 index 0000000000000..36b3b1e6c312f --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-vmulh.ll @@ -0,0 +1,529 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK + +define arm_aapcs_vfpcc <2 x i32> @vmulhs_v2i32(<2 x i32> %s0, <2 x i32> %s1) { +; CHECK-LABEL: vmulhs_v2i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmullb.s32 q2, q0, q1 +; CHECK-NEXT: vmov r0, s9 +; CHECK-NEXT: vmov.32 q0[0], r0 +; CHECK-NEXT: asrs r0, r0, #31 +; CHECK-NEXT: vmov.32 q0[1], r0 +; CHECK-NEXT: vmov r0, s11 +; CHECK-NEXT: vmov.32 q0[2], r0 +; CHECK-NEXT: asrs r0, r0, #31 +; CHECK-NEXT: vmov.32 q0[3], r0 +; CHECK-NEXT: bx lr +entry: + %s0s = sext <2 x i32> %s0 to <2 x i64> + %s1s = sext <2 x i32> %s1 to <2 x 
i64> + %m = mul <2 x i64> %s0s, %s1s + %s = ashr <2 x i64> %m, + %s2 = trunc <2 x i64> %s to <2 x i32> + ret <2 x i32> %s2 +} + +define arm_aapcs_vfpcc <2 x i32> @vmulhu_v2i32(<2 x i32> %s0, <2 x i32> %s1) { +; CHECK-LABEL: vmulhu_v2i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmullb.u32 q2, q0, q1 +; CHECK-NEXT: vldr s1, .LCPI1_0 +; CHECK-NEXT: vmov.f32 s0, s9 +; CHECK-NEXT: vmov.f32 s2, s11 +; CHECK-NEXT: vmov.f32 s3, s1 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI1_0: +; CHECK-NEXT: .long 0x00000000 @ float 0 +entry: + %s0s = zext <2 x i32> %s0 to <2 x i64> + %s1s = zext <2 x i32> %s1 to <2 x i64> + %m = mul <2 x i64> %s0s, %s1s + %s = lshr <2 x i64> %m, + %s2 = trunc <2 x i64> %s to <2 x i32> + ret <2 x i32> %s2 +} + +define arm_aapcs_vfpcc <4 x i32> @vmulhs_v4i32(<4 x i32> %s0, <4 x i32> %s1) { +; CHECK-LABEL: vmulhs_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f32 s8, s4 +; CHECK-NEXT: vmov.f32 s12, s0 +; CHECK-NEXT: vmov.f32 s14, s1 +; CHECK-NEXT: vmov.f32 s10, s5 +; CHECK-NEXT: vmov r2, s12 +; CHECK-NEXT: vmov r1, s14 +; CHECK-NEXT: vmov r0, s10 +; CHECK-NEXT: vmov.f32 s12, s6 +; CHECK-NEXT: vmov.f32 s14, s7 +; CHECK-NEXT: vmov.f32 s4, s2 +; CHECK-NEXT: vmov.f32 s6, s3 +; CHECK-NEXT: vmullb.s32 q0, q1, q3 +; CHECK-NEXT: smmul r0, r1, r0 +; CHECK-NEXT: vmov r1, s8 +; CHECK-NEXT: smmul r1, r2, r1 +; CHECK-NEXT: vmov.32 q2[0], r1 +; CHECK-NEXT: vmov.32 q2[1], r0 +; CHECK-NEXT: vmov r0, s1 +; CHECK-NEXT: vmov.32 q2[2], r0 +; CHECK-NEXT: vmov r0, s3 +; CHECK-NEXT: vmov.32 q2[3], r0 +; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: bx lr +entry: + %s0s = sext <4 x i32> %s0 to <4 x i64> + %s1s = sext <4 x i32> %s1 to <4 x i64> + %m = mul <4 x i64> %s0s, %s1s + %s = ashr <4 x i64> %m, + %s2 = trunc <4 x i64> %s to <4 x i32> + ret <4 x i32> %s2 +} + +define arm_aapcs_vfpcc <4 x i32> @vmulhu_v4i32(<4 x i32> %s0, <4 x i32> %s1) { +; CHECK-LABEL: vmulhu_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .vsave {d8, d9} 
+; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov.f32 s12, s6 +; CHECK-NEXT: vmov.f32 s16, s2 +; CHECK-NEXT: vmov.f32 s14, s7 +; CHECK-NEXT: vmov.f32 s18, s3 +; CHECK-NEXT: vmov.f32 s6, s5 +; CHECK-NEXT: vmullb.u32 q2, q4, q3 +; CHECK-NEXT: vmov.f32 s2, s1 +; CHECK-NEXT: vmullb.u32 q3, q0, q1 +; CHECK-NEXT: vmov.f32 s0, s13 +; CHECK-NEXT: vmov.f32 s1, s15 +; CHECK-NEXT: vmov.f32 s2, s9 +; CHECK-NEXT: vmov.f32 s3, s11 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: bx lr +entry: + %s0s = zext <4 x i32> %s0 to <4 x i64> + %s1s = zext <4 x i32> %s1 to <4 x i64> + %m = mul <4 x i64> %s0s, %s1s + %s = lshr <4 x i64> %m, + %s2 = trunc <4 x i64> %s to <4 x i32> + ret <4 x i32> %s2 +} + +define arm_aapcs_vfpcc <4 x i16> @vmulhs_v4i16(<4 x i16> %s0, <4 x i16> %s1) { +; CHECK-LABEL: vmulhs_v4i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmullb.s16 q0, q0, q1 +; CHECK-NEXT: vshr.s32 q0, q0, #16 +; CHECK-NEXT: bx lr +entry: + %s0s = sext <4 x i16> %s0 to <4 x i32> + %s1s = sext <4 x i16> %s1 to <4 x i32> + %m = mul <4 x i32> %s0s, %s1s + %s = ashr <4 x i32> %m, + %s2 = trunc <4 x i32> %s to <4 x i16> + ret <4 x i16> %s2 +} + +define arm_aapcs_vfpcc <4 x i16> @vmulhu_v4i16(<4 x i16> %s0, <4 x i16> %s1) { +; CHECK-LABEL: vmulhu_v4i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmullb.u16 q0, q0, q1 +; CHECK-NEXT: vshr.u32 q0, q0, #16 +; CHECK-NEXT: bx lr +entry: + %s0s = zext <4 x i16> %s0 to <4 x i32> + %s1s = zext <4 x i16> %s1 to <4 x i32> + %m = mul <4 x i32> %s0s, %s1s + %s = lshr <4 x i32> %m, + %s2 = trunc <4 x i32> %s to <4 x i16> + ret <4 x i16> %s2 +} + +define arm_aapcs_vfpcc <8 x i16> @vmulhs_v8i16(<8 x i16> %s0, <8 x i16> %s1) { +; CHECK-LABEL: vmulhs_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.u16 r0, q1[0] +; CHECK-NEXT: vmov.32 q2[0], r0 +; CHECK-NEXT: vmov.u16 r0, q1[1] +; CHECK-NEXT: vmov.32 q2[1], r0 +; CHECK-NEXT: vmov.u16 r0, q1[2] +; CHECK-NEXT: vmov.32 q2[2], r0 +; CHECK-NEXT: vmov.u16 r0, q1[3] +; CHECK-NEXT: vmov.32 q2[3], r0 +; CHECK-NEXT: 
vmov.u16 r0, q0[0] +; CHECK-NEXT: vmov.32 q3[0], r0 +; CHECK-NEXT: vmov.u16 r0, q0[1] +; CHECK-NEXT: vmov.32 q3[1], r0 +; CHECK-NEXT: vmov.u16 r0, q0[2] +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.u16 r0, q0[3] +; CHECK-NEXT: vmov.32 q3[3], r0 +; CHECK-NEXT: vmullb.s16 q2, q3, q2 +; CHECK-NEXT: vshr.s32 q3, q2, #16 +; CHECK-NEXT: vmov r0, s12 +; CHECK-NEXT: vmov.16 q2[0], r0 +; CHECK-NEXT: vmov r0, s13 +; CHECK-NEXT: vmov.16 q2[1], r0 +; CHECK-NEXT: vmov r0, s14 +; CHECK-NEXT: vmov.16 q2[2], r0 +; CHECK-NEXT: vmov r0, s15 +; CHECK-NEXT: vmov.16 q2[3], r0 +; CHECK-NEXT: vmov.u16 r0, q1[4] +; CHECK-NEXT: vmov.32 q3[0], r0 +; CHECK-NEXT: vmov.u16 r0, q1[5] +; CHECK-NEXT: vmov.32 q3[1], r0 +; CHECK-NEXT: vmov.u16 r0, q1[6] +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.u16 r0, q1[7] +; CHECK-NEXT: vmov.32 q3[3], r0 +; CHECK-NEXT: vmov.u16 r0, q0[4] +; CHECK-NEXT: vmov.32 q1[0], r0 +; CHECK-NEXT: vmov.u16 r0, q0[5] +; CHECK-NEXT: vmov.32 q1[1], r0 +; CHECK-NEXT: vmov.u16 r0, q0[6] +; CHECK-NEXT: vmov.32 q1[2], r0 +; CHECK-NEXT: vmov.u16 r0, q0[7] +; CHECK-NEXT: vmov.32 q1[3], r0 +; CHECK-NEXT: vmullb.s16 q0, q1, q3 +; CHECK-NEXT: vshr.s32 q0, q0, #16 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: vmov.16 q2[4], r0 +; CHECK-NEXT: vmov r0, s1 +; CHECK-NEXT: vmov.16 q2[5], r0 +; CHECK-NEXT: vmov r0, s2 +; CHECK-NEXT: vmov.16 q2[6], r0 +; CHECK-NEXT: vmov r0, s3 +; CHECK-NEXT: vmov.16 q2[7], r0 +; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: bx lr +entry: + %s0s = sext <8 x i16> %s0 to <8 x i32> + %s1s = sext <8 x i16> %s1 to <8 x i32> + %m = mul <8 x i32> %s0s, %s1s + %s = ashr <8 x i32> %m, + %s2 = trunc <8 x i32> %s to <8 x i16> + ret <8 x i16> %s2 +} + +define arm_aapcs_vfpcc <8 x i16> @vmulhu_v8i16(<8 x i16> %s0, <8 x i16> %s1) { +; CHECK-LABEL: vmulhu_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.u16 r0, q1[0] +; CHECK-NEXT: vmov.32 q2[0], r0 +; CHECK-NEXT: vmov.u16 r0, q1[1] +; CHECK-NEXT: vmov.32 q2[1], r0 +; CHECK-NEXT: vmov.u16 r0, q1[2] +; 
CHECK-NEXT: vmov.32 q2[2], r0 +; CHECK-NEXT: vmov.u16 r0, q1[3] +; CHECK-NEXT: vmov.32 q2[3], r0 +; CHECK-NEXT: vmov.u16 r0, q0[0] +; CHECK-NEXT: vmov.32 q3[0], r0 +; CHECK-NEXT: vmov.u16 r0, q0[1] +; CHECK-NEXT: vmov.32 q3[1], r0 +; CHECK-NEXT: vmov.u16 r0, q0[2] +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.u16 r0, q0[3] +; CHECK-NEXT: vmov.32 q3[3], r0 +; CHECK-NEXT: vmullb.u16 q2, q3, q2 +; CHECK-NEXT: vshr.u32 q3, q2, #16 +; CHECK-NEXT: vmov r0, s12 +; CHECK-NEXT: vmov.16 q2[0], r0 +; CHECK-NEXT: vmov r0, s13 +; CHECK-NEXT: vmov.16 q2[1], r0 +; CHECK-NEXT: vmov r0, s14 +; CHECK-NEXT: vmov.16 q2[2], r0 +; CHECK-NEXT: vmov r0, s15 +; CHECK-NEXT: vmov.16 q2[3], r0 +; CHECK-NEXT: vmov.u16 r0, q1[4] +; CHECK-NEXT: vmov.32 q3[0], r0 +; CHECK-NEXT: vmov.u16 r0, q1[5] +; CHECK-NEXT: vmov.32 q3[1], r0 +; CHECK-NEXT: vmov.u16 r0, q1[6] +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.u16 r0, q1[7] +; CHECK-NEXT: vmov.32 q3[3], r0 +; CHECK-NEXT: vmov.u16 r0, q0[4] +; CHECK-NEXT: vmov.32 q1[0], r0 +; CHECK-NEXT: vmov.u16 r0, q0[5] +; CHECK-NEXT: vmov.32 q1[1], r0 +; CHECK-NEXT: vmov.u16 r0, q0[6] +; CHECK-NEXT: vmov.32 q1[2], r0 +; CHECK-NEXT: vmov.u16 r0, q0[7] +; CHECK-NEXT: vmov.32 q1[3], r0 +; CHECK-NEXT: vmullb.u16 q0, q1, q3 +; CHECK-NEXT: vshr.u32 q0, q0, #16 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: vmov.16 q2[4], r0 +; CHECK-NEXT: vmov r0, s1 +; CHECK-NEXT: vmov.16 q2[5], r0 +; CHECK-NEXT: vmov r0, s2 +; CHECK-NEXT: vmov.16 q2[6], r0 +; CHECK-NEXT: vmov r0, s3 +; CHECK-NEXT: vmov.16 q2[7], r0 +; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: bx lr +entry: + %s0s = zext <8 x i16> %s0 to <8 x i32> + %s1s = zext <8 x i16> %s1 to <8 x i32> + %m = mul <8 x i32> %s0s, %s1s + %s = lshr <8 x i32> %m, + %s2 = trunc <8 x i32> %s to <8 x i16> + ret <8 x i16> %s2 +} + +define arm_aapcs_vfpcc <8 x i8> @vmulhs_v8i8(<8 x i8> %s0, <8 x i8> %s1) { +; CHECK-LABEL: vmulhs_v8i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmullb.s8 q0, q0, q1 +; CHECK-NEXT: vshr.s16 q0, q0, #8 +; 
CHECK-NEXT: bx lr +entry: + %s0s = sext <8 x i8> %s0 to <8 x i16> + %s1s = sext <8 x i8> %s1 to <8 x i16> + %m = mul <8 x i16> %s0s, %s1s + %s = ashr <8 x i16> %m, + %s2 = trunc <8 x i16> %s to <8 x i8> + ret <8 x i8> %s2 +} + +define arm_aapcs_vfpcc <8 x i8> @vmulhu_v8i8(<8 x i8> %s0, <8 x i8> %s1) { +; CHECK-LABEL: vmulhu_v8i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmullb.u8 q0, q0, q1 +; CHECK-NEXT: vshr.u16 q0, q0, #8 +; CHECK-NEXT: bx lr +entry: + %s0s = zext <8 x i8> %s0 to <8 x i16> + %s1s = zext <8 x i8> %s1 to <8 x i16> + %m = mul <8 x i16> %s0s, %s1s + %s = lshr <8 x i16> %m, + %s2 = trunc <8 x i16> %s to <8 x i8> + ret <8 x i8> %s2 +} + +define arm_aapcs_vfpcc <16 x i8> @vmulhs_v16i8(<16 x i8> %s0, <16 x i8> %s1) { +; CHECK-LABEL: vmulhs_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.u8 r0, q1[0] +; CHECK-NEXT: vmov.16 q2[0], r0 +; CHECK-NEXT: vmov.u8 r0, q1[1] +; CHECK-NEXT: vmov.16 q2[1], r0 +; CHECK-NEXT: vmov.u8 r0, q1[2] +; CHECK-NEXT: vmov.16 q2[2], r0 +; CHECK-NEXT: vmov.u8 r0, q1[3] +; CHECK-NEXT: vmov.16 q2[3], r0 +; CHECK-NEXT: vmov.u8 r0, q1[4] +; CHECK-NEXT: vmov.16 q2[4], r0 +; CHECK-NEXT: vmov.u8 r0, q1[5] +; CHECK-NEXT: vmov.16 q2[5], r0 +; CHECK-NEXT: vmov.u8 r0, q1[6] +; CHECK-NEXT: vmov.16 q2[6], r0 +; CHECK-NEXT: vmov.u8 r0, q1[7] +; CHECK-NEXT: vmov.16 q2[7], r0 +; CHECK-NEXT: vmov.u8 r0, q0[0] +; CHECK-NEXT: vmov.16 q3[0], r0 +; CHECK-NEXT: vmov.u8 r0, q0[1] +; CHECK-NEXT: vmov.16 q3[1], r0 +; CHECK-NEXT: vmov.u8 r0, q0[2] +; CHECK-NEXT: vmov.16 q3[2], r0 +; CHECK-NEXT: vmov.u8 r0, q0[3] +; CHECK-NEXT: vmov.16 q3[3], r0 +; CHECK-NEXT: vmov.u8 r0, q0[4] +; CHECK-NEXT: vmov.16 q3[4], r0 +; CHECK-NEXT: vmov.u8 r0, q0[5] +; CHECK-NEXT: vmov.16 q3[5], r0 +; CHECK-NEXT: vmov.u8 r0, q0[6] +; CHECK-NEXT: vmov.16 q3[6], r0 +; CHECK-NEXT: vmov.u8 r0, q0[7] +; CHECK-NEXT: vmov.16 q3[7], r0 +; CHECK-NEXT: vmullb.s8 q2, q3, q2 +; CHECK-NEXT: vshr.s16 q3, q2, #8 +; CHECK-NEXT: vmov.u16 r0, q3[0] +; CHECK-NEXT: vmov.8 q2[0], r0 +; 
CHECK-NEXT: vmov.u16 r0, q3[1] +; CHECK-NEXT: vmov.8 q2[1], r0 +; CHECK-NEXT: vmov.u16 r0, q3[2] +; CHECK-NEXT: vmov.8 q2[2], r0 +; CHECK-NEXT: vmov.u16 r0, q3[3] +; CHECK-NEXT: vmov.8 q2[3], r0 +; CHECK-NEXT: vmov.u16 r0, q3[4] +; CHECK-NEXT: vmov.8 q2[4], r0 +; CHECK-NEXT: vmov.u16 r0, q3[5] +; CHECK-NEXT: vmov.8 q2[5], r0 +; CHECK-NEXT: vmov.u16 r0, q3[6] +; CHECK-NEXT: vmov.8 q2[6], r0 +; CHECK-NEXT: vmov.u16 r0, q3[7] +; CHECK-NEXT: vmov.8 q2[7], r0 +; CHECK-NEXT: vmov.u8 r0, q1[8] +; CHECK-NEXT: vmov.16 q3[0], r0 +; CHECK-NEXT: vmov.u8 r0, q1[9] +; CHECK-NEXT: vmov.16 q3[1], r0 +; CHECK-NEXT: vmov.u8 r0, q1[10] +; CHECK-NEXT: vmov.16 q3[2], r0 +; CHECK-NEXT: vmov.u8 r0, q1[11] +; CHECK-NEXT: vmov.16 q3[3], r0 +; CHECK-NEXT: vmov.u8 r0, q1[12] +; CHECK-NEXT: vmov.16 q3[4], r0 +; CHECK-NEXT: vmov.u8 r0, q1[13] +; CHECK-NEXT: vmov.16 q3[5], r0 +; CHECK-NEXT: vmov.u8 r0, q1[14] +; CHECK-NEXT: vmov.16 q3[6], r0 +; CHECK-NEXT: vmov.u8 r0, q1[15] +; CHECK-NEXT: vmov.16 q3[7], r0 +; CHECK-NEXT: vmov.u8 r0, q0[8] +; CHECK-NEXT: vmov.16 q1[0], r0 +; CHECK-NEXT: vmov.u8 r0, q0[9] +; CHECK-NEXT: vmov.16 q1[1], r0 +; CHECK-NEXT: vmov.u8 r0, q0[10] +; CHECK-NEXT: vmov.16 q1[2], r0 +; CHECK-NEXT: vmov.u8 r0, q0[11] +; CHECK-NEXT: vmov.16 q1[3], r0 +; CHECK-NEXT: vmov.u8 r0, q0[12] +; CHECK-NEXT: vmov.16 q1[4], r0 +; CHECK-NEXT: vmov.u8 r0, q0[13] +; CHECK-NEXT: vmov.16 q1[5], r0 +; CHECK-NEXT: vmov.u8 r0, q0[14] +; CHECK-NEXT: vmov.16 q1[6], r0 +; CHECK-NEXT: vmov.u8 r0, q0[15] +; CHECK-NEXT: vmov.16 q1[7], r0 +; CHECK-NEXT: vmullb.s8 q0, q1, q3 +; CHECK-NEXT: vshr.s16 q0, q0, #8 +; CHECK-NEXT: vmov.u16 r0, q0[0] +; CHECK-NEXT: vmov.8 q2[8], r0 +; CHECK-NEXT: vmov.u16 r0, q0[1] +; CHECK-NEXT: vmov.8 q2[9], r0 +; CHECK-NEXT: vmov.u16 r0, q0[2] +; CHECK-NEXT: vmov.8 q2[10], r0 +; CHECK-NEXT: vmov.u16 r0, q0[3] +; CHECK-NEXT: vmov.8 q2[11], r0 +; CHECK-NEXT: vmov.u16 r0, q0[4] +; CHECK-NEXT: vmov.8 q2[12], r0 +; CHECK-NEXT: vmov.u16 r0, q0[5] +; CHECK-NEXT: vmov.8 q2[13], r0 
+; CHECK-NEXT: vmov.u16 r0, q0[6] +; CHECK-NEXT: vmov.8 q2[14], r0 +; CHECK-NEXT: vmov.u16 r0, q0[7] +; CHECK-NEXT: vmov.8 q2[15], r0 +; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: bx lr +entry: + %s0s = sext <16 x i8> %s0 to <16 x i16> + %s1s = sext <16 x i8> %s1 to <16 x i16> + %m = mul <16 x i16> %s0s, %s1s + %s = ashr <16 x i16> %m, + %s2 = trunc <16 x i16> %s to <16 x i8> + ret <16 x i8> %s2 +} + +define arm_aapcs_vfpcc <16 x i8> @vmulhu_v16i8(<16 x i8> %s0, <16 x i8> %s1) { +; CHECK-LABEL: vmulhu_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.u8 r0, q1[0] +; CHECK-NEXT: vmov.16 q2[0], r0 +; CHECK-NEXT: vmov.u8 r0, q1[1] +; CHECK-NEXT: vmov.16 q2[1], r0 +; CHECK-NEXT: vmov.u8 r0, q1[2] +; CHECK-NEXT: vmov.16 q2[2], r0 +; CHECK-NEXT: vmov.u8 r0, q1[3] +; CHECK-NEXT: vmov.16 q2[3], r0 +; CHECK-NEXT: vmov.u8 r0, q1[4] +; CHECK-NEXT: vmov.16 q2[4], r0 +; CHECK-NEXT: vmov.u8 r0, q1[5] +; CHECK-NEXT: vmov.16 q2[5], r0 +; CHECK-NEXT: vmov.u8 r0, q1[6] +; CHECK-NEXT: vmov.16 q2[6], r0 +; CHECK-NEXT: vmov.u8 r0, q1[7] +; CHECK-NEXT: vmov.16 q2[7], r0 +; CHECK-NEXT: vmov.u8 r0, q0[0] +; CHECK-NEXT: vmov.16 q3[0], r0 +; CHECK-NEXT: vmov.u8 r0, q0[1] +; CHECK-NEXT: vmov.16 q3[1], r0 +; CHECK-NEXT: vmov.u8 r0, q0[2] +; CHECK-NEXT: vmov.16 q3[2], r0 +; CHECK-NEXT: vmov.u8 r0, q0[3] +; CHECK-NEXT: vmov.16 q3[3], r0 +; CHECK-NEXT: vmov.u8 r0, q0[4] +; CHECK-NEXT: vmov.16 q3[4], r0 +; CHECK-NEXT: vmov.u8 r0, q0[5] +; CHECK-NEXT: vmov.16 q3[5], r0 +; CHECK-NEXT: vmov.u8 r0, q0[6] +; CHECK-NEXT: vmov.16 q3[6], r0 +; CHECK-NEXT: vmov.u8 r0, q0[7] +; CHECK-NEXT: vmov.16 q3[7], r0 +; CHECK-NEXT: vmullb.u8 q2, q3, q2 +; CHECK-NEXT: vshr.u16 q3, q2, #8 +; CHECK-NEXT: vmov.u16 r0, q3[0] +; CHECK-NEXT: vmov.8 q2[0], r0 +; CHECK-NEXT: vmov.u16 r0, q3[1] +; CHECK-NEXT: vmov.8 q2[1], r0 +; CHECK-NEXT: vmov.u16 r0, q3[2] +; CHECK-NEXT: vmov.8 q2[2], r0 +; CHECK-NEXT: vmov.u16 r0, q3[3] +; CHECK-NEXT: vmov.8 q2[3], r0 +; CHECK-NEXT: vmov.u16 r0, q3[4] +; CHECK-NEXT: vmov.8 q2[4], r0 +; 
CHECK-NEXT: vmov.u16 r0, q3[5] +; CHECK-NEXT: vmov.8 q2[5], r0 +; CHECK-NEXT: vmov.u16 r0, q3[6] +; CHECK-NEXT: vmov.8 q2[6], r0 +; CHECK-NEXT: vmov.u16 r0, q3[7] +; CHECK-NEXT: vmov.8 q2[7], r0 +; CHECK-NEXT: vmov.u8 r0, q1[8] +; CHECK-NEXT: vmov.16 q3[0], r0 +; CHECK-NEXT: vmov.u8 r0, q1[9] +; CHECK-NEXT: vmov.16 q3[1], r0 +; CHECK-NEXT: vmov.u8 r0, q1[10] +; CHECK-NEXT: vmov.16 q3[2], r0 +; CHECK-NEXT: vmov.u8 r0, q1[11] +; CHECK-NEXT: vmov.16 q3[3], r0 +; CHECK-NEXT: vmov.u8 r0, q1[12] +; CHECK-NEXT: vmov.16 q3[4], r0 +; CHECK-NEXT: vmov.u8 r0, q1[13] +; CHECK-NEXT: vmov.16 q3[5], r0 +; CHECK-NEXT: vmov.u8 r0, q1[14] +; CHECK-NEXT: vmov.16 q3[6], r0 +; CHECK-NEXT: vmov.u8 r0, q1[15] +; CHECK-NEXT: vmov.16 q3[7], r0 +; CHECK-NEXT: vmov.u8 r0, q0[8] +; CHECK-NEXT: vmov.16 q1[0], r0 +; CHECK-NEXT: vmov.u8 r0, q0[9] +; CHECK-NEXT: vmov.16 q1[1], r0 +; CHECK-NEXT: vmov.u8 r0, q0[10] +; CHECK-NEXT: vmov.16 q1[2], r0 +; CHECK-NEXT: vmov.u8 r0, q0[11] +; CHECK-NEXT: vmov.16 q1[3], r0 +; CHECK-NEXT: vmov.u8 r0, q0[12] +; CHECK-NEXT: vmov.16 q1[4], r0 +; CHECK-NEXT: vmov.u8 r0, q0[13] +; CHECK-NEXT: vmov.16 q1[5], r0 +; CHECK-NEXT: vmov.u8 r0, q0[14] +; CHECK-NEXT: vmov.16 q1[6], r0 +; CHECK-NEXT: vmov.u8 r0, q0[15] +; CHECK-NEXT: vmov.16 q1[7], r0 +; CHECK-NEXT: vmullb.u8 q0, q1, q3 +; CHECK-NEXT: vshr.u16 q0, q0, #8 +; CHECK-NEXT: vmov.u16 r0, q0[0] +; CHECK-NEXT: vmov.8 q2[8], r0 +; CHECK-NEXT: vmov.u16 r0, q0[1] +; CHECK-NEXT: vmov.8 q2[9], r0 +; CHECK-NEXT: vmov.u16 r0, q0[2] +; CHECK-NEXT: vmov.8 q2[10], r0 +; CHECK-NEXT: vmov.u16 r0, q0[3] +; CHECK-NEXT: vmov.8 q2[11], r0 +; CHECK-NEXT: vmov.u16 r0, q0[4] +; CHECK-NEXT: vmov.8 q2[12], r0 +; CHECK-NEXT: vmov.u16 r0, q0[5] +; CHECK-NEXT: vmov.8 q2[13], r0 +; CHECK-NEXT: vmov.u16 r0, q0[6] +; CHECK-NEXT: vmov.8 q2[14], r0 +; CHECK-NEXT: vmov.u16 r0, q0[7] +; CHECK-NEXT: vmov.8 q2[15], r0 +; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: bx lr +entry: + %s0s = zext <16 x i8> %s0 to <16 x i16> + %s1s = zext <16 x i8> %s1 to 
<16 x i16> + %m = mul <16 x i16> %s0s, %s1s + %s = lshr <16 x i16> %m, + %s2 = trunc <16 x i16> %s to <16 x i8> + ret <16 x i8> %s2 +} From 5a4bcec8db420cf22b06720d45a9f9981b0297bf Mon Sep 17 00:00:00 2001 From: Stefan Pintilie Date: Mon, 25 May 2020 06:32:51 -0500 Subject: [PATCH 018/770] [PowerPC][NFC] Split PPCELFStreamer::emitInstruction Split off PPCELFStreamer::emitPrefixedInstruction from PPCELFStreamer::emitInstruction. Differential Revision: https://reviews.llvm.org/D79626 --- .../PowerPC/MCTargetDesc/PPCELFStreamer.cpp | 26 +++++++++++-------- .../PowerPC/MCTargetDesc/PPCELFStreamer.h | 2 ++ 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp index c9760ed38bcc5..4373778cc96cc 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp @@ -44,17 +44,8 @@ PPCELFStreamer::PPCELFStreamer(MCContext &Context, std::move(Emitter)), LastLabel(NULL) { } -void PPCELFStreamer::emitInstruction(const MCInst &Inst, - const MCSubtargetInfo &STI) { - PPCMCCodeEmitter *Emitter = - static_cast(getAssembler().getEmitterPtr()); - - // Special handling is only for prefixed instructions. - if (!Emitter->isPrefixedInstruction(Inst)) { - MCELFStreamer::emitInstruction(Inst, STI); - return; - } - +void PPCELFStreamer::emitPrefixedInstruction(const MCInst &Inst, + const MCSubtargetInfo &STI) { // Prefixed instructions must not cross a 64-byte boundary (i.e. prefix is // before the boundary and the remaining 4-bytes are after the boundary). 
In // order to achieve this, a nop is added prior to any such boundary-crossing @@ -93,6 +84,19 @@ void PPCELFStreamer::emitInstruction(const MCInst &Inst, } } +void PPCELFStreamer::emitInstruction(const MCInst &Inst, + const MCSubtargetInfo &STI) { + PPCMCCodeEmitter *Emitter = + static_cast(getAssembler().getEmitterPtr()); + + // Special handling is only for prefixed instructions. + if (!Emitter->isPrefixedInstruction(Inst)) { + MCELFStreamer::emitInstruction(Inst, STI); + return; + } + emitPrefixedInstruction(Inst, STI); +} + void PPCELFStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) { LastLabel = Symbol; LastLabelLoc = Loc; diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h index 403681ed383aa..51863232d0719 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h @@ -41,6 +41,8 @@ class PPCELFStreamer : public MCELFStreamer { // EmitLabel updates LastLabel and LastLabelLoc when a new label is emitted. void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override; +private: + void emitPrefixedInstruction(const MCInst &Inst, const MCSubtargetInfo &STI); }; MCELFStreamer *createPPCELFStreamer(MCContext &Context, From 7293dd5b4033d94ce1397b192a93010e64b2d949 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Mon, 25 May 2020 07:42:17 -0400 Subject: [PATCH 019/770] Added pow intrinsic to LLVMIR dialect Added pow intrinsic to LLVMIR dialect. Added a roundtrip test for it.
Differential Revision: https://reviews.llvm.org/D80248 --- mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 1 + mlir/test/Dialect/LLVMIR/roundtrip.mlir | 3 +++ mlir/test/Target/llvmir-intrinsics.mlir | 9 +++++++++ 3 files changed, 13 insertions(+) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index 9a7836cd558cb..4be27b94e75e1 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -790,6 +790,7 @@ def LLVM_Prefetch : LLVM_ZeroResultIntrOp<"prefetch", [0]>, LLVM_Type:$hint, LLVM_Type:$cache)>; def LLVM_SinOp : LLVM_UnaryIntrinsicOp<"sin">; def LLVM_SqrtOp : LLVM_UnaryIntrinsicOp<"sqrt">; +def LLVM_PowOp : LLVM_BinarySameArgsIntrinsicOp<"pow">; // // Vector Reductions. diff --git a/mlir/test/Dialect/LLVMIR/roundtrip.mlir b/mlir/test/Dialect/LLVMIR/roundtrip.mlir index 79f9078e56778..d93de93882bea 100644 --- a/mlir/test/Dialect/LLVMIR/roundtrip.mlir +++ b/mlir/test/Dialect/LLVMIR/roundtrip.mlir @@ -100,6 +100,9 @@ func @ops(%arg0 : !llvm.i32, %arg1 : !llvm.float) { // CHECK: "llvm.intr.sin"(%arg1) : (!llvm.float) -> !llvm.float %30 = "llvm.intr.sin"(%arg1) : (!llvm.float) -> !llvm.float +// CHECK: "llvm.intr.pow"(%arg1, %arg1) : (!llvm.float, !llvm.float) -> !llvm.float + %31 = "llvm.intr.pow"(%arg1, %arg1) : (!llvm.float, !llvm.float) -> !llvm.float + // CHECK: llvm.return llvm.return } diff --git a/mlir/test/Target/llvmir-intrinsics.mlir b/mlir/test/Target/llvmir-intrinsics.mlir index c332bc23b814e..17c586e9a88b6 100644 --- a/mlir/test/Target/llvmir-intrinsics.mlir +++ b/mlir/test/Target/llvmir-intrinsics.mlir @@ -108,6 +108,15 @@ llvm.func @copysign_test(%arg0: !llvm.float, %arg1: !llvm.float, %arg2: !llvm<"< llvm.return } +// CHECK-LABEL: @pow_test +llvm.func @pow_test(%arg0: !llvm.float, %arg1: !llvm.float, %arg2: !llvm<"<8 x float>">, %arg3: !llvm<"<8 x float>">) { + // CHECK: call float @llvm.pow.f32 + "llvm.intr.pow"(%arg0, %arg1) : (!llvm.float, 
!llvm.float) -> !llvm.float + // CHECK: call <8 x float> @llvm.pow.v8f32 + "llvm.intr.pow"(%arg2, %arg3) : (!llvm<"<8 x float>">, !llvm<"<8 x float>">) -> !llvm<"<8 x float>"> + llvm.return +} + // CHECK-LABEL: @vector_reductions llvm.func @vector_reductions(%arg0: !llvm.float, %arg1: !llvm<"<8 x float>">, %arg2: !llvm<"<8 x i32>">) { // CHECK: call i32 @llvm.experimental.vector.reduce.add.v8i32 From 38366cf1676f9ac8d421586658e8bcd5ac4ab62d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 25 May 2020 12:52:14 +0100 Subject: [PATCH 020/770] FunctionLoweringInfo.h - remove orphan addSEHHandlersForLPads declaration. NFC. --- llvm/include/llvm/CodeGen/FunctionLoweringInfo.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h index bc5e4be674287..37e1e0de6510f 100644 --- a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -257,8 +257,6 @@ class FunctionLoweringInfo { const TargetRegisterClass *RC); private: - void addSEHHandlersForLPads(ArrayRef LPads); - /// LiveOutRegInfo - Information about live out vregs. IndexedMap LiveOutRegInfo; }; From 8f48814879c06bbf9f211fa5d959419f0d2d38b6 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 25 May 2020 12:56:44 +0100 Subject: [PATCH 021/770] FunctionLoweringInfo.h - move APInt.h dependency to FunctionLoweringInfo.cpp. NFC. 
--- llvm/include/llvm/CodeGen/FunctionLoweringInfo.h | 2 +- llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h index 37e1e0de6510f..c99ca00eac29f 100644 --- a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -13,7 +13,7 @@ #ifndef LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H #define LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H -#include "llvm/ADT/APInt.h" + #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 4f46f19905319..7a5fd7d24c681 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/ADT/APInt.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" From fa038e03504c7d0dfd438b1dfdd6da7081e75617 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 25 May 2020 07:50:45 -0400 Subject: [PATCH 022/770] [x86] favor vector constant load to avoid GPR to XMM transfer, part 2 This replaces the build_vector lowering code that was just added in D80013 and matches the pattern later from the x86-specific "vzext_movl". That seems to result in the same or better improvements and gets rid of the 'TODO' items from that patch. AFAICT, we always shrink wider constant vectors to 128-bit on these patterns, so we still get the implicit zero-extension to ymm/zmm without wasting space on larger vector constants. There's a trade-off there because that means we miss potential load-folding. 
Similarly, we could load scalar constants here with implicit zero-extension even to 128-bit. That saves constant space, but it means we forego load-folding, and so it increases register pressure. This seems like a good middle-ground between those 2 options. Differential Revision: https://reviews.llvm.org/D80131 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 33 +++++++--- llvm/test/CodeGen/X86/avx-load-store.ll | 10 ++- llvm/test/CodeGen/X86/avx2-arith.ll | 6 +- llvm/test/CodeGen/X86/combine-udiv.ll | 40 ++++-------- llvm/test/CodeGen/X86/fcmp-constant.ll | 3 +- .../X86/insert-into-constant-vector.ll | 48 ++++++-------- llvm/test/CodeGen/X86/packss.ll | 7 +- llvm/test/CodeGen/X86/pshufb-mask-comments.ll | 2 +- llvm/test/CodeGen/X86/ret-mmx.ll | 2 +- llvm/test/CodeGen/X86/sad.ll | 65 +++++-------------- .../CodeGen/X86/srem-seteq-vec-nonsplat.ll | 6 +- llvm/test/CodeGen/X86/vec_set-A.ll | 2 +- llvm/test/CodeGen/X86/vec_shift2.ll | 4 +- llvm/test/CodeGen/X86/vector-lzcnt-128.ll | 12 ++-- .../CodeGen/X86/vector-shuffle-256-v16.ll | 3 +- .../CodeGen/X86/vector-shuffle-256-v32.ll | 16 ++--- .../test/CodeGen/X86/vector-shuffle-256-v8.ll | 10 ++- .../CodeGen/X86/vector-shuffle-512-v32.ll | 10 ++- .../CodeGen/X86/vector-shuffle-512-v64.ll | 10 ++- .../test/CodeGen/X86/vector-shuffle-512-v8.ll | 10 ++- .../X86/vector-shuffle-combining-avx512f.ll | 10 ++- .../X86/vector-shuffle-combining-xop.ll | 29 +++------ llvm/test/CodeGen/X86/vector-shuffle-v1.ll | 12 ++-- llvm/test/CodeGen/X86/vector-tzcnt-128.ll | 24 +++---- 24 files changed, 147 insertions(+), 227 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 5fc8448d1e725..eab9f14bec910 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -10209,15 +10209,6 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (NumZero == 0) return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); - // Just 
load a vector integer constant. Loading is better for code size, - // avoids move GPR immediate --> XMM, and reduces register pressure. - if (IsAllConstants && VT.isInteger()) { - // TODO: Remove -1 restriction with demanded elements improvement? - // TODO: Insert 128-bit load into wider undef vector? - if (VT.is128BitVector() && !isAllOnesConstant(Item)) - return SDValue(); - } - if (EltVT == MVT::i32 || EltVT == MVT::f32 || EltVT == MVT::f64 || (EltVT == MVT::i64 && Subtarget.is64Bit())) { assert((VT.is128BitVector() || VT.is256BitVector() || @@ -35858,6 +35849,30 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, } } + // Load a scalar integer constant directly to XMM instead of transferring an + // immediate value from GPR. + // vzext_movl (scalar_to_vector C) --> load [C,0...] + if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR) { + if (auto *C = dyn_cast(N0.getOperand(0))) { + // Create a vector constant - scalar constant followed by zeros. + EVT ScalarVT = N0.getOperand(0).getValueType(); + Type *ScalarTy = ScalarVT.getTypeForEVT(*DAG.getContext()); + unsigned NumElts = VT.getVectorNumElements(); + Constant *Zero = ConstantInt::getNullValue(ScalarTy); + SmallVector ConstantVec(NumElts, Zero); + ConstantVec[0] = const_cast(C->getConstantIntValue()); + + // Load the vector constant from constant pool. 
+ MVT PVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); + SDValue CP = DAG.getConstantPool(ConstantVector::get(ConstantVec), PVT); + MachinePointerInfo MPI = + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()); + Align Alignment = cast(CP)->getAlign(); + return DAG.getLoad(VT, DL, DAG.getEntryNode(), CP, MPI, Alignment, + MachineMemOperand::MOLoad); + } + } + return SDValue(); } case X86ISD::BLENDI: { diff --git a/llvm/test/CodeGen/X86/avx-load-store.ll b/llvm/test/CodeGen/X86/avx-load-store.ll index 1b3c35855ae9a..718449d7a771f 100644 --- a/llvm/test/CodeGen/X86/avx-load-store.ll +++ b/llvm/test/CodeGen/X86/avx-load-store.ll @@ -220,8 +220,7 @@ define void @f_f() nounwind { ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne .LBB9_4 ; CHECK-NEXT: # %bb.3: # %cif_mixed_test_all -; CHECK-NEXT: movl $-1, %eax -; CHECK-NEXT: vmovd %eax, %xmm0 +; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,0,0,0] ; CHECK-NEXT: vmaskmovps %ymm0, %ymm0, (%rax) ; CHECK-NEXT: .LBB9_4: # %cif_mixed_test_any_check ; @@ -238,13 +237,12 @@ define void @f_f() nounwind { ; CHECK_O0-NEXT: jne .LBB9_3 ; CHECK_O0-NEXT: jmp .LBB9_4 ; CHECK_O0-NEXT: .LBB9_3: # %cif_mixed_test_all -; CHECK_O0-NEXT: movl $-1, %eax -; CHECK_O0-NEXT: vmovd %eax, %xmm0 +; CHECK_O0-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967295,0,0,0] ; CHECK_O0-NEXT: vmovdqa %xmm0, %xmm0 ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm1 -; CHECK_O0-NEXT: # implicit-def: $rcx +; CHECK_O0-NEXT: # implicit-def: $rax ; CHECK_O0-NEXT: # implicit-def: $ymm2 -; CHECK_O0-NEXT: vmaskmovps %ymm2, %ymm1, (%rcx) +; CHECK_O0-NEXT: vmaskmovps %ymm2, %ymm1, (%rax) ; CHECK_O0-NEXT: .LBB9_4: # %cif_mixed_test_any_check allocas: br i1 undef, label %cif_mask_all, label %cif_mask_mixed diff --git a/llvm/test/CodeGen/X86/avx2-arith.ll b/llvm/test/CodeGen/X86/avx2-arith.ll index 39a81a2dc020a..b694b98d04c47 100644 --- a/llvm/test/CodeGen/X86/avx2-arith.ll +++ b/llvm/test/CodeGen/X86/avx2-arith.ll @@ -347,15 +347,13 @@ define <8 x i16> 
@mul_const8(<8 x i16> %x) { define <8 x i32> @mul_const9(<8 x i32> %x) { ; X32-LABEL: mul_const9: ; X32: # %bb.0: -; X32-NEXT: movl $2, %eax -; X32-NEXT: vmovd %eax, %xmm1 +; X32-NEXT: vmovdqa {{.*#+}} xmm1 = [2,0,0,0] ; X32-NEXT: vpmulld %ymm1, %ymm0, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: mul_const9: ; X64: # %bb.0: -; X64-NEXT: movl $2, %eax -; X64-NEXT: vmovd %eax, %xmm1 +; X64-NEXT: vmovdqa {{.*#+}} xmm1 = [2,0,0,0] ; X64-NEXT: vpmulld %ymm1, %ymm0, %ymm0 ; X64-NEXT: retq %y = mul <8 x i32> %x, diff --git a/llvm/test/CodeGen/X86/combine-udiv.ll b/llvm/test/CodeGen/X86/combine-udiv.ll index c06719b91e273..e6d7aac926162 100644 --- a/llvm/test/CodeGen/X86/combine-udiv.ll +++ b/llvm/test/CodeGen/X86/combine-udiv.ll @@ -590,9 +590,7 @@ define <8 x i16> @combine_vec_udiv_nonuniform2(<8 x i16> %x) { ; ; XOP-LABEL: combine_vec_udiv_nonuniform2: ; XOP: # %bb.0: -; XOP-NEXT: movl $65535, %eax # imm = 0xFFFF -; XOP-NEXT: vmovd %eax, %xmm1 -; XOP-NEXT: vpshlw %xmm1, %xmm0, %xmm0 +; XOP-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm0 ; XOP-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm0 ; XOP-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm0 ; XOP-NEXT: retq @@ -664,31 +662,17 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) { ; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: combine_vec_udiv_nonuniform4: -; AVX1: # %bb.0: -; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 -; AVX1-NEXT: vpackuswb %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1 -; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX1-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: combine_vec_udiv_nonuniform4: -; AVX2: # %bb.0: -; AVX2-NEXT: movl $171, %eax -; AVX2-NEXT: vmovd 
%eax, %xmm1 -; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX2-NEXT: vpmullw %xmm1, %xmm2, %xmm1 -; AVX2-NEXT: vpsrlw $8, %xmm1, %xmm1 -; AVX2-NEXT: vpackuswb %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpsrlw $7, %xmm1, %xmm1 -; AVX2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX2-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: retq +; AVX-LABEL: combine_vec_udiv_nonuniform4: +; AVX: # %bb.0: +; AVX-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX-NEXT: vpmullw {{.*}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1 +; AVX-NEXT: vpackuswb %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpsrlw $7, %xmm1, %xmm1 +; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq ; ; XOP-LABEL: combine_vec_udiv_nonuniform4: ; XOP: # %bb.0: diff --git a/llvm/test/CodeGen/X86/fcmp-constant.ll b/llvm/test/CodeGen/X86/fcmp-constant.ll index 239830f980890..481a32b39dd37 100644 --- a/llvm/test/CodeGen/X86/fcmp-constant.ll +++ b/llvm/test/CodeGen/X86/fcmp-constant.ll @@ -92,8 +92,7 @@ define <2 x i64> @fcmp_ueq_v2f64_undef() { define <2 x i64> @fcmp_ueq_v2f64_undef_elt() { ; CHECK-LABEL: fcmp_ueq_v2f64_undef_elt: ; CHECK: # %bb.0: -; CHECK-NEXT: movq $-1, %rax -; CHECK-NEXT: movq %rax, %xmm0 +; CHECK-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551615,0] ; CHECK-NEXT: retq %1 = fcmp ueq <2 x double> , %2 = sext <2 x i1> %1 to <2 x i64> diff --git a/llvm/test/CodeGen/X86/insert-into-constant-vector.ll b/llvm/test/CodeGen/X86/insert-into-constant-vector.ll index 18d57e9280557..779c91ab2575c 100644 --- 
a/llvm/test/CodeGen/X86/insert-into-constant-vector.ll +++ b/llvm/test/CodeGen/X86/insert-into-constant-vector.ll @@ -129,10 +129,8 @@ define <4 x i32> @elt3_v4i32(i32 %x) { define <2 x i64> @elt0_v2i64(i64 %x) { ; X32SSE-LABEL: elt0_v2i64: ; X32SSE: # %bb.0: -; X32SSE-NEXT: movl $1, %eax -; X32SSE-NEXT: movd %eax, %xmm1 -; X32SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X32SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X32SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] ; X32SSE-NEXT: retl ; ; X64SSE2-LABEL: elt0_v2i64: @@ -150,10 +148,8 @@ define <2 x i64> @elt0_v2i64(i64 %x) { ; ; X32AVX-LABEL: elt0_v2i64: ; X32AVX: # %bb.0: -; X32AVX-NEXT: movl $1, %eax -; X32AVX-NEXT: vmovd %eax, %xmm0 -; X32AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; X32AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; X32AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X32AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] ; X32AVX-NEXT: retl ; ; X64AVX-LABEL: elt0_v2i64: @@ -365,10 +361,9 @@ define <8 x float> @elt6_v8f32(float %x) { define <8 x i64> @elt5_v8i64(i64 %x) { ; X32SSE-LABEL: elt5_v8i64: ; X32SSE: # %bb.0: -; X32SSE-NEXT: movl $4, %eax -; X32SSE-NEXT: movd %eax, %xmm2 -; X32SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0] +; X32SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X32SSE-NEXT: movaps {{.*#+}} xmm2 = [4,0,0,0] +; X32SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm0[0] ; X32SSE-NEXT: movaps {{.*#+}} xmm0 = [42,0,1,0] ; X32SSE-NEXT: movaps {{.*#+}} xmm1 = [2,0,3,0] ; X32SSE-NEXT: movaps {{.*#+}} xmm3 = [6,0,7,0] @@ -395,10 +390,9 @@ define <8 x i64> @elt5_v8i64(i64 %x) { ; ; X32AVX1-LABEL: elt5_v8i64: ; X32AVX1: # %bb.0: -; X32AVX1-NEXT: movl $4, %eax -; X32AVX1-NEXT: vmovd %eax, %xmm0 -; X32AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; X32AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X32AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; 
X32AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [4,0,0,0] +; X32AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; X32AVX1-NEXT: vinsertf128 $1, {{\.LCPI.*}}, %ymm0, %ymm1 ; X32AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [42,0,1,0,2,0,3,0] ; X32AVX1-NEXT: retl @@ -413,11 +407,10 @@ define <8 x i64> @elt5_v8i64(i64 %x) { ; ; X32AVX2-LABEL: elt5_v8i64: ; X32AVX2: # %bb.0: -; X32AVX2-NEXT: movl $4, %eax -; X32AVX2-NEXT: vmovd %eax, %xmm0 -; X32AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; X32AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; X32AVX2-NEXT: vinserti128 $1, {{\.LCPI.*}}, %ymm0, %ymm1 +; X32AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X32AVX2-NEXT: vmovaps {{.*#+}} xmm1 = [4,0,0,0] +; X32AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; X32AVX2-NEXT: vinsertf128 $1, {{\.LCPI.*}}, %ymm0, %ymm1 ; X32AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [42,0,1,0,2,0,3,0] ; X32AVX2-NEXT: retl ; @@ -431,13 +424,12 @@ define <8 x i64> @elt5_v8i64(i64 %x) { ; ; X32AVX512F-LABEL: elt5_v8i64: ; X32AVX512F: # %bb.0: -; X32AVX512F-NEXT: vmovdqa {{.*#+}} ymm0 = [42,0,1,0,2,0,3,0] -; X32AVX512F-NEXT: movl $4, %eax -; X32AVX512F-NEXT: vmovd %eax, %xmm1 -; X32AVX512F-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero -; X32AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] -; X32AVX512F-NEXT: vinserti128 $1, {{\.LCPI.*}}, %ymm1, %ymm1 -; X32AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; X32AVX512F-NEXT: vmovaps {{.*#+}} ymm0 = [42,0,1,0,2,0,3,0] +; X32AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; X32AVX512F-NEXT: vmovaps {{.*#+}} xmm2 = [4,0,0,0] +; X32AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; X32AVX512F-NEXT: vinsertf128 $1, {{\.LCPI.*}}, %ymm1, %ymm1 +; X32AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; X32AVX512F-NEXT: retl ; ; X64AVX512F-LABEL: elt5_v8i64: diff --git a/llvm/test/CodeGen/X86/packss.ll b/llvm/test/CodeGen/X86/packss.ll index 9c8d1f301f62f..9a4025ab75e4b 100644 --- a/llvm/test/CodeGen/X86/packss.ll +++ b/llvm/test/CodeGen/X86/packss.ll @@ 
-159,13 +159,12 @@ define <8 x i16> @trunc_ashr_v4i64_demandedelts(<4 x i64> %a0) { ; X86-SSE-NEXT: psllq $63, %xmm1 ; X86-SSE-NEXT: psllq $63, %xmm0 ; X86-SSE-NEXT: psrlq $63, %xmm0 -; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = <1,0,u,u> +; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,0,0,0] ; X86-SSE-NEXT: pxor %xmm2, %xmm0 -; X86-SSE-NEXT: pcmpeqd %xmm3, %xmm3 -; X86-SSE-NEXT: paddq %xmm3, %xmm0 +; X86-SSE-NEXT: psubq %xmm2, %xmm0 ; X86-SSE-NEXT: psrlq $63, %xmm1 ; X86-SSE-NEXT: pxor %xmm2, %xmm1 -; X86-SSE-NEXT: paddq %xmm3, %xmm1 +; X86-SSE-NEXT: psubq %xmm2, %xmm1 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; X86-SSE-NEXT: packssdw %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/pshufb-mask-comments.ll b/llvm/test/CodeGen/X86/pshufb-mask-comments.ll index 9a1a000db93c8..416b5c8cb0966 100644 --- a/llvm/test/CodeGen/X86/pshufb-mask-comments.ll +++ b/llvm/test/CodeGen/X86/pshufb-mask-comments.ll @@ -54,7 +54,7 @@ define <16 x i8> @test4(<16 x i8> %V, <2 x i64>* %P) { define <16 x i8> @test5(<16 x i8> %V) { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: -; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1,0] +; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1,0,0,0] ; CHECK-NEXT: movaps %xmm1, (%rax) ; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1,1] ; CHECK-NEXT: movaps %xmm1, (%rax) diff --git a/llvm/test/CodeGen/X86/ret-mmx.ll b/llvm/test/CodeGen/X86/ret-mmx.ll index bca47e182953f..8c2ded4ebb362 100644 --- a/llvm/test/CodeGen/X86/ret-mmx.ll +++ b/llvm/test/CodeGen/X86/ret-mmx.ll @@ -32,7 +32,7 @@ define <1 x i64> @t2() nounwind { define <2 x i32> @t3() nounwind { ; CHECK-LABEL: t3: ; CHECK: ## %bb.0: -; CHECK-NEXT: movaps {{.*#+}} xmm0 = <1,0,u,u> +; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1,0,0,0] ; CHECK-NEXT: retq ret <2 x i32> } diff --git a/llvm/test/CodeGen/X86/sad.ll b/llvm/test/CodeGen/X86/sad.ll index 66a8661698a53..006dd3d5ff178 100644 --- a/llvm/test/CodeGen/X86/sad.ll +++ b/llvm/test/CodeGen/X86/sad.ll @@ -544,8 +544,7 @@ define i32 
@sad_2i8() nounwind { ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: movq $-1024, %rax # imm = 0xFC00 -; SSE2-NEXT: movl $65535, %ecx # imm = 0xFFFF -; SSE2-NEXT: movd %ecx, %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,0,0,0] ; SSE2-NEXT: .p2align 4, 0x90 ; SSE2-NEXT: .LBB3_1: # %vector.body ; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 @@ -995,54 +994,20 @@ define i32 @sad_unroll_nonzero_initial(<16 x i8>* %arg, <16 x i8>* %arg1, <16 x ; SSE2-NEXT: movd %xmm1, %eax ; SSE2-NEXT: retq ; -; AVX1-LABEL: sad_unroll_nonzero_initial: -; AVX1: # %bb.0: # %bb -; AVX1-NEXT: vmovdqu (%rdi), %xmm0 -; AVX1-NEXT: vpsadbw (%rsi), %xmm0, %xmm0 -; AVX1-NEXT: vmovdqu (%rdx), %xmm1 -; AVX1-NEXT: vpsadbw (%rcx), %xmm1, %xmm1 -; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovd %xmm0, %eax -; AVX1-NEXT: retq -; -; AVX2-LABEL: sad_unroll_nonzero_initial: -; AVX2: # %bb.0: # %bb -; AVX2-NEXT: vmovdqu (%rdi), %xmm0 -; AVX2-NEXT: vpsadbw (%rsi), %xmm0, %xmm0 -; AVX2-NEXT: vmovdqu (%rdx), %xmm1 -; AVX2-NEXT: vpsadbw (%rcx), %xmm1, %xmm1 -; AVX2-NEXT: movl $1, %eax -; AVX2-NEXT: vmovd %eax, %xmm2 -; AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovd %xmm0, %eax -; AVX2-NEXT: retq -; -; AVX512-LABEL: sad_unroll_nonzero_initial: -; AVX512: # %bb.0: # %bb -; AVX512-NEXT: vmovdqu (%rdi), %xmm0 -; AVX512-NEXT: vpsadbw (%rsi), %xmm0, %xmm0 -; AVX512-NEXT: vmovdqu (%rdx), %xmm1 -; AVX512-NEXT: vpsadbw (%rcx), %xmm1, %xmm1 -; AVX512-NEXT: movl $1, %eax -; AVX512-NEXT: vmovd %eax, %xmm2 -; AVX512-NEXT: 
vpaddd %xmm2, %xmm1, %xmm1 -; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovd %xmm0, %eax -; AVX512-NEXT: retq +; AVX-LABEL: sad_unroll_nonzero_initial: +; AVX: # %bb.0: # %bb +; AVX-NEXT: vmovdqu (%rdi), %xmm0 +; AVX-NEXT: vpsadbw (%rsi), %xmm0, %xmm0 +; AVX-NEXT: vmovdqu (%rdx), %xmm1 +; AVX-NEXT: vpsadbw (%rcx), %xmm1, %xmm1 +; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: retq bb: %tmp = load <16 x i8>, <16 x i8>* %arg, align 1 %tmp4 = load <16 x i8>, <16 x i8>* %arg1, align 1 diff --git a/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll index 1eee782f90cac..6c72adbc63175 100644 --- a/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll +++ b/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll @@ -325,7 +325,7 @@ define <4 x i32> @test_srem_even_allones_eq(<4 x i32> %X) nounwind { ; CHECK-SSE41: # %bb.0: ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] ; CHECK-SSE41-NEXT: pmuldq {{.*}}(%rip), %xmm1 -; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = <2454267027,u,0,u> +; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2454267027,0,0,0] ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm2 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] @@ -452,7 +452,7 @@ define <4 x i32> @test_srem_even_allones_ne(<4 x i32> %X) nounwind { ; CHECK-SSE41: # %bb.0: ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] ; CHECK-SSE41-NEXT: pmuldq {{.*}}(%rip), %xmm1 -; CHECK-SSE41-NEXT: movdqa 
{{.*#+}} xmm2 = <2454267027,u,0,u> +; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2454267027,0,0,0] ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm2 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] @@ -1314,7 +1314,7 @@ define <4 x i32> @test_srem_even_one(<4 x i32> %X) nounwind { ; CHECK-SSE41: # %bb.0: ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] ; CHECK-SSE41-NEXT: pmuldq {{.*}}(%rip), %xmm1 -; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = <2454267027,u,0,u> +; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2454267027,0,0,0] ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm2 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] diff --git a/llvm/test/CodeGen/X86/vec_set-A.ll b/llvm/test/CodeGen/X86/vec_set-A.ll index e246ef047231f..c8ff250b5bfbc 100644 --- a/llvm/test/CodeGen/X86/vec_set-A.ll +++ b/llvm/test/CodeGen/X86/vec_set-A.ll @@ -10,7 +10,7 @@ define <2 x i64> @test1() nounwind { ; ; X64-LABEL: test1: ; X64: # %bb.0: -; X64-NEXT: movaps {{.*#+}} xmm0 = [1,0] +; X64-NEXT: movaps {{.*#+}} xmm0 = [1,0,0,0] ; X64-NEXT: retq ret <2 x i64> < i64 1, i64 0 > } diff --git a/llvm/test/CodeGen/X86/vec_shift2.ll b/llvm/test/CodeGen/X86/vec_shift2.ll index 1f386bb5a1daf..a38187f190f99 100644 --- a/llvm/test/CodeGen/X86/vec_shift2.ll +++ b/llvm/test/CodeGen/X86/vec_shift2.ll @@ -5,12 +5,12 @@ define <2 x i64> @t1(<2 x i64> %b1, <2 x i64> %c) nounwind { ; X32-LABEL: t1: ; X32: # %bb.0: -; X32-NEXT: psrlw {{\.LCPI.*}}, %xmm0 +; X32-NEXT: psrlw $14, %xmm0 ; X32-NEXT: retl ; ; X64-LABEL: t1: ; X64: # %bb.0: -; X64-NEXT: psrlw {{.*}}(%rip), %xmm0 +; X64-NEXT: psrlw $14, %xmm0 ; X64-NEXT: retq %tmp1 = bitcast <2 x i64> %b1 to <8 x i16> %tmp2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w( <8 x i16> %tmp1, <8 x i16> bitcast (<4 x i32> < i32 14, i32 undef, i32 undef, i32 undef > to <8 x i16>) ) nounwind readnone diff --git 
a/llvm/test/CodeGen/X86/vector-lzcnt-128.ll b/llvm/test/CodeGen/X86/vector-lzcnt-128.ll index ea77de5393375..8c451b7215b07 100644 --- a/llvm/test/CodeGen/X86/vector-lzcnt-128.ll +++ b/llvm/test/CodeGen/X86/vector-lzcnt-128.ll @@ -1666,17 +1666,17 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) nounwind { define <2 x i64> @foldv2i64() nounwind { ; SSE-LABEL: foldv2i64: ; SSE: # %bb.0: -; SSE-NEXT: movaps {{.*#+}} xmm0 = [55,0] +; SSE-NEXT: movaps {{.*#+}} xmm0 = [55,0,0,0] ; SSE-NEXT: retq ; ; NOBW-LABEL: foldv2i64: ; NOBW: # %bb.0: -; NOBW-NEXT: vmovaps {{.*#+}} xmm0 = [55,0] +; NOBW-NEXT: vmovaps {{.*#+}} xmm0 = [55,0,0,0] ; NOBW-NEXT: retq ; ; AVX512VLBWDQ-LABEL: foldv2i64: ; AVX512VLBWDQ: # %bb.0: -; AVX512VLBWDQ-NEXT: vmovaps {{.*#+}} xmm0 = [55,0] +; AVX512VLBWDQ-NEXT: vmovaps {{.*#+}} xmm0 = [55,0,0,0] ; AVX512VLBWDQ-NEXT: retq ; ; X32-SSE-LABEL: foldv2i64: @@ -1690,17 +1690,17 @@ define <2 x i64> @foldv2i64() nounwind { define <2 x i64> @foldv2i64u() nounwind { ; SSE-LABEL: foldv2i64u: ; SSE: # %bb.0: -; SSE-NEXT: movaps {{.*#+}} xmm0 = [55,0] +; SSE-NEXT: movaps {{.*#+}} xmm0 = [55,0,0,0] ; SSE-NEXT: retq ; ; NOBW-LABEL: foldv2i64u: ; NOBW: # %bb.0: -; NOBW-NEXT: vmovaps {{.*#+}} xmm0 = [55,0] +; NOBW-NEXT: vmovaps {{.*#+}} xmm0 = [55,0,0,0] ; NOBW-NEXT: retq ; ; AVX512VLBWDQ-LABEL: foldv2i64u: ; AVX512VLBWDQ: # %bb.0: -; AVX512VLBWDQ-NEXT: vmovaps {{.*#+}} xmm0 = [55,0] +; AVX512VLBWDQ-NEXT: vmovaps {{.*#+}} xmm0 = [55,0,0,0] ; AVX512VLBWDQ-NEXT: retq ; ; X32-SSE-LABEL: foldv2i64u: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll index 9d04c701ce6c8..cd82cc89778b1 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll @@ -713,8 +713,7 @@ define <16 x i16> @shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_0 ; ; AVX512VL-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: 
movl $15, %eax -; AVX512VL-NEXT: vmovd %eax, %xmm1 +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [15,0,0,0] ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll index 0807aa6dab095..1cf048deca298 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -1600,24 +1600,21 @@ define <32 x i8> @shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_ ; ; AVX2-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,0,0,0] ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] -; AVX2-NEXT: movl $15, %eax -; AVX2-NEXT: vmovd %eax, %xmm1 ; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm1 = [15,0,0,0] ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] -; AVX512VLBW-NEXT: movl $15, %eax -; AVX512VLBW-NEXT: vmovd %eax, %xmm1 ; AVX512VLBW-NEXT: vpshufb %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLVBMI: # %bb.0: -; AVX512VLVBMI-NEXT: movl $31, %eax -; AVX512VLVBMI-NEXT: vmovd %eax, %xmm1 +; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [31,0,0,0] ; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0 ; AVX512VLVBMI-NEXT: retq ; @@ -1632,9 +1629,8 @@ define <32 x i8> @shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,0,0,0] ; 
XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] -; XOPAVX2-NEXT: movl $15, %eax -; XOPAVX2-NEXT: vmovd %eax, %xmm1 ; XOPAVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> @@ -2791,7 +2787,7 @@ define <32 x i8> @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_ ; AVX1-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_31_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16: ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 @@ -2805,7 +2801,7 @@ define <32 x i8> @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_ ; XOPAVX1-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_31_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16: ; XOPAVX1: # %bb.0: ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] +; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll index db36bdb5d9400..4e62ffae6738c 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll @@ -187,9 +187,8 @@ define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) { ; ; AVX2OR512VL-LABEL: shuffle_v8f32_70000000: ; AVX2OR512VL: # %bb.0: -; AVX2OR512VL-NEXT: movl $7, %eax -; AVX2OR512VL-NEXT: vmovd %eax, %xmm1 -; AVX2OR512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2OR512VL-NEXT: vmovaps {{.*#+}} xmm1 = [7,0,0,0] +; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, 
%ymm0 ; AVX2OR512VL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> ret <8 x float> %shuffle @@ -1510,9 +1509,8 @@ define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) { ; ; AVX2OR512VL-LABEL: shuffle_v8i32_70000000: ; AVX2OR512VL: # %bb.0: -; AVX2OR512VL-NEXT: movl $7, %eax -; AVX2OR512VL-NEXT: vmovd %eax, %xmm1 -; AVX2OR512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2OR512VL-NEXT: vmovaps {{.*#+}} xmm1 = [7,0,0,0] +; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 ; AVX2OR512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> ret <8 x i32> %shuffle diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll index f798b55d7fd9d..59c4769017820 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll @@ -201,16 +201,14 @@ define <32 x i16> @shuffle_v32i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_1 define <32 x i16> @shuffle_v32i16_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz(<32 x i16> %a) { ; KNL-LABEL: shuffle_v32i16_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz: ; KNL: ## %bb.0: -; KNL-NEXT: movl $65535, %eax ## imm = 0xFFFF -; KNL-NEXT: vmovd %eax, %xmm1 -; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 +; KNL-NEXT: vmovaps {{.*#+}} xmm1 = [65535,0,0,0] +; KNL-NEXT: vandps %ymm1, %ymm0, %ymm0 ; KNL-NEXT: retq ; ; SKX-LABEL: shuffle_v32i16_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz: ; SKX: ## %bb.0: -; SKX-NEXT: movl $65535, %eax ## imm = 0xFFFF -; SKX-NEXT: vmovd %eax, %xmm1 -; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0 +; SKX-NEXT: vmovaps {{.*#+}} xmm1 = [65535,0,0,0] +; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 ; SKX-NEXT: retq %shuffle = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> ret <32 x i16> %shuffle diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll index 191b29a19ce3a..2cf8ab11ce278 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll +++ 
b/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll @@ -109,9 +109,8 @@ define <64 x i8> @shuffle_v64i8_79_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_ define <64 x i8> @shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz(<64 x i8> %a) { ; AVX512F-LABEL: shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz: ; AVX512F: # %bb.0: -; AVX512F-NEXT: movl $255, %eax -; AVX512F-NEXT: vmovd %eax, %xmm1 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vmovaps {{.*#+}} xmm1 = [255,0,0,0] +; AVX512F-NEXT: vandps %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz: @@ -121,9 +120,8 @@ define <64 x i8> @shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz(<64 x i8> %a) { ; ; AVX512DQ-LABEL: shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: movl $255, %eax -; AVX512DQ-NEXT: vmovd %eax, %xmm1 -; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: vmovaps {{.*#+}} xmm1 = [255,0,0,0] +; AVX512DQ-NEXT: vandps %ymm1, %ymm0, %ymm0 ; AVX512DQ-NEXT: retq ; ; AVX512VBMI-LABEL: shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll index 739ea6bae5352..f582a31a607dd 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll @@ -142,9 +142,8 @@ define <8 x double> @shuffle_v8f64_06000000(<8 x double> %a, <8 x double> %b) { define <8 x double> @shuffle_v8f64_70000000(<8 x double> %a, <8 x double> %b) { ; ALL-LABEL: shuffle_v8f64_70000000: ; ALL: # %bb.0: -; ALL-NEXT: movl $7, %eax -; ALL-NEXT: vmovd %eax, %xmm1 -; ALL-NEXT: vpermq %zmm0, %zmm1, %zmm0 +; ALL-NEXT: vmovaps {{.*#+}} xmm1 = [7,0,0,0] +; ALL-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; ALL-NEXT: ret{{[l|q]}} %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> ret <8 x double> %shuffle @@ -961,9 +960,8 @@ define <8 x i64> @shuffle_v8i64_06000000(<8 x i64> %a, <8 x i64> %b) { 
define <8 x i64> @shuffle_v8i64_70000000(<8 x i64> %a, <8 x i64> %b) { ; ALL-LABEL: shuffle_v8i64_70000000: ; ALL: # %bb.0: -; ALL-NEXT: movl $7, %eax -; ALL-NEXT: vmovd %eax, %xmm1 -; ALL-NEXT: vpermq %zmm0, %zmm1, %zmm0 +; ALL-NEXT: vmovaps {{.*#+}} xmm1 = [7,0,0,0] +; ALL-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; ALL-NEXT: ret{{[l|q]}} %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> ret <8 x i64> %shuffle diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll index 257bd03dee8ee..dd1f766dcf667 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll @@ -899,12 +899,10 @@ define <8 x double> @combine_vpermi2var_vpermt2var_8f64_as_vperm2(<8 x double> % define <8 x double> @combine_vpermi2var_8f64_as_permpd(<8 x double> %x0, <8 x double> %x1, i64 %a2) { ; X86-LABEL: combine_vpermi2var_8f64_as_permpd: ; X86: # %bb.0: -; X86-NEXT: movl $2, %eax -; X86-NEXT: vmovd %eax, %xmm2 -; X86-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero -; X86-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] -; X86-NEXT: vinserti128 $1, {{\.LCPI.*}}, %ymm2, %ymm2 -; X86-NEXT: vinserti64x4 $1, {{\.LCPI.*}}, %zmm2, %zmm2 +; X86-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; X86-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],mem[0] +; X86-NEXT: vinsertf128 $1, {{\.LCPI.*}}, %ymm2, %ymm2 +; X86-NEXT: vinsertf64x4 $1, {{\.LCPI.*}}, %zmm2, %zmm2 ; X86-NEXT: vpermi2pd %zmm1, %zmm0, %zmm2 ; X86-NEXT: vpermpd {{.*#+}} zmm0 = zmm2[2,3,1,1,6,7,5,5] ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll index 9c507ad5443e7..4a10a20bc5ff7 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll @@ -131,27 +131,14 @@ define <4 x double> @combine_vpermil2pd256_as_shufpd(<4 x double> %a0, <4 x doub } define <4 x 
double> @demandedelts_vpermil2pd256_as_shufpd(<4 x double> %a0, <4 x double> %a1, i64 %a2) { -; X86-AVX-LABEL: demandedelts_vpermil2pd256_as_shufpd: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movl $4, %eax -; X86-AVX-NEXT: vmovd %eax, %xmm2 -; X86-AVX-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero -; X86-AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] -; X86-AVX-NEXT: vinsertf128 $1, {{\.LCPI.*}}, %ymm2, %ymm2 -; X86-AVX-NEXT: vpermil2pd $0, %ymm2, %ymm1, %ymm0, %ymm0 -; X86-AVX-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,2,3] -; X86-AVX-NEXT: retl -; -; X86-AVX2-LABEL: demandedelts_vpermil2pd256_as_shufpd: -; X86-AVX2: # %bb.0: -; X86-AVX2-NEXT: movl $4, %eax -; X86-AVX2-NEXT: vmovd %eax, %xmm2 -; X86-AVX2-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero -; X86-AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] -; X86-AVX2-NEXT: vinserti128 $1, {{\.LCPI.*}}, %ymm2, %ymm2 -; X86-AVX2-NEXT: vpermil2pd $0, %ymm2, %ymm1, %ymm0, %ymm0 -; X86-AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,2,3] -; X86-AVX2-NEXT: retl +; X86-LABEL: demandedelts_vpermil2pd256_as_shufpd: +; X86: # %bb.0: +; X86-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; X86-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],mem[0] +; X86-NEXT: vinsertf128 $1, {{\.LCPI.*}}, %ymm2, %ymm2 +; X86-NEXT: vpermil2pd $0, %ymm2, %ymm1, %ymm0, %ymm0 +; X86-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,2,3] +; X86-NEXT: retl ; ; X64-LABEL: demandedelts_vpermil2pd256_as_shufpd: ; X64: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll index c511971096200..8ab57bf78a6f2 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll @@ -46,7 +46,7 @@ define <2 x i1> @shuf2i1_1_2(<2 x i1> %a) { ; AVX512F-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 ; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551615,0] ; 
AVX512F-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 ; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} @@ -60,8 +60,7 @@ define <2 x i1> @shuf2i1_1_2(<2 x i1> %a) { ; AVX512VL-NEXT: vptestmq %xmm0, %xmm0, %k1 ; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} {z} -; AVX512VL-NEXT: movq $-1, %rax -; AVX512VL-NEXT: vmovq %rax, %xmm2 +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [18446744073709551615,0] ; AVX512VL-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7] ; AVX512VL-NEXT: vptestmq %xmm1, %xmm1, %k1 ; AVX512VL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} @@ -71,10 +70,9 @@ define <2 x i1> @shuf2i1_1_2(<2 x i1> %a) { ; VL_BW_DQ: # %bb.0: ; VL_BW_DQ-NEXT: vpsllq $63, %xmm0, %xmm0 ; VL_BW_DQ-NEXT: vpmovq2m %xmm0, %k0 -; VL_BW_DQ-NEXT: movq $-1, %rax -; VL_BW_DQ-NEXT: vmovq %rax, %xmm0 -; VL_BW_DQ-NEXT: vpmovm2q %k0, %xmm1 -; VL_BW_DQ-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] +; VL_BW_DQ-NEXT: vpmovm2q %k0, %xmm0 +; VL_BW_DQ-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551615,0] +; VL_BW_DQ-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] ; VL_BW_DQ-NEXT: vpmovq2m %xmm0, %k0 ; VL_BW_DQ-NEXT: vpmovm2q %k0, %xmm0 ; VL_BW_DQ-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-tzcnt-128.ll b/llvm/test/CodeGen/X86/vector-tzcnt-128.ll index 91301f319b02e..5a0507a71799c 100644 --- a/llvm/test/CodeGen/X86/vector-tzcnt-128.ll +++ b/llvm/test/CodeGen/X86/vector-tzcnt-128.ll @@ -1576,32 +1576,32 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) nounwind { define <2 x i64> @foldv2i64() nounwind { ; SSE-LABEL: foldv2i64: ; SSE: # %bb.0: -; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0] +; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,0,0] ; SSE-NEXT: retq ; ; AVX-LABEL: foldv2i64: ; AVX: # %bb.0: -; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0] +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = 
[8,0,0,0] ; AVX-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: foldv2i64: ; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vmovaps {{.*#+}} xmm0 = [8,0] +; AVX512VPOPCNTDQ-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,0,0] ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: foldv2i64: ; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vmovaps {{.*#+}} xmm0 = [8,0] +; AVX512VPOPCNTDQVL-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,0,0] ; AVX512VPOPCNTDQVL-NEXT: retq ; ; BITALG_NOVLX-LABEL: foldv2i64: ; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0] +; BITALG_NOVLX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,0,0] ; BITALG_NOVLX-NEXT: retq ; ; BITALG-LABEL: foldv2i64: ; BITALG: # %bb.0: -; BITALG-NEXT: vmovaps {{.*#+}} xmm0 = [8,0] +; BITALG-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,0,0] ; BITALG-NEXT: retq ; ; X32-SSE-LABEL: foldv2i64: @@ -1615,32 +1615,32 @@ define <2 x i64> @foldv2i64() nounwind { define <2 x i64> @foldv2i64u() nounwind { ; SSE-LABEL: foldv2i64u: ; SSE: # %bb.0: -; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0] +; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,0,0] ; SSE-NEXT: retq ; ; AVX-LABEL: foldv2i64u: ; AVX: # %bb.0: -; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0] +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,0,0] ; AVX-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: foldv2i64u: ; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vmovaps {{.*#+}} xmm0 = [8,0] +; AVX512VPOPCNTDQ-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,0,0] ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: foldv2i64u: ; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vmovaps {{.*#+}} xmm0 = [8,0] +; AVX512VPOPCNTDQVL-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,0,0] ; AVX512VPOPCNTDQVL-NEXT: retq ; ; BITALG_NOVLX-LABEL: foldv2i64u: ; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0] +; BITALG_NOVLX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,0,0] ; BITALG_NOVLX-NEXT: retq ; ; BITALG-LABEL: foldv2i64u: ; BITALG: # %bb.0: -; BITALG-NEXT: vmovaps {{.*#+}} xmm0 = [8,0] +; BITALG-NEXT: vmovaps {{.*#+}} 
xmm0 = [8,0,0,0] ; BITALG-NEXT: retq ; ; X32-SSE-LABEL: foldv2i64u: From 5bb632339ac53e72b81921b6db9c1f0c1fbf63bb Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 25 May 2020 13:09:30 +0100 Subject: [PATCH 023/770] InlineAdvisor.h - remove unnecessary PreservedAnalyses forward declaration. NFC. This is directly defined in PassManager.h --- llvm/include/llvm/Analysis/InlineAdvisor.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h index 118fd236bee48..ac8e7c20429de 100644 --- a/llvm/include/llvm/Analysis/InlineAdvisor.h +++ b/llvm/include/llvm/Analysis/InlineAdvisor.h @@ -22,7 +22,6 @@ class CallBase; class Function; class Module; class OptimizationRemarkEmitter; -class PreservedAnalyses; /// There are 3 scenarios we can use the InlineAdvisor: /// - Default - use manual heuristics. From 8b4ecafee66c405ca33b9d2dc826c2d720160432 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 25 May 2020 13:27:10 +0100 Subject: [PATCH 024/770] InstructionSimplify.h - remove unnecessary includes. NFC. Remove unused User.h include. Replace SetVector.h with forward declaration. Sort the forward declarations + remove FastMathFlags (defined in Operator.h). Fix implicit SetVector.h dependency in LowerConstantIntrinsics.cpp. 
--- llvm/include/llvm/Analysis/InstructionSimplify.h | 13 ++++++------- .../Transforms/Scalar/LowerConstantIntrinsics.cpp | 1 + 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/Analysis/InstructionSimplify.h b/llvm/include/llvm/Analysis/InstructionSimplify.h index 7a9a1a81555b4..2a39a4e090870 100644 --- a/llvm/include/llvm/Analysis/InstructionSimplify.h +++ b/llvm/include/llvm/Analysis/InstructionSimplify.h @@ -31,28 +31,27 @@ #ifndef LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H #define LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H -#include "llvm/ADT/SetVector.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Operator.h" -#include "llvm/IR/User.h" namespace llvm { -class Function; + template class AnalysisManager; template class ArrayRef; class AssumptionCache; +class BinaryOperator; class CallBase; -class DominatorTree; class DataLayout; -class FastMathFlags; +class DominatorTree; +class Function; struct LoopStandardAnalysisResults; +class MDNode; class OptimizationRemarkEmitter; class Pass; +template class SmallSetVector; class TargetLibraryInfo; class Type; class Value; -class MDNode; -class BinaryOperator; /// InstrInfoQuery provides an interface to query additional information for /// instructions like metadata or keywords like nsw, which provides conservative diff --git a/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp index 1dafa17e5c75e..f59280b60d8da 100644 --- a/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp +++ b/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp @@ -13,6 +13,7 @@ #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" From 03ec5b6bc4629b9ce4e11cbf54799995dbcb9c29 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 25 May 2020 13:43:20 +0100 
Subject: [PATCH 025/770] LoopInfo.h - remove unnecessary PHINode forward declaration. NFC. This is directly defined in Instructions.h --- llvm/include/llvm/Analysis/LoopInfo.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h index 0498020249856..35fe2a03a2a2d 100644 --- a/llvm/include/llvm/Analysis/LoopInfo.h +++ b/llvm/include/llvm/Analysis/LoopInfo.h @@ -60,7 +60,6 @@ class Loop; class InductionDescriptor; class MDNode; class MemorySSAUpdater; -class PHINode; class ScalarEvolution; class raw_ostream; template class DominatorTreeBase; From 0e3faab6f0fa00668f97747a6a4afa1bc5647ef9 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 25 May 2020 13:44:27 +0100 Subject: [PATCH 026/770] MemoryBuiltins.h - remove unnecessary TargetLibraryInfo forward declaration. NFC. We already have to include TargetLibraryInfo.h --- llvm/include/llvm/Analysis/MemoryBuiltins.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/Analysis/MemoryBuiltins.h b/llvm/include/llvm/Analysis/MemoryBuiltins.h index 13fc95e00f760..c5428726995e4 100644 --- a/llvm/include/llvm/Analysis/MemoryBuiltins.h +++ b/llvm/include/llvm/Analysis/MemoryBuiltins.h @@ -47,7 +47,6 @@ class LoadInst; class PHINode; class PointerType; class SelectInst; -class TargetLibraryInfo; class Type; class UndefValue; class Value; From 8eae32188bbaa4ac5943f8a98b3b7e4bbba55698 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Wed, 6 May 2020 13:42:01 +0200 Subject: [PATCH 027/770] Improve stack-clash implementation on x86 - test both 32 and 64 bit version - probe the tail in dynamic-alloca - generate more concise code Differential Revision: https://reviews.llvm.org/D79482 --- llvm/lib/Target/X86/X86FrameLowering.cpp | 25 +++-- llvm/lib/Target/X86/X86ISelLowering.cpp | 76 ++++++++------- .../CodeGen/X86/stack-clash-dynamic-alloca.ll | 95 ++++++++++++------- llvm/test/CodeGen/X86/stack-clash-large.ll | 65 ++++++++----- 
llvm/test/CodeGen/X86/stack-clash-medium.ll | 47 +++++---- 5 files changed, 180 insertions(+), 128 deletions(-) diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index 062cf7acc58d4..f320041b2de63 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -275,9 +275,8 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB, // allocation is split in smaller chunks anyway. if (EmitInlineStackProbe && !InEpilogue) { - // stack probing may involve looping, and control flow generations is - // disallowed at this point. Rely to later processing through - // `inlineStackProbe`. + // Delegate stack probing to the `inlineStackProbe` mechanism to avoid + // complications. MachineInstr *Stub = emitStackProbeInlineStub(MF, MBB, MBBI, DL, true); // Encode the static offset as a metadata attached to the stub. @@ -645,6 +644,7 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop( MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset) const { + assert(Offset && "null offset"); const X86Subtarget &STI = MF.getSubtarget(); const X86TargetLowering &TLI = *STI.getTargetLowering(); @@ -662,8 +662,8 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop( MF.insert(MBBIter, testMBB); MF.insert(MBBIter, tailMBB); - unsigned FinalStackPtr = Uses64BitFramePtr ? X86::R11 : X86::R11D; - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FinalStackPtr) + Register FinalStackPtr = Uses64BitFramePtr ? X86::R11 : X86::R11D; + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackPtr) .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); @@ -693,7 +693,7 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop( .setMIFlag(MachineInstr::FrameSetup); // cmp with stack pointer bound - BuildMI(testMBB, DL, TII.get(IsLP64 ? X86::CMP64rr : X86::CMP32rr)) + BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? 
X86::CMP64rr : X86::CMP32rr)) .addReg(StackPtr) .addReg(FinalStackPtr) .setMIFlag(MachineInstr::FrameSetup); @@ -701,23 +701,22 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop( // jump BuildMI(testMBB, DL, TII.get(X86::JCC_1)) .addMBB(testMBB) - .addImm(X86::COND_NE) + .addImm(X86::COND_L) .setMIFlag(MachineInstr::FrameSetup); testMBB->addSuccessor(testMBB); testMBB->addSuccessor(tailMBB); testMBB->addLiveIn(FinalStackPtr); - // allocate a block and touch it - + // BB management tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end()); tailMBB->transferSuccessorsAndUpdatePHIs(&MBB); MBB.addSuccessor(testMBB); + // handle tail if (Offset % StackProbeSize) { - const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset); - BuildMI(*tailMBB, tailMBB->begin(), DL, TII.get(Opc), StackPtr) - .addReg(StackPtr) - .addImm(Offset % StackProbeSize) + BuildMI(*tailMBB, tailMBB->begin(), DL, TII.get(TargetOpcode::COPY), + StackPtr) + .addReg(FinalStackPtr) .setMIFlag(MachineInstr::FrameSetup); } } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index eab9f14bec910..5101977a68edc 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -31612,14 +31612,26 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr &MI, return SinkMBB; } +static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) { + if (IsLP64) { + if (isInt<8>(Imm)) + return X86::SUB64ri8; + return X86::SUB64ri32; + } else { + if (isInt<8>(Imm)) + return X86::SUB32ri8; + return X86::SUB32ri; + } +} + MachineBasicBlock * X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI, - MachineBasicBlock *BB) const { - MachineFunction *MF = BB->getParent(); + MachineBasicBlock *MBB) const { + MachineFunction *MF = MBB->getParent(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); const X86FrameLowering &TFI = *Subtarget.getFrameLowering(); DebugLoc DL = MI.getDebugLoc(); - const BasicBlock *LLVM_BB = 
BB->getBasicBlock(); + const BasicBlock *LLVM_BB = MBB->getBasicBlock(); const unsigned ProbeSize = getStackProbeSize(*MF); @@ -31628,31 +31640,35 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI, MachineBasicBlock *tailMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *blockMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineFunction::iterator MBBIter = ++BB->getIterator(); + MachineFunction::iterator MBBIter = ++MBB->getIterator(); MF->insert(MBBIter, testMBB); MF->insert(MBBIter, blockMBB); MF->insert(MBBIter, tailMBB); - unsigned sizeVReg = MI.getOperand(1).getReg(); + Register sizeVReg = MI.getOperand(1).getReg(); - const TargetRegisterClass *SizeRegClass = MRI.getRegClass(sizeVReg); + Register physSPReg = TFI.Uses64BitFramePtr ? X86::RSP : X86::ESP; - unsigned tmpSizeVReg = MRI.createVirtualRegister(SizeRegClass); - unsigned tmpSizeVReg2 = MRI.createVirtualRegister(SizeRegClass); + Register TmpStackPtr = MRI.createVirtualRegister( + TFI.Uses64BitFramePtr ? &X86::GR64RegClass : &X86::GR32RegClass); + Register FinalStackPtr = MRI.createVirtualRegister( + TFI.Uses64BitFramePtr ? &X86::GR64RegClass : &X86::GR32RegClass); - unsigned physSPReg = TFI.Uses64BitFramePtr ? X86::RSP : X86::ESP; + BuildMI(*MBB, {MI}, DL, TII->get(TargetOpcode::COPY), TmpStackPtr) + .addReg(physSPReg); + { + const unsigned Opc = TFI.Uses64BitFramePtr ? X86::SUB64rr : X86::SUB32rr; + BuildMI(*MBB, {MI}, DL, TII->get(Opc), FinalStackPtr) + .addReg(TmpStackPtr) + .addReg(sizeVReg); + } // test rsp size - BuildMI(testMBB, DL, TII->get(X86::PHI), tmpSizeVReg) - .addReg(sizeVReg) - .addMBB(BB) - .addReg(tmpSizeVReg2) - .addMBB(blockMBB); BuildMI(testMBB, DL, - TII->get(TFI.Uses64BitFramePtr ? X86::CMP64ri32 : X86::CMP32ri)) - .addReg(tmpSizeVReg) - .addImm(ProbeSize); + TII->get(TFI.Uses64BitFramePtr ? 
X86::CMP64rr : X86::CMP32rr)) + .addReg(physSPReg) + .addReg(FinalStackPtr); BuildMI(testMBB, DL, TII->get(X86::JCC_1)) .addMBB(tailMBB) @@ -31663,14 +31679,7 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI, // allocate a block and touch it BuildMI(blockMBB, DL, - TII->get(TFI.Uses64BitFramePtr ? X86::SUB64ri32 : X86::SUB32ri), - tmpSizeVReg2) - .addReg(tmpSizeVReg) - .addImm(ProbeSize); - - BuildMI(blockMBB, DL, - TII->get(TFI.Uses64BitFramePtr ? X86::SUB64ri32 : X86::SUB32ri), - physSPReg) + TII->get(getSUBriOpcode(TFI.Uses64BitFramePtr, ProbeSize)), physSPReg) .addReg(physSPReg) .addImm(ProbeSize); @@ -31682,19 +31691,14 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI, BuildMI(blockMBB, DL, TII->get(X86::JMP_1)).addMBB(testMBB); blockMBB->addSuccessor(testMBB); - // allocate the tail and continue - BuildMI(tailMBB, DL, - TII->get(TFI.Uses64BitFramePtr ? X86::SUB64rr : X86::SUB32rr), - physSPReg) - .addReg(physSPReg) - .addReg(tmpSizeVReg); + // Replace original instruction by the expected stack ptr BuildMI(tailMBB, DL, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg()) - .addReg(physSPReg); + .addReg(FinalStackPtr); - tailMBB->splice(tailMBB->end(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - tailMBB->transferSuccessorsAndUpdatePHIs(BB); - BB->addSuccessor(testMBB); + tailMBB->splice(tailMBB->end(), MBB, + std::next(MachineBasicBlock::iterator(MI)), MBB->end()); + tailMBB->transferSuccessorsAndUpdatePHIs(MBB); + MBB->addSuccessor(testMBB); // Delete the original pseudo instruction. 
MI.eraseFromParent(); diff --git a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll index c0a199e16a941..140da42fc6fbb 100644 --- a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll +++ b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll @@ -1,39 +1,7 @@ -; RUN: llc < %s | FileCheck %s - - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" +; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X86-64 %s +; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86-32 %s define i32 @foo(i32 %n) local_unnamed_addr #0 { - -; CHECK-LABEL: foo: -; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset %rbp, -16 -; CHECK-NEXT: movq %rsp, %rbp -; CHECK-NEXT: .cfi_def_cfa_register %rbp -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: leaq 15(,%rax,4), %rax -; CHECK-NEXT: andq $-16, %rax -; CHECK-NEXT: cmpq $4096, %rax # imm = 0x1000 -; CHECK-NEXT: jl .LBB0_3 -; CHECK-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: subq $4096, %rax # imm = 0x1000 -; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 -; CHECK-NEXT: movq $0, (%rsp) -; CHECK-NEXT: cmpq $4096, %rax # imm = 0x1000 -; CHECK-NEXT: jge .LBB0_2 -; CHECK-NEXT: .LBB0_3: -; CHECK-NEXT: subq %rax, %rsp -; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movl $1, 4792(%rax) -; CHECK-NEXT: movl (%rax), %eax -; CHECK-NEXT: movq %rbp, %rsp -; CHECK-NEXT: popq %rbp -; CHECK-NEXT: .cfi_def_cfa %rsp, 8 -; CHECK-NEXT: retq - %a = alloca i32, i32 %n, align 16 %b = getelementptr inbounds i32, i32* %a, i64 1198 store volatile i32 1, i32* %b @@ -42,3 +10,62 @@ define i32 @foo(i32 %n) local_unnamed_addr #0 { } attributes #0 = {"probe-stack"="inline-asm"} + +; CHECK-X86-64-LABEL: foo: +; CHECK-X86-64: # %bb.0: +; CHECK-X86-64-NEXT: pushq %rbp +; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 16 +; CHECK-X86-64-NEXT: .cfi_offset %rbp, 
-16 +; CHECK-X86-64-NEXT: movq %rsp, %rbp +; CHECK-X86-64-NEXT: .cfi_def_cfa_register %rbp +; CHECK-X86-64-NEXT: movq %rsp, %rax +; CHECK-X86-64-NEXT: movl %edi, %ecx +; CHECK-X86-64-NEXT: leaq 15(,%rcx,4), %rcx +; CHECK-X86-64-NEXT: andq $-16, %rcx +; CHECK-X86-64-NEXT: subq %rcx, %rax +; CHECK-X86-64-NEXT: cmpq %rax, %rsp +; CHECK-X86-64-NEXT: jl .LBB0_3 +; CHECK-X86-64-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1 +; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-X86-64-NEXT: movq $0, (%rsp) +; CHECK-X86-64-NEXT: cmpq %rax, %rsp +; CHECK-X86-64-NEXT: jge .LBB0_2 +; CHECK-X86-64-NEXT: .LBB0_3: +; CHECK-X86-64-NEXT: movq %rax, %rsp +; CHECK-X86-64-NEXT: movl $1, 4792(%rax) +; CHECK-X86-64-NEXT: movl (%rax), %eax +; CHECK-X86-64-NEXT: movq %rbp, %rsp +; CHECK-X86-64-NEXT: popq %rbp +; CHECK-X86-64-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-X86-64-NEXT: retq + + +; CHECK-X86-32-LABEL: foo: +; CHECK-X86-32: # %bb.0: +; CHECK-X86-32-NEXT: pushl %ebp +; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 8 +; CHECK-X86-32-NEXT: .cfi_offset %ebp, -8 +; CHECK-X86-32-NEXT: movl %esp, %ebp +; CHECK-X86-32-NEXT: .cfi_def_cfa_register %ebp +; CHECK-X86-32-NEXT: subl $8, %esp +; CHECK-X86-32-NEXT: movl 8(%ebp), %ecx +; CHECK-X86-32-NEXT: movl %esp, %eax +; CHECK-X86-32-NEXT: leal 15(,%ecx,4), %ecx +; CHECK-X86-32-NEXT: andl $-16, %ecx +; CHECK-X86-32-NEXT: subl %ecx, %eax +; CHECK-X86-32-NEXT: cmpl %eax, %esp +; CHECK-X86-32-NEXT: jl .LBB0_3 +; CHECK-X86-32-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1 +; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000 +; CHECK-X86-32-NEXT: movl $0, (%esp) +; CHECK-X86-32-NEXT: cmpl %eax, %esp +; CHECK-X86-32-NEXT: jge .LBB0_2 +; CHECK-X86-32-NEXT: .LBB0_3: +; CHECK-X86-32-NEXT: movl %eax, %esp +; CHECK-X86-32-NEXT: movl $1, 4792(%eax) +; CHECK-X86-32-NEXT: movl (%eax), %eax +; CHECK-X86-32-NEXT: movl %ebp, %esp +; CHECK-X86-32-NEXT: popl %ebp +; CHECK-X86-32-NEXT: .cfi_def_cfa %esp, 4 +; CHECK-X86-32-NEXT: retl + diff --git 
a/llvm/test/CodeGen/X86/stack-clash-large.ll b/llvm/test/CodeGen/X86/stack-clash-large.ll index f9a5fdc17b84a..ccc52f213e377 100644 --- a/llvm/test/CodeGen/X86/stack-clash-large.ll +++ b/llvm/test/CodeGen/X86/stack-clash-large.ll @@ -1,31 +1,8 @@ -; RUN: llc < %s | FileCheck %s - - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" +; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X86-64 %s +; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86-32 %s define i32 @foo() local_unnamed_addr #0 { -; CHECK-LABEL: foo: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %rsp, %r11 -; CHECK-NEXT: subq $69632, %r11 # imm = 0x11000 -; CHECK-NEXT: .LBB0_1: -; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 -; CHECK-NEXT: movq $0, (%rsp) -; CHECK-NEXT: cmpq %r11, %rsp -; CHECK-NEXT: jne .LBB0_1 -; CHECK-NEXT:# %bb.2: -; CHECK-NEXT: subq $2248, %rsp # imm = 0x8C8 -; CHECK-NEXT: .cfi_def_cfa_offset 71888 -; CHECK-NEXT: movl $1, 264(%rsp) -; CHECK-NEXT: movl $1, 28664(%rsp) -; CHECK-NEXT: movl -128(%rsp), %eax -; CHECK-NEXT: addq $71880, %rsp # imm = 0x118C8 -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq - - %a = alloca i32, i64 18000, align 16 %b0 = getelementptr inbounds i32, i32* %a, i64 98 %b1 = getelementptr inbounds i32, i32* %a, i64 7198 @@ -36,3 +13,41 @@ define i32 @foo() local_unnamed_addr #0 { } attributes #0 = {"probe-stack"="inline-asm"} + +; CHECK-X86-64-LABEL: foo: +; CHECK-X86-64: # %bb.0: +; CHECK-X86-64-NEXT: movq %rsp, %r11 +; CHECK-X86-64-NEXT: subq $69632, %r11 # imm = 0x11000 +; CHECK-X86-64-NEXT: .LBB0_1: +; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-X86-64-NEXT: movq $0, (%rsp) +; CHECK-X86-64-NEXT: cmpq %r11, %rsp +; CHECK-X86-64-NEXT: jl .LBB0_1 +; CHECK-X86-64-NEXT:# %bb.2: +; CHECK-X86-64-NEXT: movq %r11, %rsp +; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 71888 +; CHECK-X86-64-NEXT: movl $1, 264(%rsp) +; CHECK-X86-64-NEXT: movl $1, 
28664(%rsp) +; CHECK-X86-64-NEXT: movl -128(%rsp), %eax +; CHECK-X86-64-NEXT: addq $71880, %rsp # imm = 0x118C8 +; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 8 +; CHECK-X86-64-NEXT: retq + +; CHECK-X86-32-LABEL: foo: +; CHECK-X86-32: # %bb.0: +; CHECK-X86-32-NEXT: movl %esp, %r11d +; CHECK-X86-32-NEXT: subl $69632, %r11d # imm = 0x11000 +; CHECK-X86-32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000 +; CHECK-X86-32-NEXT: movl $0, (%esp) +; CHECK-X86-32-NEXT: cmpl %r11d, %esp +; CHECK-X86-32-NEXT: jl .LBB0_1 +; CHECK-X86-32-NEXT:# %bb.2: +; CHECK-X86-32-NEXT: movl %r11d, %esp +; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 72016 +; CHECK-X86-32-NEXT: movl $1, 392(%esp) +; CHECK-X86-32-NEXT: movl $1, 28792(%esp) +; CHECK-X86-32-NEXT: movl (%esp), %eax +; CHECK-X86-32-NEXT: addl $72012, %esp # imm = 0x1194C +; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 4 +; CHECK-X86-32-NEXT: retl diff --git a/llvm/test/CodeGen/X86/stack-clash-medium.ll b/llvm/test/CodeGen/X86/stack-clash-medium.ll index 05af3478cfc07..5a97074025f1f 100644 --- a/llvm/test/CodeGen/X86/stack-clash-medium.ll +++ b/llvm/test/CodeGen/X86/stack-clash-medium.ll @@ -1,25 +1,7 @@ -; RUN: llc < %s | FileCheck %s - - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" +; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X86-64 %s +; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86-32 %s define i32 @foo() local_unnamed_addr #0 { - -; CHECK-LABEL: foo: -; CHECK: # %bb.0: -; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 -; CHECK-NEXT: movq $0, (%rsp) -; CHECK-NEXT: subq $3784, %rsp # imm = 0xEC8 -; CHECK-NEXT: .cfi_def_cfa_offset 7888 -; CHECK-NEXT: movl $1, 672(%rsp) -; CHECK-NEXT: movl -128(%rsp), %eax -; CHECK-NEXT: addq $7880, %rsp # imm = 0x1EC8 -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq - - - %a = alloca i32, i64 2000, align 16 %b = getelementptr 
inbounds i32, i32* %a, i64 200 store volatile i32 1, i32* %b @@ -28,3 +10,28 @@ define i32 @foo() local_unnamed_addr #0 { } attributes #0 = {"probe-stack"="inline-asm"} + +; CHECK-X86-64-LABEL: foo: +; CHECK-X86-64: # %bb.0: +; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-X86-64-NEXT: movq $0, (%rsp) +; CHECK-X86-64-NEXT: subq $3784, %rsp # imm = 0xEC8 +; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 7888 +; CHECK-X86-64-NEXT: movl $1, 672(%rsp) +; CHECK-X86-64-NEXT: movl -128(%rsp), %eax +; CHECK-X86-64-NEXT: addq $7880, %rsp # imm = 0x1EC8 +; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 8 +; CHECK-X86-64-NEXT: retq + + +; CHECK-X86-32-LABEL: foo: +; CHECK-X86-32: # %bb.0: +; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000 +; CHECK-X86-32-NEXT: movl $0, (%esp) +; CHECK-X86-32-NEXT: subl $3916, %esp # imm = 0xF4C +; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 8016 +; CHECK-X86-32-NEXT: movl $1, 800(%esp) +; CHECK-X86-32-NEXT: movl (%esp), %eax +; CHECK-X86-32-NEXT: addl $8012, %esp # imm = 0x1F4C +; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 4 +; CHECK-X86-32-NEXT: retl From 6ade4eb91811c7e7c59634d2de2767421d13a99b Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 25 May 2020 13:59:46 +0100 Subject: [PATCH 028/770] MemoryLocation.h - reduce Instructions.h include to Instruction.h include. NFC. Add forward declarations for the few Instr classes we reference. 
--- llvm/include/llvm/Analysis/MemoryLocation.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/Analysis/MemoryLocation.h b/llvm/include/llvm/Analysis/MemoryLocation.h index 9e5e43bbd5f5b..ce70df66ab7a8 100644 --- a/llvm/include/llvm/Analysis/MemoryLocation.h +++ b/llvm/include/llvm/Analysis/MemoryLocation.h @@ -17,21 +17,25 @@ #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/Optional.h" -#include "llvm/IR/Instructions.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/TypeSize.h" namespace llvm { +class CallBase; class LoadInst; class StoreInst; class MemTransferInst; class MemIntrinsic; +class AtomicCmpXchgInst; class AtomicMemTransferInst; class AtomicMemIntrinsic; +class AtomicRMWInst; class AnyMemTransferInst; class AnyMemIntrinsic; class TargetLibraryInfo; +class VAArgInst; // Represents the size of a MemoryLocation. Logically, it's an // Optional that also carries a bit to represent whether the integer From 82bee922afd65bf884abb9ea3db3fc7fede4e1cf Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 25 May 2020 15:12:08 +0200 Subject: [PATCH 029/770] Make FEATURE_AVX512VP2INTERSECT match between compiler-rt and LLVM compiler-rt also doesn't support bits >= 64 as far as I know. 
--- llvm/include/llvm/Support/X86TargetParser.def | 2 +- llvm/lib/Support/Host.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def index f4780c7d113e4..c826f590b71f3 100644 --- a/llvm/include/llvm/Support/X86TargetParser.def +++ b/llvm/include/llvm/Support/X86TargetParser.def @@ -162,7 +162,7 @@ X86_FEATURE_COMPAT(33, FEATURE_VPCLMULQDQ, "vpclmulqdq") X86_FEATURE_COMPAT(34, FEATURE_AVX512VNNI, "avx512vnni") X86_FEATURE_COMPAT(35, FEATURE_AVX512BITALG, "avx512bitalg") X86_FEATURE_COMPAT(36, FEATURE_AVX512BF16, "avx512bf16") -X86_FEATURE_COMPAT(69, FEATURE_AVX512VP2INTERSECT, "avx512vp2intersect") +X86_FEATURE_COMPAT(37, FEATURE_AVX512VP2INTERSECT, "avx512vp2intersect") // Features below here are not in libgcc/compiler-rt. X86_FEATURE (64, FEATURE_MOVBE) X86_FEATURE (65, FEATURE_ADX) diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index d9187efafbc19..d9b3cac5e8dc0 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -776,7 +776,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, default: // Unknown family 6 CPU, try to guess. 
// TODO detect tigerlake host - if (Features3 & (1 << (X86::FEATURE_AVX512VP2INTERSECT - 64))) { + if (Features2 & (1 << (X86::FEATURE_AVX512VP2INTERSECT - 32))) { *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_TIGERLAKE; break; From 5d6c5b463cab7aeb74b20f51af88ba1d1658f8a8 Mon Sep 17 00:00:00 2001 From: Whitney Tsang Date: Mon, 25 May 2020 13:31:57 +0000 Subject: [PATCH 030/770] [LoopUtils] Use llvm::find Summary: Fixes this build error: llvm/lib/Transforms/Utils/LoopUtils.cpp:679:26: error: no matching function for call to 'find' Loop::iterator I = find(ParentLoop->begin(), ParentLoop->end(), L); ^~~~ Authored By: orivej Reviewer: Whitney Reviewed By: Whitney Subscribers: hiraditya, llvm-commits Tag: LLVM Differential Revision: https://reviews.llvm.org/D80473 --- llvm/lib/Transforms/Utils/LoopUtils.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 8c7475eae6e32..d7ea9923ed2e9 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -713,11 +713,11 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, // its parent. While removeLoop/removeChildLoop remove the given loop but // not relink its subloops, which is what we want. 
if (Loop *ParentLoop = L->getParentLoop()) { - Loop::iterator I = find(ParentLoop->begin(), ParentLoop->end(), L); + Loop::iterator I = find(*ParentLoop, L); assert(I != ParentLoop->end() && "Couldn't find loop"); ParentLoop->removeChildLoop(I); } else { - Loop::iterator I = find(LI->begin(), LI->end(), L); + Loop::iterator I = find(*LI, L); assert(I != LI->end() && "Couldn't find loop"); LI->removeLoop(I); } From 356bf5ea5d91642b7a932a368804cef6733133c2 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Mon, 25 May 2020 15:56:48 +0200 Subject: [PATCH 031/770] Stack clash: update live-ins This fixes http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-debian/builds/7150 --- llvm/lib/Target/X86/X86FrameLowering.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index f320041b2de63..5ee7c87a42b9b 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -705,7 +705,6 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop( .setMIFlag(MachineInstr::FrameSetup); testMBB->addSuccessor(testMBB); testMBB->addSuccessor(tailMBB); - testMBB->addLiveIn(FinalStackPtr); // BB management tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end()); @@ -719,6 +718,10 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop( .addReg(FinalStackPtr) .setMIFlag(MachineInstr::FrameSetup); } + + // Update Live In information + recomputeLiveIns(*testMBB); + recomputeLiveIns(*tailMBB); } void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64( From 46e5c5fe778b92b2a7e2c2ad3610e1da6794bd5e Mon Sep 17 00:00:00 2001 From: Denys Petrov Date: Fri, 22 May 2020 13:10:37 +0300 Subject: [PATCH 032/770] [ManagedStatic] Fix build errors with clang-tblgen in Debug mode using MSVC 2019 v16.6 After updating MSVS19 from v16.4 to v16.6 I faced with a build errors compiling in Debug mode. 
It complains on clang-tblgen.exe and llvm-tblgen.exe cmd line args. VS compiler had a bug. It dynamically creates an object with constexpr ctor in Debug mode. This bug was fixed in VS2019 v16.5. A workaround was implemented for that and everything works until v16.5 comes. The workaround became irrelevant since v16.5 and caused build errors. So I disabled the workaround for VS2019 v16.5 and higher. This relates to http://llvm.org/PR41367. Differential Revision: https://reviews.llvm.org/D80433 --- llvm/include/llvm/Support/ManagedStatic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/Support/ManagedStatic.h b/llvm/include/llvm/Support/ManagedStatic.h index bbd0d04ed0404..f2b41422f1315 100644 --- a/llvm/include/llvm/Support/ManagedStatic.h +++ b/llvm/include/llvm/Support/ManagedStatic.h @@ -40,8 +40,8 @@ template struct object_deleter { // constexpr, a dynamic initializer may be emitted depending on optimization // settings. For the affected versions of MSVC, use the old linker // initialization pattern of not providing a constructor and leaving the fields -// uninitialized. -#if !defined(_MSC_VER) || defined(__clang__) +// uninitialized. See http://llvm.org/PR41367 for details. +#if !defined(_MSC_VER) || (_MSC_VER >= 1925) || defined(__clang__) #define LLVM_USE_CONSTEXPR_CTOR #endif From ba03bcbc4a21b92f6a4a54bd6e90417956da7952 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Mon, 25 May 2020 15:59:48 +0200 Subject: [PATCH 033/770] [lldb] Remove custom DWARF expression printing code The llvm DWARFExpression dump is nearly identical, but better -- for example it does not print a spurious space after zero-argument expressions. Some parts of our code (variable locations) have already been switched to llvm-based expression dumping. This switches the remainder: unwind plans and some unit tests.
--- .../include/lldb/Expression/DWARFExpression.h | 8 - lldb/source/Expression/DWARFExpression.cpp | 229 ------------- .../Plugins/SymbolFile/DWARF/DWARFDefines.cpp | 315 ------------------ .../Plugins/SymbolFile/DWARF/DWARFDefines.h | 40 --- lldb/source/Symbol/UnwindPlan.cpp | 12 +- .../SymbolFile/Breakpad/stack-cfi-arm.yaml | 3 +- .../Breakpad/stack-cfi-parsing.test | 4 +- .../Breakpad/unwind-via-raSearch.test | 4 +- .../Breakpad/unwind-via-stack-cfi.test | 2 +- .../Breakpad/unwind-via-stack-win.test | 4 +- .../Shell/Unwind/eh-frame-dwarf-unwind.test | 2 +- .../Shell/Unwind/unwind-plan-dwarf-dump.test | 2 +- .../Symbol/PostfixExpressionTest.cpp | 41 ++- .../PdbFPOProgramToDWARFExpressionTests.cpp | 34 +- 14 files changed, 47 insertions(+), 653 deletions(-) diff --git a/lldb/include/lldb/Expression/DWARFExpression.h b/lldb/include/lldb/Expression/DWARFExpression.h index 302936172ed2c..6b63b186e3e43 100644 --- a/lldb/include/lldb/Expression/DWARFExpression.h +++ b/lldb/include/lldb/Expression/DWARFExpression.h @@ -217,14 +217,6 @@ class DWARFExpression { lldb::addr_t func_load_addr, lldb::addr_t address, ABI *abi); - static bool PrintDWARFExpression(Stream &s, const DataExtractor &data, - int address_size, int dwarf_ref_size, - bool location_expression); - - static void PrintDWARFLocationList(Stream &s, const DWARFUnit *cu, - const DataExtractor &debug_loc_data, - lldb::offset_t offset); - bool MatchesOperand(StackFrame &frame, const Instruction::Operand &op); private: diff --git a/lldb/source/Expression/DWARFExpression.cpp b/lldb/source/Expression/DWARFExpression.cpp index e85b5f341fe5b..f8fc1db7ec29f 100644 --- a/lldb/source/Expression/DWARFExpression.cpp +++ b/lldb/source/Expression/DWARFExpression.cpp @@ -2566,235 +2566,6 @@ bool DWARFExpression::Evaluate( return true; // Return true on success } -static bool print_dwarf_exp_op(Stream &s, const DataExtractor &data, - lldb::offset_t *offset_ptr, int address_size, - int dwarf_ref_size) { - uint8_t opcode = 
data.GetU8(offset_ptr); - DRC_class opcode_class; - uint64_t uint; - int64_t sint; - - int size; - - opcode_class = DW_OP_value_to_class(opcode) & (~DRC_DWARFv3); - - s.Printf("%s ", DW_OP_value_to_name(opcode)); - - /* Does this take zero parameters? If so we can shortcut this function. */ - if (opcode_class == DRC_ZEROOPERANDS) - return true; - - if (opcode_class == DRC_TWOOPERANDS && opcode == DW_OP_bregx) { - uint = data.GetULEB128(offset_ptr); - sint = data.GetSLEB128(offset_ptr); - s.Printf("%" PRIu64 " %" PRIi64, uint, sint); - return true; - } - if (opcode_class == DRC_TWOOPERANDS && opcode == DW_OP_entry_value) { - uint = data.GetULEB128(offset_ptr); - s.Printf("%" PRIu64 " ", uint); - return true; - } - if (opcode_class != DRC_ONEOPERAND) { - s.Printf("UNKNOWN OP %u", opcode); - return false; - } - - switch (opcode) { - case DW_OP_addr: - size = address_size; - break; - case DW_OP_const1u: - size = 1; - break; - case DW_OP_const1s: - size = -1; - break; - case DW_OP_const2u: - size = 2; - break; - case DW_OP_const2s: - size = -2; - break; - case DW_OP_const4u: - size = 4; - break; - case DW_OP_const4s: - size = -4; - break; - case DW_OP_const8u: - size = 8; - break; - case DW_OP_const8s: - size = -8; - break; - case DW_OP_constu: - size = 128; - break; - case DW_OP_consts: - size = -128; - break; - case DW_OP_fbreg: - size = -128; - break; - case DW_OP_breg0: - case DW_OP_breg1: - case DW_OP_breg2: - case DW_OP_breg3: - case DW_OP_breg4: - case DW_OP_breg5: - case DW_OP_breg6: - case DW_OP_breg7: - case DW_OP_breg8: - case DW_OP_breg9: - case DW_OP_breg10: - case DW_OP_breg11: - case DW_OP_breg12: - case DW_OP_breg13: - case DW_OP_breg14: - case DW_OP_breg15: - case DW_OP_breg16: - case DW_OP_breg17: - case DW_OP_breg18: - case DW_OP_breg19: - case DW_OP_breg20: - case DW_OP_breg21: - case DW_OP_breg22: - case DW_OP_breg23: - case DW_OP_breg24: - case DW_OP_breg25: - case DW_OP_breg26: - case DW_OP_breg27: - case DW_OP_breg28: - case DW_OP_breg29: - case 
DW_OP_breg30: - case DW_OP_breg31: - size = -128; - break; - case DW_OP_pick: - case DW_OP_deref_size: - case DW_OP_xderef_size: - size = 1; - break; - case DW_OP_skip: - case DW_OP_bra: - size = -2; - break; - case DW_OP_call2: - size = 2; - break; - case DW_OP_call4: - size = 4; - break; - case DW_OP_call_ref: - size = dwarf_ref_size; - break; - case DW_OP_addrx: - case DW_OP_piece: - case DW_OP_plus_uconst: - case DW_OP_regx: - case DW_OP_GNU_addr_index: - case DW_OP_GNU_const_index: - case DW_OP_entry_value: - size = 128; - break; - default: - s.Printf("UNKNOWN ONE-OPERAND OPCODE, #%u", opcode); - return false; - } - - switch (size) { - case -1: - sint = (int8_t)data.GetU8(offset_ptr); - s.Printf("%+" PRIi64, sint); - break; - case -2: - sint = (int16_t)data.GetU16(offset_ptr); - s.Printf("%+" PRIi64, sint); - break; - case -4: - sint = (int32_t)data.GetU32(offset_ptr); - s.Printf("%+" PRIi64, sint); - break; - case -8: - sint = (int64_t)data.GetU64(offset_ptr); - s.Printf("%+" PRIi64, sint); - break; - case -128: - sint = data.GetSLEB128(offset_ptr); - s.Printf("%+" PRIi64, sint); - break; - case 1: - uint = data.GetU8(offset_ptr); - s.Printf("0x%2.2" PRIx64, uint); - break; - case 2: - uint = data.GetU16(offset_ptr); - s.Printf("0x%4.4" PRIx64, uint); - break; - case 4: - uint = data.GetU32(offset_ptr); - s.Printf("0x%8.8" PRIx64, uint); - break; - case 8: - uint = data.GetU64(offset_ptr); - s.Printf("0x%16.16" PRIx64, uint); - break; - case 128: - uint = data.GetULEB128(offset_ptr); - s.Printf("0x%" PRIx64, uint); - break; - } - - return true; -} - -bool DWARFExpression::PrintDWARFExpression(Stream &s, const DataExtractor &data, - int address_size, int dwarf_ref_size, - bool location_expression) { - int op_count = 0; - lldb::offset_t offset = 0; - while (data.ValidOffset(offset)) { - if (location_expression && op_count > 0) - return false; - if (op_count > 0) - s.PutCString(", "); - if (!print_dwarf_exp_op(s, data, &offset, address_size, dwarf_ref_size)) - 
return false; - op_count++; - } - - return true; -} - -void DWARFExpression::PrintDWARFLocationList( - Stream &s, const DWARFUnit *cu, const DataExtractor &debug_loc_data, - lldb::offset_t offset) { - uint64_t start_addr, end_addr; - uint32_t addr_size = DWARFUnit::GetAddressByteSize(cu); - s.SetAddressByteSize(DWARFUnit::GetAddressByteSize(cu)); - dw_addr_t base_addr = cu ? cu->GetBaseAddress() : 0; - while (debug_loc_data.ValidOffset(offset)) { - start_addr = debug_loc_data.GetMaxU64(&offset, addr_size); - end_addr = debug_loc_data.GetMaxU64(&offset, addr_size); - - if (start_addr == 0 && end_addr == 0) - break; - - s.PutCString("\n "); - s.Indent(); - if (cu) - DumpAddressRange(s.AsRawOstream(), start_addr + base_addr, - end_addr + base_addr, cu->GetAddressByteSize(), nullptr, - ": "); - uint32_t loc_length = debug_loc_data.GetU16(&offset); - - DataExtractor locationData(debug_loc_data, offset, loc_length); - PrintDWARFExpression(s, locationData, addr_size, 4, false); - offset += loc_length; - } -} - static DataExtractor ToDataExtractor(const llvm::DWARFLocationExpression &loc, ByteOrder byte_order, uint32_t addr_size) { auto buffer_sp = diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.cpp index 29db1101d9971..4e99a295ce50f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.cpp @@ -58,321 +58,6 @@ const char *DW_OP_value_to_name(uint32_t val) { return llvmstr.data(); } -DRC_class DW_OP_value_to_class(uint32_t val) { - // FIXME: If we just used llvm's DWARFExpression printer, we could delete - // all this code (and more in lldb's DWARFExpression.cpp). 
- switch (val) { - case 0x03: - return DRC_ONEOPERAND; - case 0x06: - return DRC_ZEROOPERANDS; - case 0x08: - return DRC_ONEOPERAND; - case 0x09: - return DRC_ONEOPERAND; - case 0x0a: - return DRC_ONEOPERAND; - case 0x0b: - return DRC_ONEOPERAND; - case 0x0c: - return DRC_ONEOPERAND; - case 0x0d: - return DRC_ONEOPERAND; - case 0x0e: - return DRC_ONEOPERAND; - case 0x0f: - return DRC_ONEOPERAND; - case 0x10: - return DRC_ONEOPERAND; - case 0x11: - return DRC_ONEOPERAND; - case 0x12: - return DRC_ZEROOPERANDS; - case 0x13: - return DRC_ZEROOPERANDS; - case 0x14: - return DRC_ZEROOPERANDS; - case 0x15: - return DRC_ONEOPERAND; - case 0x16: - return DRC_ZEROOPERANDS; - case 0x17: - return DRC_ZEROOPERANDS; - case 0x18: - return DRC_ZEROOPERANDS; - case 0x19: - return DRC_ZEROOPERANDS; - case 0x1a: - return DRC_ZEROOPERANDS; - case 0x1b: - return DRC_ZEROOPERANDS; - case 0x1c: - return DRC_ZEROOPERANDS; - case 0x1d: - return DRC_ZEROOPERANDS; - case 0x1e: - return DRC_ZEROOPERANDS; - case 0x1f: - return DRC_ZEROOPERANDS; - case 0x20: - return DRC_ZEROOPERANDS; - case 0x21: - return DRC_ZEROOPERANDS; - case 0x22: - return DRC_ZEROOPERANDS; - case 0x23: - return DRC_ONEOPERAND; - case 0x24: - return DRC_ZEROOPERANDS; - case 0x25: - return DRC_ZEROOPERANDS; - case 0x26: - return DRC_ZEROOPERANDS; - case 0x27: - return DRC_ZEROOPERANDS; - case 0x2f: - return DRC_ONEOPERAND; - case 0x28: - return DRC_ONEOPERAND; - case 0x29: - return DRC_ZEROOPERANDS; - case 0x2a: - return DRC_ZEROOPERANDS; - case 0x2b: - return DRC_ZEROOPERANDS; - case 0x2c: - return DRC_ZEROOPERANDS; - case 0x2d: - return DRC_ZEROOPERANDS; - case 0x2e: - return DRC_ZEROOPERANDS; - case 0x30: - return DRC_ZEROOPERANDS; - case 0x31: - return DRC_ZEROOPERANDS; - case 0x32: - return DRC_ZEROOPERANDS; - case 0x33: - return DRC_ZEROOPERANDS; - case 0x34: - return DRC_ZEROOPERANDS; - case 0x35: - return DRC_ZEROOPERANDS; - case 0x36: - return DRC_ZEROOPERANDS; - case 0x37: - return DRC_ZEROOPERANDS; - case 0x38: 
- return DRC_ZEROOPERANDS; - case 0x39: - return DRC_ZEROOPERANDS; - case 0x3a: - return DRC_ZEROOPERANDS; - case 0x3b: - return DRC_ZEROOPERANDS; - case 0x3c: - return DRC_ZEROOPERANDS; - case 0x3d: - return DRC_ZEROOPERANDS; - case 0x3e: - return DRC_ZEROOPERANDS; - case 0x3f: - return DRC_ZEROOPERANDS; - case 0x40: - return DRC_ZEROOPERANDS; - case 0x41: - return DRC_ZEROOPERANDS; - case 0x42: - return DRC_ZEROOPERANDS; - case 0x43: - return DRC_ZEROOPERANDS; - case 0x44: - return DRC_ZEROOPERANDS; - case 0x45: - return DRC_ZEROOPERANDS; - case 0x46: - return DRC_ZEROOPERANDS; - case 0x47: - return DRC_ZEROOPERANDS; - case 0x48: - return DRC_ZEROOPERANDS; - case 0x49: - return DRC_ZEROOPERANDS; - case 0x4a: - return DRC_ZEROOPERANDS; - case 0x4b: - return DRC_ZEROOPERANDS; - case 0x4c: - return DRC_ZEROOPERANDS; - case 0x4d: - return DRC_ZEROOPERANDS; - case 0x4e: - return DRC_ZEROOPERANDS; - case 0x4f: - return DRC_ZEROOPERANDS; - case 0x50: - return DRC_ZEROOPERANDS; - case 0x51: - return DRC_ZEROOPERANDS; - case 0x52: - return DRC_ZEROOPERANDS; - case 0x53: - return DRC_ZEROOPERANDS; - case 0x54: - return DRC_ZEROOPERANDS; - case 0x55: - return DRC_ZEROOPERANDS; - case 0x56: - return DRC_ZEROOPERANDS; - case 0x57: - return DRC_ZEROOPERANDS; - case 0x58: - return DRC_ZEROOPERANDS; - case 0x59: - return DRC_ZEROOPERANDS; - case 0x5a: - return DRC_ZEROOPERANDS; - case 0x5b: - return DRC_ZEROOPERANDS; - case 0x5c: - return DRC_ZEROOPERANDS; - case 0x5d: - return DRC_ZEROOPERANDS; - case 0x5e: - return DRC_ZEROOPERANDS; - case 0x5f: - return DRC_ZEROOPERANDS; - case 0x60: - return DRC_ZEROOPERANDS; - case 0x61: - return DRC_ZEROOPERANDS; - case 0x62: - return DRC_ZEROOPERANDS; - case 0x63: - return DRC_ZEROOPERANDS; - case 0x64: - return DRC_ZEROOPERANDS; - case 0x65: - return DRC_ZEROOPERANDS; - case 0x66: - return DRC_ZEROOPERANDS; - case 0x67: - return DRC_ZEROOPERANDS; - case 0x68: - return DRC_ZEROOPERANDS; - case 0x69: - return DRC_ZEROOPERANDS; - case 0x6a: 
- return DRC_ZEROOPERANDS; - case 0x6b: - return DRC_ZEROOPERANDS; - case 0x6c: - return DRC_ZEROOPERANDS; - case 0x6d: - return DRC_ZEROOPERANDS; - case 0x6e: - return DRC_ZEROOPERANDS; - case 0x6f: - return DRC_ZEROOPERANDS; - case 0x70: - return DRC_ONEOPERAND; - case 0x71: - return DRC_ONEOPERAND; - case 0x72: - return DRC_ONEOPERAND; - case 0x73: - return DRC_ONEOPERAND; - case 0x74: - return DRC_ONEOPERAND; - case 0x75: - return DRC_ONEOPERAND; - case 0x76: - return DRC_ONEOPERAND; - case 0x77: - return DRC_ONEOPERAND; - case 0x78: - return DRC_ONEOPERAND; - case 0x79: - return DRC_ONEOPERAND; - case 0x7a: - return DRC_ONEOPERAND; - case 0x7b: - return DRC_ONEOPERAND; - case 0x7c: - return DRC_ONEOPERAND; - case 0x7d: - return DRC_ONEOPERAND; - case 0x7e: - return DRC_ONEOPERAND; - case 0x7f: - return DRC_ONEOPERAND; - case 0x80: - return DRC_ONEOPERAND; - case 0x81: - return DRC_ONEOPERAND; - case 0x82: - return DRC_ONEOPERAND; - case 0x83: - return DRC_ONEOPERAND; - case 0x84: - return DRC_ONEOPERAND; - case 0x85: - return DRC_ONEOPERAND; - case 0x86: - return DRC_ONEOPERAND; - case 0x87: - return DRC_ONEOPERAND; - case 0x88: - return DRC_ONEOPERAND; - case 0x89: - return DRC_ONEOPERAND; - case 0x8a: - return DRC_ONEOPERAND; - case 0x8b: - return DRC_ONEOPERAND; - case 0x8c: - return DRC_ONEOPERAND; - case 0x8d: - return DRC_ONEOPERAND; - case 0x8e: - return DRC_ONEOPERAND; - case 0x8f: - return DRC_ONEOPERAND; - case 0x90: - return DRC_ONEOPERAND; - case 0x91: - return DRC_ONEOPERAND; - case 0x92: - return DRC_TWOOPERANDS; - case 0x93: - return DRC_ONEOPERAND; - case 0x94: - return DRC_ONEOPERAND; - case 0x95: - return DRC_ONEOPERAND; - case 0x96: - return DRC_ZEROOPERANDS; - case 0x97: - return DRC_DWARFv3 | DRC_ZEROOPERANDS; - case 0x98: - return DRC_DWARFv3 | DRC_ONEOPERAND; - case 0x99: - return DRC_DWARFv3 | DRC_ONEOPERAND; - case 0x9a: - return DRC_DWARFv3 | DRC_ONEOPERAND; - case 0xa3: /* DW_OP_entry_value */ - return DRC_TWOOPERANDS; - case 0xf0: - 
return DRC_ZEROOPERANDS; /* DW_OP_APPLE_uninit */ - case 0xe0: - return 0; - case 0xff: - return 0; - default: - return 0; - } -} - const char *DW_ATE_value_to_name(uint32_t val) { static char invalid[100]; llvm::StringRef llvmstr = llvm::dwarf::AttributeEncodingString(val); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.h index 670fde262e1e8..1b7102cd7e317 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.h @@ -26,52 +26,12 @@ const char *DW_FORM_value_to_name(uint32_t val); const char *DW_OP_value_to_name(uint32_t val); -DRC_class DW_OP_value_to_class(uint32_t val); - const char *DW_ATE_value_to_name(uint32_t val); const char *DW_LANG_value_to_name(uint32_t val); const char *DW_LNS_value_to_name(uint32_t val); -/* These DRC are entirely our own construction, - although they are derived from various comments in the DWARF standard. - Most of these are not useful to the parser, but the DW_AT and DW_FORM - classes should prove to be usable in some fashion. 
*/ - -#define DRC_0x65 0x1 -#define DRC_ADDRESS 0x2 -#define DRC_BLOCK 0x4 -#define DRC_CONSTANT 0x8 -#define DRC_DWARFv3 0x10 -#define DRC_FLAG 0x20 -#define DRC_INDIRECT_SPECIAL 0x40 -#define DRC_LINEPTR 0x80 -#define DRC_LOCEXPR 0x100 -#define DRC_LOCLISTPTR 0x200 -#define DRC_MACPTR 0x400 -#define DRC_ONEOPERAND 0x800 -#define DRC_OPERANDONE_1BYTE_DELTA 0x1000 -#define DRC_OPERANDONE_2BYTE_DELTA 0x2000 -#define DRC_OPERANDONE_4BYTE_DELTA 0x4000 -#define DRC_OPERANDONE_ADDRESS 0x8000 -#define DRC_OPERANDONE_BLOCK 0x10000 -#define DRC_OPERANDONE_SLEB128_OFFSET 0x20000 -#define DRC_OPERANDONE_ULEB128_OFFSET 0x40000 -#define DRC_OPERANDONE_ULEB128_REGISTER 0x80000 -#define DRC_OPERANDTWO_BLOCK 0x100000 -#define DRC_OPERANDTWO_SLEB128_OFFSET 0x200000 -#define DRC_OPERANDTWO_ULEB128_OFFSET 0x400000 -#define DRC_OPERANDTWO_ULEB128_REGISTER 0x800000 -#define DRC_OPERNADONE_ULEB128_REGISTER 0x1000000 -#define DRC_RANGELISTPTR 0x2000000 -#define DRC_REFERENCE 0x4000000 -#define DRC_STRING 0x8000000 -#define DRC_TWOOPERANDS 0x10000000 -#define DRC_VENDOR_GNU 0x20000000 -#define DRC_VENDOR_MIPS 0x40000000 -#define DRC_ZEROOPERANDS 0x80000000 - } // namespace lldb_private #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEFINES_H diff --git a/lldb/source/Symbol/UnwindPlan.cpp b/lldb/source/Symbol/UnwindPlan.cpp index 438b329274839..e8906f38e2ffe 100644 --- a/lldb/source/Symbol/UnwindPlan.cpp +++ b/lldb/source/Symbol/UnwindPlan.cpp @@ -15,6 +15,7 @@ #include "lldb/Target/Thread.h" #include "lldb/Utility/ConstString.h" #include "lldb/Utility/Log.h" +#include "llvm/DebugInfo/DWARF/DWARFExpression.h" using namespace lldb; using namespace lldb_private; @@ -79,13 +80,10 @@ GetByteOrderAndAddrSize(Thread *thread) { static void DumpDWARFExpr(Stream &s, llvm::ArrayRef expr, Thread *thread) { if (auto order_and_width = GetByteOrderAndAddrSize(thread)) { - DataExtractor extractor(expr.data(), expr.size(), order_and_width->first, - order_and_width->second); - if 
(!DWARFExpression::PrintDWARFExpression(s, extractor, - order_and_width->second, - /*dwarf_ref_size*/ 4, - /*location_expression*/ false)) - s.PutCString("invalid-dwarf-expr"); + llvm::DataExtractor data(expr, order_and_width->first == eByteOrderLittle, + order_and_width->second); + llvm::DWARFExpression(data, order_and_width->second, llvm::dwarf::DWARF32) + .print(s.AsRawOstream(), nullptr, nullptr); } else s.PutCString("dwarf-expr"); } diff --git a/lldb/test/Shell/SymbolFile/Breakpad/stack-cfi-arm.yaml b/lldb/test/Shell/SymbolFile/Breakpad/stack-cfi-arm.yaml index f7f46b7e2bff1..61a33e7ba7fc6 100644 --- a/lldb/test/Shell/SymbolFile/Breakpad/stack-cfi-arm.yaml +++ b/lldb/test/Shell/SymbolFile/Breakpad/stack-cfi-arm.yaml @@ -6,8 +6,7 @@ # CHECK: Symbol file UnwindPlan: # CHECK: row[0]: 0: CFA=DW_OP_breg13 +0, DW_OP_consts +0, DW_OP_plus => pc=DW_OP_breg14 +0 -# CHECK-NEXT: row[1]: 2: CFA=DW_OP_breg13 +0, DW_OP_consts +8, DW_OP_plus => r7=DW_OP_pick 0x00, DW_OP_consts -8, DW_OP_plus , DW_OP_deref pc=DW_OP_pick 0x00, DW_OP_consts -4, DW_OP_plus , DW_OP_deref -# +# CHECK-NEXT: row[1]: 2: CFA=DW_OP_breg13 +0, DW_OP_consts +8, DW_OP_plus => r7=DW_OP_pick 0x0, DW_OP_consts -8, DW_OP_plus, DW_OP_deref pc=DW_OP_pick 0x0, DW_OP_consts -4, DW_OP_plus, DW_OP_deref --- !minidump Streams: diff --git a/lldb/test/Shell/SymbolFile/Breakpad/stack-cfi-parsing.test b/lldb/test/Shell/SymbolFile/Breakpad/stack-cfi-parsing.test index ffb64602c15e5..a6e29290854ad 100644 --- a/lldb/test/Shell/SymbolFile/Breakpad/stack-cfi-parsing.test +++ b/lldb/test/Shell/SymbolFile/Breakpad/stack-cfi-parsing.test @@ -11,8 +11,8 @@ image show-unwind -n func0 # CHECK-NEXT: This UnwindPlan is sourced from the compiler: yes. # CHECK-NEXT: This UnwindPlan is valid at all instruction locations: no. 
# CHECK-NEXT: Address range of this UnwindPlan: [stack-cfi-parsing.out..module_image + 0-0x0000000000000002) -# CHECK-NEXT: row[0]: 0: CFA=DW_OP_breg7 +0 => rbp=DW_OP_breg7 +0 rip=DW_OP_pick 0x00 -# CHECK-NEXT: row[1]: 1: CFA=DW_OP_breg7 +0 => rbx=DW_OP_breg2 +0 rbp=DW_OP_breg0 +0 rip=DW_OP_pick 0x00 +# CHECK-NEXT: row[0]: 0: CFA=DW_OP_breg7 +0 => rbp=DW_OP_breg7 +0 rip=DW_OP_pick 0x0 +# CHECK-NEXT: row[1]: 1: CFA=DW_OP_breg7 +0 => rbx=DW_OP_breg2 +0 rbp=DW_OP_breg0 +0 rip=DW_OP_pick 0x0 # The following plans are all (syntactically) invalid for various reasons. # Processing those should not cause a crash. diff --git a/lldb/test/Shell/SymbolFile/Breakpad/unwind-via-raSearch.test b/lldb/test/Shell/SymbolFile/Breakpad/unwind-via-raSearch.test index 02404a86fa94e..93ea3ce029419 100644 --- a/lldb/test/Shell/SymbolFile/Breakpad/unwind-via-raSearch.test +++ b/lldb/test/Shell/SymbolFile/Breakpad/unwind-via-raSearch.test @@ -13,13 +13,13 @@ image show-unwind -n call_many # CHECK: This UnwindPlan is sourced from the compiler: yes. # CHECK: This UnwindPlan is valid at all instruction locations: no. # CHECK: Address range of this UnwindPlan: [unwind-via-stack-win.exe..module_image + 4112-0x0000107d) -# CHECK: row[0]: 0: CFA=RaSearch@SP+0 => esp=DW_OP_pick 0x00, DW_OP_consts +4, DW_OP_plus eip=DW_OP_pick 0x00, DW_OP_deref +# CHECK: row[0]: 0: CFA=RaSearch@SP+0 => esp=DW_OP_pick 0x0, DW_OP_consts +4, DW_OP_plus eip=DW_OP_pick 0x0, DW_OP_deref image show-unwind -n nonzero_frame_size # CHECK-LABEL: image show-unwind -n nonzero_frame_size # CHECK: UNWIND PLANS for unwind-via-stack-win.exe`nonzero_frame_size # CHECK: Symbol file UnwindPlan: -# CHECK: row[0]: 0: CFA=RaSearch@SP+12 => esp=DW_OP_pick 0x00, DW_OP_consts +4, DW_OP_plus eip=DW_OP_pick 0x00, DW_OP_deref +# CHECK: row[0]: 0: CFA=RaSearch@SP+12 => esp=DW_OP_pick 0x0, DW_OP_consts +4, DW_OP_plus eip=DW_OP_pick 0x0, DW_OP_deref # Then, some invalid rules. 
image show-unwind -n complex_rasearch diff --git a/lldb/test/Shell/SymbolFile/Breakpad/unwind-via-stack-cfi.test b/lldb/test/Shell/SymbolFile/Breakpad/unwind-via-stack-cfi.test index 0a67cb3431102..29cf130694e6d 100644 --- a/lldb/test/Shell/SymbolFile/Breakpad/unwind-via-stack-cfi.test +++ b/lldb/test/Shell/SymbolFile/Breakpad/unwind-via-stack-cfi.test @@ -13,7 +13,7 @@ image show-unwind -n bar # CHECK-NEXT: This UnwindPlan is sourced from the compiler: yes. # CHECK-NEXT: This UnwindPlan is valid at all instruction locations: no. # CHECK-NEXT: Address range of this UnwindPlan: [unwind-via-stack-cfi..module_image + 1056-0x0000000000000449) -# CHECK-NEXT: row[0]: 0: CFA=DW_OP_breg6 +0, DW_OP_deref => rbp=DW_OP_pick 0x00, DW_OP_deref rsp=DW_OP_pick 0x00, DW_OP_consts +16, DW_OP_plus rip=DW_OP_pick 0x00, DW_OP_consts +8, DW_OP_plus , DW_OP_deref +# CHECK-NEXT: row[0]: 0: CFA=DW_OP_breg6 +0, DW_OP_deref => rbp=DW_OP_pick 0x0, DW_OP_deref rsp=DW_OP_pick 0x0, DW_OP_consts +16, DW_OP_plus rip=DW_OP_pick 0x0, DW_OP_consts +8, DW_OP_plus, DW_OP_deref thread backtrace # CHECK-LABEL: thread backtrace diff --git a/lldb/test/Shell/SymbolFile/Breakpad/unwind-via-stack-win.test b/lldb/test/Shell/SymbolFile/Breakpad/unwind-via-stack-win.test index 522637bf952fc..e95367a213f84 100644 --- a/lldb/test/Shell/SymbolFile/Breakpad/unwind-via-stack-win.test +++ b/lldb/test/Shell/SymbolFile/Breakpad/unwind-via-stack-win.test @@ -15,7 +15,7 @@ image show-unwind -n call_many # CHECK: This UnwindPlan is sourced from the compiler: yes. # CHECK: This UnwindPlan is valid at all instruction locations: no. 
# CHECK: Address range of this UnwindPlan: [unwind-via-stack-win.exe..module_image + 4112-0x0000107d) -# CHECK: row[0]: 0: CFA=DW_OP_breg7 +0, DW_OP_consts +80, DW_OP_plus => esp=DW_OP_pick 0x00, DW_OP_consts +4, DW_OP_plus eip=DW_OP_pick 0x00, DW_OP_deref +# CHECK: row[0]: 0: CFA=DW_OP_breg7 +0, DW_OP_consts +80, DW_OP_plus => esp=DW_OP_pick 0x0, DW_OP_consts +4, DW_OP_plus eip=DW_OP_pick 0x0, DW_OP_deref # Then, some invalid rules. image show-unwind -n bogus_rule @@ -43,7 +43,7 @@ image show-unwind -n temporary_var # CHECK: This UnwindPlan is sourced from the compiler: yes. # CHECK: This UnwindPlan is valid at all instruction locations: no. # CHECK: Address range of this UnwindPlan: [unwind-via-stack-win.exe..module_image + 4400-0x00001134) -# CHECK: row[0]: 0: CFA=DW_OP_breg7 +0 => esp=DW_OP_pick 0x00, DW_OP_consts +4, DW_OP_plus eip=DW_OP_pick 0x00, DW_OP_deref +# CHECK: row[0]: 0: CFA=DW_OP_breg7 +0 => esp=DW_OP_pick 0x0, DW_OP_consts +4, DW_OP_plus eip=DW_OP_pick 0x0, DW_OP_deref # And finally, check that backtracing works as a whole by unwinding a simple # stack. 
diff --git a/lldb/test/Shell/Unwind/eh-frame-dwarf-unwind.test b/lldb/test/Shell/Unwind/eh-frame-dwarf-unwind.test index c0b6e5e50f86a..3df9906394f43 100644 --- a/lldb/test/Shell/Unwind/eh-frame-dwarf-unwind.test +++ b/lldb/test/Shell/Unwind/eh-frame-dwarf-unwind.test @@ -20,4 +20,4 @@ thread backtrace target modules show-unwind -n foo # CHECK: eh_frame UnwindPlan: -# CHECK: row[0]: 0: CFA=rsp +8 => rip=DW_OP_lit8 , DW_OP_minus , DW_OP_deref , DW_OP_const1u 0x47, DW_OP_minus +# CHECK: row[0]: 0: CFA=rsp +8 => rip=DW_OP_lit8, DW_OP_minus, DW_OP_deref, DW_OP_const1u 0x47, DW_OP_minus diff --git a/lldb/test/Shell/Unwind/unwind-plan-dwarf-dump.test b/lldb/test/Shell/Unwind/unwind-plan-dwarf-dump.test index 67b482b2df917..6dbb518a656b0 100644 --- a/lldb/test/Shell/Unwind/unwind-plan-dwarf-dump.test +++ b/lldb/test/Shell/Unwind/unwind-plan-dwarf-dump.test @@ -11,4 +11,4 @@ process launch target modules show-unwind -n main # CHECK: eh_frame UnwindPlan: -# CHECK: row[0]: 0: CFA=DW_OP_breg7 +0, DW_OP_const1u 0x00, DW_OP_plus => rip=DW_OP_const1s -8, DW_OP_plus , DW_OP_deref +# CHECK: row[0]: 0: CFA=DW_OP_breg7 +0, DW_OP_const1u 0x0, DW_OP_plus => rip=DW_OP_const1s -8, DW_OP_plus, DW_OP_deref diff --git a/lldb/unittests/Symbol/PostfixExpressionTest.cpp b/lldb/unittests/Symbol/PostfixExpressionTest.cpp index 1dad83eabb4f5..7def709a60901 100644 --- a/lldb/unittests/Symbol/PostfixExpressionTest.cpp +++ b/lldb/unittests/Symbol/PostfixExpressionTest.cpp @@ -7,9 +7,9 @@ //===----------------------------------------------------------------------===// #include "lldb/Symbol/PostfixExpression.h" -#include "lldb/Expression/DWARFExpression.h" #include "lldb/Utility/DataExtractor.h" #include "lldb/Utility/StreamString.h" +#include "llvm/DebugInfo/DWARF/DWARFExpression.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" #include "gmock/gmock.h" @@ -152,17 +152,14 @@ static std::string ParseAndGenerateDWARF(llvm::StringRef expr) { ToDWARF(*ast, dwarf); // 
print dwarf expression to comparable textual representation - DataExtractor extractor(dwarf.GetData(), dwarf.GetSize(), - lldb::eByteOrderLittle, addr_size); - - StreamString result; - if (!DWARFExpression::PrintDWARFExpression(result, extractor, addr_size, - /*dwarf_ref_size*/ 4, - /*location_expression*/ false)) { - return "DWARF printing failed."; - } - - return std::string(result.GetString()); + llvm::DataExtractor extractor(dwarf.GetString(), /*IsLittleEndian=*/true, + addr_size); + + std::string result; + llvm::raw_string_ostream os(result); + llvm::DWARFExpression(extractor, addr_size, llvm::dwarf::DWARF32) + .print(os, nullptr, nullptr); + return std::move(os.str()); } TEST(PostfixExpression, ToDWARF) { @@ -170,28 +167,28 @@ TEST(PostfixExpression, ToDWARF) { EXPECT_EQ("DW_OP_breg1 +0", ParseAndGenerateDWARF("R1")); - EXPECT_EQ("DW_OP_bregx 65 0", ParseAndGenerateDWARF("R65")); + EXPECT_EQ("DW_OP_bregx 0x41 +0", ParseAndGenerateDWARF("R65")); - EXPECT_EQ("DW_OP_pick 0x00", ParseAndGenerateDWARF("INIT")); + EXPECT_EQ("DW_OP_pick 0x0", ParseAndGenerateDWARF("INIT")); - EXPECT_EQ("DW_OP_pick 0x00, DW_OP_pick 0x01, DW_OP_plus ", + EXPECT_EQ("DW_OP_pick 0x0, DW_OP_pick 0x1, DW_OP_plus", ParseAndGenerateDWARF("INIT INIT +")); - EXPECT_EQ("DW_OP_breg1 +0, DW_OP_pick 0x01, DW_OP_plus ", + EXPECT_EQ("DW_OP_breg1 +0, DW_OP_pick 0x1, DW_OP_plus", ParseAndGenerateDWARF("R1 INIT +")); - EXPECT_EQ("DW_OP_consts +1, DW_OP_pick 0x01, DW_OP_deref , DW_OP_plus ", + EXPECT_EQ("DW_OP_consts +1, DW_OP_pick 0x1, DW_OP_deref, DW_OP_plus", ParseAndGenerateDWARF("1 INIT ^ +")); - EXPECT_EQ("DW_OP_consts +4, DW_OP_consts +5, DW_OP_plus ", + EXPECT_EQ("DW_OP_consts +4, DW_OP_consts +5, DW_OP_plus", ParseAndGenerateDWARF("4 5 +")); - EXPECT_EQ("DW_OP_consts +4, DW_OP_consts +5, DW_OP_minus ", + EXPECT_EQ("DW_OP_consts +4, DW_OP_consts +5, DW_OP_minus", ParseAndGenerateDWARF("4 5 -")); - EXPECT_EQ("DW_OP_consts +4, DW_OP_deref ", ParseAndGenerateDWARF("4 ^")); + EXPECT_EQ("DW_OP_consts 
+4, DW_OP_deref", ParseAndGenerateDWARF("4 ^")); - EXPECT_EQ("DW_OP_breg6 +0, DW_OP_consts +128, DW_OP_lit1 " - ", DW_OP_minus , DW_OP_not , DW_OP_and ", + EXPECT_EQ("DW_OP_breg6 +0, DW_OP_consts +128, DW_OP_lit1, DW_OP_minus, " + "DW_OP_not, DW_OP_and", ParseAndGenerateDWARF("R6 128 @")); } diff --git a/lldb/unittests/SymbolFile/NativePDB/PdbFPOProgramToDWARFExpressionTests.cpp b/lldb/unittests/SymbolFile/NativePDB/PdbFPOProgramToDWARFExpressionTests.cpp index 49e8a1a9fb87f..334a08f92033b 100644 --- a/lldb/unittests/SymbolFile/NativePDB/PdbFPOProgramToDWARFExpressionTests.cpp +++ b/lldb/unittests/SymbolFile/NativePDB/PdbFPOProgramToDWARFExpressionTests.cpp @@ -11,11 +11,11 @@ #include "Plugins/SymbolFile/NativePDB/PdbFPOProgramToDWARFExpression.h" #include "lldb/Core/StreamBuffer.h" -#include "lldb/Expression/DWARFExpression.h" #include "lldb/Utility/ArchSpec.h" #include "lldb/Utility/DataBufferHeap.h" #include "lldb/Utility/DataExtractor.h" #include "lldb/Utility/StreamString.h" +#include "llvm/DebugInfo/DWARF/DWARFExpression.h" using namespace lldb; using namespace lldb_private; @@ -27,30 +27,22 @@ static void CheckValidProgramTranslation(llvm::StringRef fpo_program, llvm::StringRef target_register_name, llvm::StringRef expected_dwarf_expression) { - // initial setup - ArchSpec arch_spec("i686-pc-windows"); - llvm::Triple::ArchType arch_type = arch_spec.GetMachine(); - ByteOrder byte_order = arch_spec.GetByteOrder(); - uint32_t address_size = arch_spec.GetAddressByteSize(); - uint32_t byte_size = arch_spec.GetDataByteSize(); - // program translation - StreamBuffer<32> stream(Stream::eBinary, address_size, byte_order); + StreamBuffer<32> stream(Stream::eBinary, 4, eByteOrderLittle); ASSERT_TRUE(TranslateFPOProgramToDWARFExpression( - fpo_program, target_register_name, arch_type, stream)); + fpo_program, target_register_name, llvm::Triple::x86, stream)); // print dwarf expression to comparable textual representation - DataBufferSP buffer = - 
std::make_shared(stream.GetData(), stream.GetSize()); - DataExtractor extractor(buffer, byte_order, address_size, byte_size); + llvm::DataExtractor extractor({stream.GetData(), stream.GetSize()}, + /*IsLittleEndian=*/true, /*AddressSize=*/4); - StreamString result_dwarf_expression; - ASSERT_TRUE(DWARFExpression::PrintDWARFExpression( - result_dwarf_expression, extractor, address_size, 4, false)); + std::string result; + llvm::raw_string_ostream os(result); + llvm::DWARFExpression(extractor, /*AddressSize=*/4, llvm::dwarf::DWARF32) + .print(os, nullptr, nullptr); // actual check - ASSERT_STREQ(expected_dwarf_expression.data(), - result_dwarf_expression.GetString().data()); + ASSERT_EQ(expected_dwarf_expression, os.str()); } TEST(PDBFPOProgramToDWARFExpressionTests, SingleAssignmentRegisterRef) { @@ -64,9 +56,9 @@ TEST(PDBFPOProgramToDWARFExpressionTests, MultipleIndependentAssignments) { TEST(PDBFPOProgramToDWARFExpressionTests, MultipleDependentAssignments) { CheckValidProgramTranslation( "$T1 $ebp 4 + = $T0 $T1 8 - 128 @ = ", "$T0", - "DW_OP_breg6 +0, DW_OP_consts +4, DW_OP_plus , DW_OP_consts +8, " - "DW_OP_minus , DW_OP_consts +128, DW_OP_lit1 , DW_OP_minus , DW_OP_not , " - "DW_OP_and "); + "DW_OP_breg6 +0, DW_OP_consts +4, DW_OP_plus, DW_OP_consts +8, " + "DW_OP_minus, DW_OP_consts +128, DW_OP_lit1, DW_OP_minus, DW_OP_not, " + "DW_OP_and"); } TEST(PDBFPOProgramToDWARFExpressionTests, DependencyChain) { From c8b7c73c57f0c835f036aaa00a4970fc91d40020 Mon Sep 17 00:00:00 2001 From: Shuhong Liu Date: Mon, 25 May 2020 10:11:55 -0400 Subject: [PATCH 034/770] Add AIX to the test macro-same-context XFAIL list Summary: Since the integrated assembly parser was not implemented yet for AIX and macro is not part of the native assembly dialect on AIX, the test macro-same-context is expected to fail for AIX; hence added AIX to XFAIL list. 
Reviewers: hubert.reinterpretcast, daltenty, jasonliu Reviewed By: daltenty Subscribers: jasonliu, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D80232 --- llvm/test/MC/AsmParser/macro-same-context.ll | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/test/MC/AsmParser/macro-same-context.ll b/llvm/test/MC/AsmParser/macro-same-context.ll index fac30e44193fd..0acc195242083 100644 --- a/llvm/test/MC/AsmParser/macro-same-context.ll +++ b/llvm/test/MC/AsmParser/macro-same-context.ll @@ -2,6 +2,9 @@ ;; thus a definition is available to the whole file. PR36110 ; RUN: not llc < %s 2>&1 | FileCheck %s ; REQUIRES: default_triple +;; This test is expected to fail on AIX, +;; since the integrated assembly parser was not implemented yet for AIX. +; XFAIL: aix define void @test() { call void asm sideeffect ".macro FOO\0A.endm", "~{dirflag},~{fpsr},~{flags}"() #1 From a6c4cd3bcb715c112607fcc4a1c806d511e2f947 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 25 May 2020 15:33:44 +0100 Subject: [PATCH 035/770] [X86] Add PTEST tests showing failure to extract allsign cases As discussed on PR42035, we can often use MOVMSK to avoid a cmpgt/ashr by just analysing the extracted signbits. 
--- llvm/test/CodeGen/X86/combine-ptest.ll | 99 +++++++++++++++++++++++++- 1 file changed, 98 insertions(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/X86/combine-ptest.ll b/llvm/test/CodeGen/X86/combine-ptest.ll index d23277f627680..2928023c7fc2a 100644 --- a/llvm/test/CodeGen/X86/combine-ptest.ll +++ b/llvm/test/CodeGen/X86/combine-ptest.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2 ; ; testz(~X,Y) -> testc(X,Y) @@ -297,6 +298,102 @@ start: ret i1 %6 } +; +; TODO: testz(ashr(X,bw-1),-1) -> movmsk(X) +; + +define i32 @ptestz_v2i64_signbits(<2 x i64> %c, i32 %a, i32 %b) { +; CHECK-LABEL: ptestz_v2i64_signbits: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vptest %xmm0, %xmm0 +; CHECK-NEXT: cmovnel %esi, %eax +; CHECK-NEXT: retq + %t1 = ashr <2 x i64> %c, + %t2 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t1, <2 x i64> ) + %t3 = icmp ne i32 %t2, 0 + %t4 = select i1 %t3, i32 %a, i32 %b + ret i32 %t4 +} + +define i32 @ptestz_v8i32_signbits(<8 x i32> %c, i32 %a, i32 %b) { +; AVX1-LABEL: ptestz_v8i32_signbits: +; AVX1: # %bb.0: +; AVX1-NEXT: movl %edi, %eax +; AVX1-NEXT: vpsrad $31, %xmm0, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vptest %ymm0, %ymm0 +; AVX1-NEXT: cmovnel %esi, %eax +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: ptestz_v8i32_signbits: +; AVX2: # %bb.0: +; AVX2-NEXT: movl %edi, %eax +; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0 +; AVX2-NEXT: vptest %ymm0, %ymm0 +; AVX2-NEXT: cmovnel %esi, %eax +; 
AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq + %t1 = ashr <8 x i32> %c, + %t2 = bitcast <8 x i32> %t1 to <4 x i64> + %t3 = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %t2, <4 x i64> ) + %t4 = icmp ne i32 %t3, 0 + %t5 = select i1 %t4, i32 %a, i32 %b + ret i32 %t5 +} + +define i32 @ptestz_v8i16_signbits(<8 x i16> %c, i32 %a, i32 %b) { +; CHECK-LABEL: ptestz_v8i16_signbits: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: vpsraw $15, %xmm0, %xmm0 +; CHECK-NEXT: vptest %xmm0, %xmm0 +; CHECK-NEXT: cmovnel %esi, %eax +; CHECK-NEXT: retq + %t1 = ashr <8 x i16> %c, + %t2 = bitcast <8 x i16> %t1 to <2 x i64> + %t3 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t2, <2 x i64> ) + %t4 = icmp ne i32 %t3, 0 + %t5 = select i1 %t4, i32 %a, i32 %b + ret i32 %t5 +} + +define i32 @ptestz_v32i8_signbits(<32 x i8> %c, i32 %a, i32 %b) { +; AVX1-LABEL: ptestz_v32i8_signbits: +; AVX1: # %bb.0: +; AVX1-NEXT: movl %edi, %eax +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vptest %ymm0, %ymm0 +; AVX1-NEXT: cmovnel %esi, %eax +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: ptestz_v32i8_signbits: +; AVX2: # %bb.0: +; AVX2-NEXT: movl %edi, %eax +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vptest %ymm0, %ymm0 +; AVX2-NEXT: cmovnel %esi, %eax +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq + %t1 = ashr <32 x i8> %c, + %t2 = bitcast <32 x i8> %t1 to <4 x i64> + %t3 = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %t2, <4 x i64> ) + %t4 = icmp ne i32 %t3, 0 + %t5 = select i1 %t4, i32 %a, i32 %b + ret i32 %t5 +} + declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone From 
7b1dc0015aec39ad27619872f5debbd86f8f9a2c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 25 May 2020 15:35:47 +0100 Subject: [PATCH 036/770] MustExecute.h - remove unnecessary includes. NFC. Reduce to forward declarations and fix implicit LoopInfo.h dependency in Attributor.h --- llvm/include/llvm/Analysis/MustExecute.h | 14 +++++++------- llvm/include/llvm/Transforms/IPO/Attributor.h | 1 + 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/Analysis/MustExecute.h b/llvm/include/llvm/Analysis/MustExecute.h index 181fdacad2334..093e561690402 100644 --- a/llvm/include/llvm/Analysis/MustExecute.h +++ b/llvm/include/llvm/Analysis/MustExecute.h @@ -24,11 +24,9 @@ #define LLVM_ANALYSIS_MUSTEXECUTE_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionPrecedenceTracking.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Dominators.h" #include "llvm/IR/Instruction.h" namespace llvm { @@ -37,15 +35,17 @@ namespace { template using GetterTy = std::function; } -class Instruction; +class BasicBlock; class DominatorTree; -class PostDominatorTree; +class Instruction; class Loop; +class LoopInfo; +class PostDominatorTree; /// Captures loop safety information. /// It keep information for loop blocks may throw exception or otherwise -/// exit abnormaly on any iteration of the loop which might actually execute -/// at runtime. The primary way to consume this infromation is via +/// exit abnormally on any iteration of the loop which might actually execute +/// at runtime. The primary way to consume this information is via /// isGuaranteedToExecute below, but some callers bailout or fallback to /// alternate reasoning if a loop contains any implicit control flow. 
/// NOTE: LoopSafetyInfo contains cached information regarding loops and their diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 6ec4c3feb28da..a5d5043347450 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -107,6 +107,7 @@ #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetLibraryInfo.h" From 3c6c2ecd6efa393e7a8422d88e5d4ada0970e47e Mon Sep 17 00:00:00 2001 From: Dmitry Preobrazhensky Date: Mon, 25 May 2020 17:45:18 +0300 Subject: [PATCH 037/770] [AMDGPU] Added 'A' constraint for inline assembler Summary: 'A' constraint requires an immediate int or fp constant that can be inlined in an instruction encoding. This is the second part of the change. The llvm part has been committed as b087b91c9170. 
See https://reviews.llvm.org/D78494 Reviewers: arsenm, rampitec Differential Revision: https://reviews.llvm.org/D79493 --- clang/lib/Basic/Targets/AMDGPU.h | 5 +++++ clang/test/Sema/inline-asm-validate-amdgpu.cl | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index d0e88e223e955..6c9060aa3f7bf 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -131,6 +131,11 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { }); StringRef S(Name); + if (S == "A") { + Info.setRequiresImmediate(); + return true; + } + bool HasLeftParen = false; if (S.front() == '{') { HasLeftParen = true; diff --git a/clang/test/Sema/inline-asm-validate-amdgpu.cl b/clang/test/Sema/inline-asm-validate-amdgpu.cl index 51009ecb3f1e0..3d6488227ef29 100644 --- a/clang/test/Sema/inline-asm-validate-amdgpu.cl +++ b/clang/test/Sema/inline-asm-validate-amdgpu.cl @@ -17,6 +17,10 @@ kernel void test () { // vgpr constraints __asm__ ("v_mov_b32 %0, %1" : "=v" (vgpr) : "v" (imm) : ); + + // 'A' constraint + __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "A" (imm) : ); + } __kernel void From 7c298c104bfe725d4315926a656263e8a5ac3054 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Thu, 14 May 2020 07:23:10 +0200 Subject: [PATCH 038/770] [PGO] Fix computation of function Hash Previous implementation was incorrectly passing an uint64_t, that got converted to an uint8_t, to finalize the hash computation. This led to different functions having the same hash if they only differ by the remaining statements, which is incorrect. Added a new test case that trivially tests that a small function change is reflected in the hash value. Note that, as this patch fixes the hash computation, it invalidates all hashes computed before that patch applies, which could be an issue for large build systems that pre-compute the profile data and let clients download them as part of the build process. 
Differential Revision: https://reviews.llvm.org/D79961 --- clang/lib/CodeGen/CodeGenPGO.cpp | 8 +++++--- clang/test/Profile/c-collision.c | 22 ++++++++++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) create mode 100644 clang/test/Profile/c-collision.c diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 3c91a04d54642..98827bc3eec5e 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -747,13 +747,15 @@ uint64_t PGOHash::finalize() { return Working; // Check for remaining work in Working. - if (Working) - MD5.update(Working); + if (Working) { + using namespace llvm::support; + uint64_t Swapped = endian::byte_swap(Working); + MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped))); + } // Finalize the MD5 and return the hash. llvm::MD5::MD5Result Result; MD5.final(Result); - using namespace llvm::support; return Result.low(); } diff --git a/clang/test/Profile/c-collision.c b/clang/test/Profile/c-collision.c new file mode 100644 index 0000000000000..fabecd752b4ef --- /dev/null +++ b/clang/test/Profile/c-collision.c @@ -0,0 +1,22 @@ +// Test that a slight change in the code leads to a different hash. 
+// RUN: %clang_cc1 -UEXTRA -triple x86_64-unknown-linux-gnu -main-file-name c-collision.c %s -o - -emit-llvm -fprofile-instrument=clang | FileCheck %s --check-prefix=CHECK-NOEXTRA +// RUN: %clang_cc1 -DEXTRA -triple x86_64-unknown-linux-gnu -main-file-name c-collision.c %s -o - -emit-llvm -fprofile-instrument=clang | FileCheck %s --check-prefix=CHECK-EXTRA + +// CHECK-NOEXTRA: @__profd_foo = private global { {{.*}} } { i64 6699318081062747564, i64 7156072912471487002, +// CHECK-EXTRA: @__profd_foo = private global { {{.*}} } { i64 6699318081062747564, i64 -4383447408116050035, + +extern int bar; +void foo() { + if (bar) { + } + if (bar) { + } + if (bar) { + if (bar) { +#ifdef EXTRA + if (bar) { + } +#endif + } + } +} From ba92b274225fc78dc15e8dc0076f71e7a8b5d084 Mon Sep 17 00:00:00 2001 From: Denys Petrov Date: Thu, 9 Apr 2020 16:20:07 +0300 Subject: [PATCH 039/770] [analyzer] Improved RangeSet::Negate support of unsigned ranges Summary: This fixes https://bugs.llvm.org/show_bug.cgi?id=41588 RangeSet Negate function shall handle unsigned ranges as well as signed ones. RangeSet getRangeForMinusSymbol function shall use wider variety of ranges, not only concrete value ranges. RangeSet Intersect functions shall not produce assertions. Changes: Improved safety of RangeSet::Intersect function. Added isEmpty() check to prevent an assertion. Added support of handling unsigned ranges to RangeSet::Negate and RangeSet::getRangeForMinusSymbol. Extended RangeSet::getRangeForMinusSymbol to return not only range sets with single value [n,n], but with wide ranges [n,m]. Added unit test for Negate function. Added regression tests for unsigned values. 
Differential Revision: https://reviews.llvm.org/D77802 --- .../Core/RangeConstraintManager.cpp | 101 ++++++++++---- .../constraint_manager_negate_difference.c | 49 ++++++- clang/unittests/StaticAnalyzer/CMakeLists.txt | 1 + .../unittests/StaticAnalyzer/RangeSetTest.cpp | 130 ++++++++++++++++++ 4 files changed, 251 insertions(+), 30 deletions(-) create mode 100644 clang/unittests/StaticAnalyzer/RangeSetTest.cpp diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index 9752a0e22832c..137e2cefe5a04 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -155,11 +155,11 @@ bool RangeSet::pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const { // or, alternatively, /removing/ all integers between Upper and Lower. RangeSet RangeSet::Intersect(BasicValueFactory &BV, Factory &F, llvm::APSInt Lower, llvm::APSInt Upper) const { - if (!pin(Lower, Upper)) - return F.getEmptySet(); - PrimRangeSet newRanges = F.getEmptySet(); + if (isEmpty() || !pin(Lower, Upper)) + return newRanges; + PrimRangeSet::iterator i = begin(), e = end(); if (Lower <= Upper) IntersectInRange(BV, F, Lower, Upper, newRanges, i, e); @@ -190,33 +190,78 @@ RangeSet RangeSet::Intersect(BasicValueFactory &BV, Factory &F, return newRanges; } -// Turn all [A, B] ranges to [-B, -A]. Ranges [MIN, B] are turned to range set -// [MIN, MIN] U [-B, MAX], when MIN and MAX are the minimal and the maximal -// signed values of the type. +// Turn all [A, B] ranges to [-B, -A], when "-" is a C-like unary minus +// operation under the values of the type. +// +// We also handle MIN because applying unary minus to MIN does not change it. 
+// Example 1: +// char x = -128; // -128 is a MIN value in a range of 'char' +// char y = -x; // y: -128 +// Example 2: +// unsigned char x = 0; // 0 is a MIN value in a range of 'unsigned char' +// unsigned char y = -x; // y: 0 +// +// This forces us to split a range +// like [MIN, N] into [MIN, MIN] U [-N, MAX]. +// For instance, whole range is {-128..127} and subrange is [-128,-126], +// thus [-128,-127,-126,.....] negates to [-128,.....,126,127]. +// +// Negate restores disrupted ranges on bounds, +// e.g. [MIN, B] => [MIN, MIN] U [-B, MAX] => [MIN, B]. RangeSet RangeSet::Negate(BasicValueFactory &BV, Factory &F) const { PrimRangeSet newRanges = F.getEmptySet(); - for (iterator i = begin(), e = end(); i != e; ++i) { - const llvm::APSInt &from = i->From(), &to = i->To(); - const llvm::APSInt &newTo = (from.isMinSignedValue() ? - BV.getMaxValue(from) : - BV.getValue(- from)); - if (to.isMaxSignedValue() && !newRanges.isEmpty() && - newRanges.begin()->From().isMinSignedValue()) { - assert(newRanges.begin()->To().isMinSignedValue() && - "Ranges should not overlap"); - assert(!from.isMinSignedValue() && "Ranges should not overlap"); - const llvm::APSInt &newFrom = newRanges.begin()->From(); - newRanges = - F.add(F.remove(newRanges, *newRanges.begin()), Range(newFrom, newTo)); - } else if (!to.isMinSignedValue()) { - const llvm::APSInt &newFrom = BV.getValue(- to); - newRanges = F.add(newRanges, Range(newFrom, newTo)); - } - if (from.isMinSignedValue()) { - newRanges = F.add(newRanges, Range(BV.getMinValue(from), - BV.getMinValue(from))); + if (isEmpty()) + return newRanges; + + const llvm::APSInt sampleValue = getMinValue(); + const llvm::APSInt &MIN = BV.getMinValue(sampleValue); + const llvm::APSInt &MAX = BV.getMaxValue(sampleValue); + + // Handle a special case for MIN value.
+ iterator i = begin(); + const llvm::APSInt &from = i->From(); + const llvm::APSInt &to = i->To(); + if (from == MIN) { + // If [from, to] are [MIN, MAX], then just return the same [MIN, MAX]. + if (to == MAX) { + newRanges = ranges; + } else { + // Add separate range for the lowest value. + newRanges = F.add(newRanges, Range(MIN, MIN)); + // Skip adding the second range in case when [from, to] are [MIN, MIN]. + if (to != MIN) { + newRanges = F.add(newRanges, Range(BV.getValue(-to), MAX)); + } } + // Skip the first range in the loop. + ++i; + } + + // Negate all other ranges. + for (iterator e = end(); i != e; ++i) { + // Negate int values. + const llvm::APSInt &newFrom = BV.getValue(-i->To()); + const llvm::APSInt &newTo = BV.getValue(-i->From()); + // Add a negated range. + newRanges = F.add(newRanges, Range(newFrom, newTo)); + } + + if (newRanges.isSingleton()) + return newRanges; + + // Try to find and unite next ranges: + // [MIN, MIN] & [MIN + 1, N] => [MIN, N]. + iterator iter1 = newRanges.begin(); + iterator iter2 = std::next(iter1); + + if (iter1->To() == MIN && (iter2->From() - 1) == MIN) { + const llvm::APSInt &to = iter2->To(); + // remove adjacent ranges + newRanges = F.remove(newRanges, *iter1); + newRanges = F.remove(newRanges, *newRanges.begin()); + // add united range + newRanges = F.add(newRanges, Range(MIN, to)); } return newRanges; @@ -527,9 +572,7 @@ RangeConstraintManager::getRangeForMinusSymbol(ProgramStateRef State, SymbolRef negSym = SymMgr.getSymSymExpr(SSE->getRHS(), BO_Sub, SSE->getLHS(), T); if (const RangeSet *negV = State->get(negSym)) { - // Unsigned range set cannot be negated, unless it is [0, 0]. 
- if ((negV->getConcreteValue() && - (*negV->getConcreteValue() == 0)) || + if (T->isUnsignedIntegerOrEnumerationType() || T->isSignedIntegerOrEnumerationType()) return negV; } diff --git a/clang/test/Analysis/constraint_manager_negate_difference.c b/clang/test/Analysis/constraint_manager_negate_difference.c index 4412ae0e9733b..a33c5ca81c26a 100644 --- a/clang/test/Analysis/constraint_manager_negate_difference.c +++ b/clang/test/Analysis/constraint_manager_negate_difference.c @@ -4,7 +4,9 @@ void clang_analyzer_eval(int); void exit(int); -#define UINT_MAX (~0U) +#define UINT_MIN (0U) +#define UINT_MAX (~UINT_MIN) +#define UINT_MID (UINT_MAX / 2 + 1) #define INT_MAX (UINT_MAX & (UINT_MAX >> 1)) #define INT_MIN (UINT_MAX & ~(UINT_MAX >> 1)) @@ -110,3 +112,48 @@ void effective_range_2(int m, int n) { clang_analyzer_eval(m - n == 0); // expected-warning{{TRUE}} expected-warning{{FALSE}} clang_analyzer_eval(n - m == 0); // expected-warning{{TRUE}} expected-warning{{FALSE}} } + +void negate_unsigned_min(unsigned m, unsigned n) { + if (m - n == UINT_MIN) { + clang_analyzer_eval(n - m == UINT_MIN); // expected-warning{{TRUE}} + clang_analyzer_eval(n - m != UINT_MIN); // expected-warning{{FALSE}} + clang_analyzer_eval(n - m > UINT_MIN); // expected-warning{{FALSE}} + clang_analyzer_eval(n - m < UINT_MIN); // expected-warning{{FALSE}} + } +} + +void negate_unsigned_mid(unsigned m, unsigned n) { + if (m - n == UINT_MID) { + clang_analyzer_eval(n - m == UINT_MID); // expected-warning{{TRUE}} + clang_analyzer_eval(n - m != UINT_MID); // expected-warning{{FALSE}} + } +} + +void negate_unsigned_mid2(unsigned m, unsigned n) { + if (m - n < UINT_MID && m - n > UINT_MIN) { + clang_analyzer_eval(n - m > UINT_MID); // expected-warning{{TRUE}} + clang_analyzer_eval(n - m < UINT_MID); // expected-warning{{FALSE}} + } +} + +void negate_unsigned_max(unsigned m, unsigned n) { + if (m - n == UINT_MAX) { + clang_analyzer_eval(n - m == 1); // expected-warning{{TRUE}} + clang_analyzer_eval(n 
- m != 1); // expected-warning{{FALSE}} + } +} + +void negate_unsigned_one(unsigned m, unsigned n) { + if (m - n == 1) { + clang_analyzer_eval(n - m == UINT_MAX); // expected-warning{{TRUE}} + clang_analyzer_eval(n - m < UINT_MAX); // expected-warning{{FALSE}} + } +} + +// The next code is a repro for the bug PR41588 +void negated_unsigned_range(unsigned x, unsigned y) { + clang_analyzer_eval(x - y != 0); // expected-warning{{FALSE}} expected-warning{{TRUE}} + clang_analyzer_eval(y - x != 0); // expected-warning{{FALSE}} expected-warning{{TRUE}} + // expected no assertion on the next line + clang_analyzer_eval(x - y != 0); // expected-warning{{FALSE}} expected-warning{{TRUE}} +} diff --git a/clang/unittests/StaticAnalyzer/CMakeLists.txt b/clang/unittests/StaticAnalyzer/CMakeLists.txt index 1070f124921da..e1f86af18b2b5 100644 --- a/clang/unittests/StaticAnalyzer/CMakeLists.txt +++ b/clang/unittests/StaticAnalyzer/CMakeLists.txt @@ -10,6 +10,7 @@ add_clang_unittest(StaticAnalysisTests StoreTest.cpp RegisterCustomCheckersTest.cpp SymbolReaperTest.cpp + RangeSetTest.cpp ) clang_target_link_libraries(StaticAnalysisTests diff --git a/clang/unittests/StaticAnalyzer/RangeSetTest.cpp b/clang/unittests/StaticAnalyzer/RangeSetTest.cpp new file mode 100644 index 0000000000000..83b4fac15a198 --- /dev/null +++ b/clang/unittests/StaticAnalyzer/RangeSetTest.cpp @@ -0,0 +1,130 @@ +//===- unittests/StaticAnalyzer/RangeSetTest.cpp ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/Builtins.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/SourceManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h" +#include "clang/Tooling/Tooling.h" +#include "gtest/gtest.h" + +namespace clang { +namespace ento { +namespace { + +// TestCase contains two lists of ranges. +// Original one has to be negated. +// Expected one has to be compared to negated original range. +template <typename T> struct TestCase { + RangeSet original; + RangeSet expected; + + TestCase(BasicValueFactory &BVF, RangeSet::Factory &F, + const std::initializer_list<T> &originalList, + const std::initializer_list<T> &expectedList) + : original(createRangeSetFromList(BVF, F, originalList)), + expected(createRangeSetFromList(BVF, F, expectedList)) {} + +private: + RangeSet createRangeSetFromList(BasicValueFactory &BVF, RangeSet::Factory &F, + const std::initializer_list<T> rangeList) { + llvm::APSInt from(sizeof(T) * 8, std::is_unsigned<T>::value); + llvm::APSInt to = from; + RangeSet rangeSet = F.getEmptySet(); + for (auto it = rangeList.begin(); it != rangeList.end(); it += 2) { + from = *it; + to = *(it + 1); + rangeSet = rangeSet.addRange( + F, RangeSet(F, BVF.getValue(from), BVF.getValue(to))); + } + return rangeSet; + } + + void printNegate(const TestCase &TestCase) { + TestCase.original.print(llvm::dbgs()); + llvm::dbgs() << " => "; + TestCase.expected.print(llvm::dbgs()); + } +}; + +class RangeSetTest : public testing::Test { +protected: + // Init block + std::unique_ptr<ASTUnit> AST = tooling::buildASTFromCode("struct foo;"); + ASTContext &context = AST->getASTContext(); + llvm::BumpPtrAllocator alloc; + BasicValueFactory BVF{context, alloc}; + RangeSet::Factory F; + // End init block + + template <typename T> void checkNegate() { + using type = T; + + // Use next values of the range {MIN, A, B, MID, C, D, MAX}.
+ + // MID is a value in the middle of the range + // that unary minus does not affect, + // e.g. int8/int32(0), uint8(128), uint32(2147483648). + + constexpr type MIN = std::numeric_limits<type>::min(); + constexpr type MAX = std::numeric_limits<type>::max(); + constexpr type MID = std::is_signed<type>::value + ? 0 + : ~(static_cast<type>(-1) / static_cast<type>(2)); + constexpr type A = MID - static_cast<type>(42 + 42); + constexpr type B = MID - static_cast<type>(42); + constexpr type C = -B; + constexpr type D = -A; + + static_assert(MIN < A && A < B && B < MID && MID < C && C < D && D < MAX, + "Values shall be in an ascending order"); + + // Left {[x, y], [x, y]} is what shall be negated. + // Right {[x, y], [x, y]} is what shall be compared to a negation result. + TestCase<type> cases[] = { + {BVF, F, {MIN, A}, {MIN, MIN, D, MAX}}, + {BVF, F, {MIN, C}, {MIN, MIN, B, MAX}}, + {BVF, F, {MIN, MID}, {MIN, MIN, MID, MAX}}, + {BVF, F, {MIN, MAX}, {MIN, MAX}}, + {BVF, F, {A, D}, {A, D}}, + {BVF, F, {A, B}, {C, D}}, + {BVF, F, {MIN, A, D, MAX}, {MIN, A, D, MAX}}, + {BVF, F, {MIN, B, MID, D}, {MIN, MIN, A, MID, C, MAX}}, + {BVF, F, {MIN, MID, C, D}, {MIN, MIN, A, B, MID, MAX}}, + {BVF, F, {MIN, MID, C, MAX}, {MIN, B, MID, MAX}}, + {BVF, F, {A, MID, D, MAX}, {MIN + 1, A, MID, D}}, + {BVF, F, {A, A}, {D, D}}, + {BVF, F, {MID, MID}, {MID, MID}}, + {BVF, F, {MAX, MAX}, {MIN + 1, MIN + 1}}, + }; + + for (const auto &c : cases) { + // Negate original and check with expected. + RangeSet negatedFromOriginal = c.original.Negate(BVF, F); + EXPECT_EQ(negatedFromOriginal, c.expected); + // Negate negated back and check with original.
+ RangeSet negatedBackward = negatedFromOriginal.Negate(BVF, F); + EXPECT_EQ(negatedBackward, c.original); + } + } +}; + +TEST_F(RangeSetTest, RangeSetNegateTest) { + checkNegate(); + checkNegate(); + checkNegate(); + checkNegate(); + checkNegate(); + checkNegate(); + checkNegate(); + checkNegate(); +} + +} // namespace +} // namespace ento +} // namespace clang From b62ce9e05d9ec95532fa131a3e47ff1d4e7ed5de Mon Sep 17 00:00:00 2001 From: Sergej Jaskiewicz Date: Mon, 25 May 2020 19:08:49 +0300 Subject: [PATCH 040/770] Re-commit "[libc++] [test] Generate static_test_env on the fly" Don't use std::filesystem APIs for CWDGuard, use POSIX functions instead. This way the tests don't rely on the correctness of the functionality they're testing. Differential Revision: https://reviews.llvm.org/D78200 --- .../Inputs/static_test_env/bad_symlink | 1 - .../Inputs/static_test_env/dir1/dir2/afile3 | 0 .../static_test_env/dir1/dir2/dir3/file5 | 0 .../Inputs/static_test_env/dir1/dir2/file4 | 0 .../static_test_env/dir1/dir2/symlink_to_dir3 | 1 - .../Inputs/static_test_env/dir1/file1 | 0 .../Inputs/static_test_env/dir1/file2 | 1 - .../Inputs/static_test_env/empty_file | 0 .../Inputs/static_test_env/non_empty_file | 1 - .../Inputs/static_test_env/symlink_to_dir | 1 - .../static_test_env/symlink_to_empty_file | 1 - .../directory_entry.cons/path.pass.cpp | 24 +- .../replace_filename.pass.cpp | 8 +- .../directory_entry.obs/file_size.pass.cpp | 16 +- .../file_type_obs.pass.cpp | 4 +- .../hard_link_count.pass.cpp | 16 +- .../last_write_time.pass.cpp | 16 +- .../directory_entry.obs/status.pass.cpp | 5 +- .../symlink_status.pass.cpp | 5 +- .../directory_iterator.members/copy.pass.cpp | 4 +- .../copy_assign.pass.cpp | 10 +- .../directory_iterator.members/ctor.pass.cpp | 19 +- .../increment.pass.cpp | 22 +- .../directory_iterator.members/move.pass.cpp | 4 +- .../move_assign.pass.cpp | 15 +- .../begin_end.pass.cpp | 8 +- .../rec.dir.itr.members/copy.pass.cpp | 4 +- 
.../rec.dir.itr.members/copy_assign.pass.cpp | 25 +- .../rec.dir.itr.members/ctor.pass.cpp | 19 +- .../rec.dir.itr.members/depth.pass.cpp | 8 +- .../disable_recursion_pending.pass.cpp | 4 +- .../rec.dir.itr.members/increment.pass.cpp | 29 ++- .../rec.dir.itr.members/move.pass.cpp | 4 +- .../rec.dir.itr.members/move_assign.pass.cpp | 25 +- .../rec.dir.itr.members/pop.pass.cpp | 12 +- .../recursion_pending.pass.cpp | 36 +-- .../rec.dir.itr.nonmembers/begin_end.pass.cpp | 8 +- .../fs.op.canonical/canonical.pass.cpp | 49 ++-- .../fs.op.funcs/fs.op.copy/copy.pass.cpp | 3 +- .../fs.op.current_path/current_path.pass.cpp | 17 +- .../fs.op.equivalent/equivalent.pass.cpp | 19 +- .../fs.op.funcs/fs.op.exists/exists.pass.cpp | 3 +- .../fs.op.file_size/file_size.pass.cpp | 18 +- .../fs.op.hard_lk_ct/hard_link_count.pass.cpp | 36 +-- .../is_block_file.pass.cpp | 3 +- .../is_character_file.pass.cpp | 3 +- .../fs.op.is_directory/is_directory.pass.cpp | 11 +- .../fs.op.is_empty/is_empty.pass.cpp | 14 +- .../fs.op.is_fifo/is_fifo.pass.cpp | 3 +- .../fs.op.is_other/is_other.pass.cpp | 3 +- .../is_regular_file.pass.cpp | 3 +- .../fs.op.is_socket/is_socket.pass.cpp | 3 +- .../fs.op.is_symlink/is_symlink.pass.cpp | 15 +- .../last_write_time.pass.cpp | 21 +- .../fs.op.relative/relative.pass.cpp | 61 +++-- .../fs.op.funcs/fs.op.space/space.pass.cpp | 20 +- .../fs.op.funcs/fs.op.status/status.pass.cpp | 17 +- .../symlink_status.pass.cpp | 17 +- .../weakly_canonical.pass.cpp | 28 ++- libcxx/test/support/filesystem_test_helper.h | 230 +++++++++++------- 60 files changed, 527 insertions(+), 426 deletions(-) delete mode 120000 libcxx/test/std/input.output/filesystems/Inputs/static_test_env/bad_symlink delete mode 100644 libcxx/test/std/input.output/filesystems/Inputs/static_test_env/dir1/dir2/afile3 delete mode 100644 libcxx/test/std/input.output/filesystems/Inputs/static_test_env/dir1/dir2/dir3/file5 delete mode 100644 
libcxx/test/std/input.output/filesystems/Inputs/static_test_env/dir1/dir2/file4 delete mode 120000 libcxx/test/std/input.output/filesystems/Inputs/static_test_env/dir1/dir2/symlink_to_dir3 delete mode 100644 libcxx/test/std/input.output/filesystems/Inputs/static_test_env/dir1/file1 delete mode 100644 libcxx/test/std/input.output/filesystems/Inputs/static_test_env/dir1/file2 delete mode 100644 libcxx/test/std/input.output/filesystems/Inputs/static_test_env/empty_file delete mode 100644 libcxx/test/std/input.output/filesystems/Inputs/static_test_env/non_empty_file delete mode 120000 libcxx/test/std/input.output/filesystems/Inputs/static_test_env/symlink_to_dir delete mode 120000 libcxx/test/std/input.output/filesystems/Inputs/static_test_env/symlink_to_empty_file diff --git a/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/bad_symlink b/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/bad_symlink deleted file mode 120000 index 76646beed5ed3..0000000000000 --- a/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/bad_symlink +++ /dev/null @@ -1 +0,0 @@ -dne \ No newline at end of file diff --git a/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/dir1/dir2/afile3 b/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/dir1/dir2/afile3 deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/dir1/dir2/dir3/file5 b/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/dir1/dir2/dir3/file5 deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/dir1/dir2/file4 b/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/dir1/dir2/file4 deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/dir1/dir2/symlink_to_dir3 
b/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/dir1/dir2/symlink_to_dir3 deleted file mode 120000 index 3979139526219..0000000000000 --- a/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/dir1/dir2/symlink_to_dir3 +++ /dev/null @@ -1 +0,0 @@ -dir3 \ No newline at end of file diff --git a/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/dir1/file1 b/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/dir1/file1 deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/dir1/file2 b/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/dir1/file2 deleted file mode 100644 index 44834e586734f..0000000000000 --- a/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/dir1/file2 +++ /dev/null @@ -1 +0,0 @@ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa \ No newline at end of file diff --git a/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/empty_file b/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/empty_file deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/non_empty_file b/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/non_empty_file deleted file mode 100644 index 44834e586734f..0000000000000 --- a/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/non_empty_file +++ /dev/null @@ -1 +0,0 @@ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa \ No newline at end of file diff --git a/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/symlink_to_dir b/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/symlink_to_dir deleted file mode 120000 index df490f837a85c..0000000000000 --- a/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/symlink_to_dir +++ /dev/null @@ -1 +0,0 @@ -dir1 \ No newline at end of file diff --git 
a/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/symlink_to_empty_file b/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/symlink_to_empty_file deleted file mode 120000 index b79b689fc85ac..0000000000000 --- a/libcxx/test/std/input.output/filesystems/Inputs/static_test_env/symlink_to_empty_file +++ /dev/null @@ -1 +0,0 @@ -empty_file \ No newline at end of file diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/path.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/path.pass.cpp index 850be12e839c0..f7c3e442707d7 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/path.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/path.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -46,6 +45,7 @@ TEST_CASE(path_ctor) { } TEST_CASE(path_ec_ctor) { + static_test_env static_env; using namespace fs; { static_assert( @@ -61,8 +61,8 @@ TEST_CASE(path_ec_ctor) { } { std::error_code ec = GetTestEC(); - const directory_entry e(StaticEnv::File, ec); - TEST_CHECK(e.path() == StaticEnv::File); + const directory_entry e(static_env.File, ec); + TEST_CHECK(e.path() == static_env.File); TEST_CHECK(!ec); } { @@ -121,26 +121,28 @@ TEST_CASE(path_ctor_calls_refresh) { TEST_CASE(path_ctor_dne) { using namespace fs; + static_test_env static_env; + { std::error_code ec = GetTestEC(); - directory_entry ent(StaticEnv::DNE, ec); + directory_entry ent(static_env.DNE, ec); TEST_CHECK(ErrorIs(ec, std::errc::no_such_file_or_directory)); - TEST_CHECK(ent.path() == StaticEnv::DNE); + TEST_CHECK(ent.path() == static_env.DNE); } // don't report dead symlinks as an error. 
{ std::error_code ec = GetTestEC(); - directory_entry ent(StaticEnv::BadSymlink, ec); + directory_entry ent(static_env.BadSymlink, ec); TEST_CHECK(!ec); - TEST_CHECK(ent.path() == StaticEnv::BadSymlink); + TEST_CHECK(ent.path() == static_env.BadSymlink); } // DNE does not cause the constructor to throw { - directory_entry ent(StaticEnv::DNE); - TEST_CHECK(ent.path() == StaticEnv::DNE); + directory_entry ent(static_env.DNE); + TEST_CHECK(ent.path() == static_env.DNE); - directory_entry ent_two(StaticEnv::BadSymlink); - TEST_CHECK(ent_two.path() == StaticEnv::BadSymlink); + directory_entry ent_two(static_env.BadSymlink); + TEST_CHECK(ent_two.path() == static_env.BadSymlink); } } diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/replace_filename.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/replace_filename.pass.cpp index 2a2146be1b849..0f3624bd65a5b 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/replace_filename.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/replace_filename.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -54,6 +53,7 @@ TEST_CASE(test_replace_filename_method) { TEST_CASE(test_replace_filename_ec_method) { using namespace fs; + static_test_env static_env; { directory_entry e; path replace; @@ -76,9 +76,9 @@ TEST_CASE(test_replace_filename_ec_method) { TEST_CHECK(ErrorIs(ec, std::errc::no_such_file_or_directory)); } { - const path p = StaticEnv::EmptyFile; - const path expect = StaticEnv::NonEmptyFile; - const path replace = StaticEnv::NonEmptyFile.filename(); + const path p = static_env.EmptyFile; + const path expect = static_env.NonEmptyFile; + const path replace = static_env.NonEmptyFile.filename(); 
TEST_REQUIRE(expect.parent_path() == p.parent_path()); directory_entry e(p); TEST_CHECK(e.path() == p); diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp index 8ff1b46b3b992..ad4cc43c95d97 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -112,6 +111,7 @@ TEST_CASE(not_regular_file) { TEST_CASE(error_reporting) { using namespace fs; + static_test_env static_env; scoped_test_env env; const path dir = env.create_dir("dir"); @@ -127,15 +127,15 @@ TEST_CASE(error_reporting) { directory_entry ent; std::error_code ec = GetTestEC(); - ent.assign(StaticEnv::DNE, ec); - TEST_REQUIRE(ent.path() == StaticEnv::DNE); + ent.assign(static_env.DNE, ec); + TEST_REQUIRE(ent.path() == static_env.DNE); TEST_CHECK(ErrorIs(ec, std::errc::no_such_file_or_directory)); ec = GetTestEC(); TEST_CHECK(ent.file_size(ec) == uintmax_t(-1)); TEST_CHECK(ErrorIs(ec, std::errc::no_such_file_or_directory)); - ExceptionChecker Checker(StaticEnv::DNE, + ExceptionChecker Checker(static_env.DNE, std::errc::no_such_file_or_directory, "directory_entry::file_size"); TEST_CHECK_THROW_RESULT(filesystem_error, Checker, ent.file_size()); @@ -145,20 +145,20 @@ TEST_CASE(error_reporting) { directory_entry ent; std::error_code ec = GetTestEC(); - uintmax_t expect_bad = file_size(StaticEnv::BadSymlink, ec); + uintmax_t expect_bad = file_size(static_env.BadSymlink, ec); TEST_CHECK(expect_bad == uintmax_t(-1)); TEST_CHECK(ErrorIs(ec, std::errc::no_such_file_or_directory)); ec = GetTestEC(); - 
ent.assign(StaticEnv::BadSymlink, ec); - TEST_REQUIRE(ent.path() == StaticEnv::BadSymlink); + ent.assign(static_env.BadSymlink, ec); + TEST_REQUIRE(ent.path() == static_env.BadSymlink); TEST_CHECK(!ec); ec = GetTestEC(); TEST_CHECK(ent.file_size(ec) == expect_bad); TEST_CHECK(ErrorIs(ec, std::errc::no_such_file_or_directory)); - ExceptionChecker Checker(StaticEnv::BadSymlink, + ExceptionChecker Checker(static_env.BadSymlink, std::errc::no_such_file_or_directory, "directory_entry::file_size"); TEST_CHECK_THROW_RESULT(filesystem_error, Checker, ent.file_size()); diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp index f5a44b76a8b03..b3bfa5e7eeacb 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp @@ -149,8 +149,8 @@ TEST_CASE(test_with_ec_dne) { using fs::directory_entry; using fs::file_status; using fs::path; - - for (auto p : {StaticEnv::DNE, StaticEnv::BadSymlink}) { + static_test_env static_env; + for (auto p : {static_env.DNE, static_env.BadSymlink}) { directory_entry e(p); std::error_code status_ec = GetTestEC(); diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp index d9958a6c5f030..8090bd77da720 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// 
FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -110,6 +109,7 @@ TEST_CASE(not_regular_file) { TEST_CASE(error_reporting) { using namespace fs; + static_test_env static_env; scoped_test_env env; const path dir = env.create_dir("dir"); @@ -125,16 +125,16 @@ TEST_CASE(error_reporting) { directory_entry ent; std::error_code ec = GetTestEC(); - ent.assign(StaticEnv::DNE, ec); + ent.assign(static_env.DNE, ec); TEST_CHECK(ec); - TEST_REQUIRE(ent.path() == StaticEnv::DNE); + TEST_REQUIRE(ent.path() == static_env.DNE); TEST_CHECK(ErrorIs(ec, std::errc::no_such_file_or_directory)); ec = GetTestEC(); TEST_CHECK(ent.hard_link_count(ec) == uintmax_t(-1)); TEST_CHECK(ErrorIs(ec, std::errc::no_such_file_or_directory)); - ExceptionChecker Checker(StaticEnv::DNE, + ExceptionChecker Checker(static_env.DNE, std::errc::no_such_file_or_directory, "directory_entry::hard_link_count"); TEST_CHECK_THROW_RESULT(filesystem_error, Checker, ent.hard_link_count()); @@ -144,20 +144,20 @@ TEST_CASE(error_reporting) { directory_entry ent; std::error_code ec = GetTestEC(); - uintmax_t expect_bad = hard_link_count(StaticEnv::BadSymlink, ec); + uintmax_t expect_bad = hard_link_count(static_env.BadSymlink, ec); TEST_CHECK(expect_bad == uintmax_t(-1)); TEST_CHECK(ErrorIs(ec, std::errc::no_such_file_or_directory)); ec = GetTestEC(); - ent.assign(StaticEnv::BadSymlink, ec); - TEST_REQUIRE(ent.path() == StaticEnv::BadSymlink); + ent.assign(static_env.BadSymlink, ec); + TEST_REQUIRE(ent.path() == static_env.BadSymlink); TEST_CHECK(!ec); ec = GetTestEC(); TEST_CHECK(ent.hard_link_count(ec) == expect_bad); TEST_CHECK(ErrorIs(ec, std::errc::no_such_file_or_directory)); - ExceptionChecker Checker(StaticEnv::BadSymlink, + ExceptionChecker Checker(static_env.BadSymlink, std::errc::no_such_file_or_directory, "directory_entry::hard_link_count"); TEST_CHECK_THROW_RESULT(filesystem_error, Checker, ent.hard_link_count()); diff --git 
a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp index de6c36c218f2b..98949ea06e0cb 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -84,6 +83,7 @@ TEST_CASE(basic) { TEST_CASE(error_reporting) { using namespace fs; + static_test_env static_env; scoped_test_env env; const path dir = env.create_dir("dir"); @@ -99,15 +99,15 @@ TEST_CASE(error_reporting) { directory_entry ent; std::error_code ec = GetTestEC(); - ent.assign(StaticEnv::DNE, ec); - TEST_REQUIRE(ent.path() == StaticEnv::DNE); + ent.assign(static_env.DNE, ec); + TEST_REQUIRE(ent.path() == static_env.DNE); TEST_CHECK(ErrorIs(ec, std::errc::no_such_file_or_directory)); ec = GetTestEC(); TEST_CHECK(ent.last_write_time(ec) == file_time_type::min()); TEST_CHECK(ErrorIs(ec, std::errc::no_such_file_or_directory)); - ExceptionChecker Checker(StaticEnv::DNE, + ExceptionChecker Checker(static_env.DNE, std::errc::no_such_file_or_directory, "directory_entry::last_write_time"); TEST_CHECK_THROW_RESULT(filesystem_error, Checker, ent.last_write_time()); @@ -117,20 +117,20 @@ TEST_CASE(error_reporting) { directory_entry ent; std::error_code ec = GetTestEC(); - file_time_type expect_bad = last_write_time(StaticEnv::BadSymlink, ec); + file_time_type expect_bad = last_write_time(static_env.BadSymlink, ec); TEST_CHECK(expect_bad == file_time_type::min()); TEST_CHECK(ErrorIs(ec, std::errc::no_such_file_or_directory)); ec = GetTestEC(); - ent.assign(StaticEnv::BadSymlink, ec); - 
TEST_REQUIRE(ent.path() == StaticEnv::BadSymlink); + ent.assign(static_env.BadSymlink, ec); + TEST_REQUIRE(ent.path() == static_env.BadSymlink); TEST_CHECK(!ec); ec = GetTestEC(); TEST_CHECK(ent.last_write_time(ec) == expect_bad); TEST_CHECK(ErrorIs(ec, std::errc::no_such_file_or_directory)); - ExceptionChecker Checker(StaticEnv::BadSymlink, + ExceptionChecker Checker(static_env.BadSymlink, std::errc::no_such_file_or_directory, "directory_entry::last_write_time"); TEST_CHECK_THROW_RESULT(filesystem_error, Checker, ent.last_write_time()); diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/status.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/status.pass.cpp index 8ea1fdbcbe6ec..796d47bfb5259 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/status.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/status.pass.cpp @@ -28,6 +28,7 @@ TEST_SUITE(directory_entry_status_testsuite) TEST_CASE(test_basic) { using namespace fs; + static_test_env static_env; { const fs::directory_entry e("foo"); std::error_code ec; @@ -36,8 +37,8 @@ TEST_CASE(test_basic) { static_assert(noexcept(e.status()) == false, ""); static_assert(noexcept(e.status(ec)) == true, ""); } - path TestCases[] = {StaticEnv::File, StaticEnv::Dir, StaticEnv::SymlinkToFile, - StaticEnv::DNE}; + path TestCases[] = {static_env.File, static_env.Dir, static_env.SymlinkToFile, + static_env.DNE}; for (const auto& p : TestCases) { const directory_entry e(p); std::error_code pec = GetTestEC(), eec = GetTestEC(1); diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/symlink_status.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/symlink_status.pass.cpp index cff19bc583ff3..c8dd6c1335392 100644 --- 
a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/symlink_status.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/symlink_status.pass.cpp @@ -28,6 +28,7 @@ TEST_SUITE(directory_entry_obs_suite) TEST_CASE(test_signature) { using namespace fs; + static_test_env static_env; { const directory_entry e("foo"); std::error_code ec; @@ -36,8 +37,8 @@ TEST_CASE(test_signature) { static_assert(noexcept(e.symlink_status()) == false, ""); static_assert(noexcept(e.symlink_status(ec)) == true, ""); } - path TestCases[] = {StaticEnv::File, StaticEnv::Dir, StaticEnv::SymlinkToFile, - StaticEnv::DNE}; + path TestCases[] = {static_env.File, static_env.Dir, static_env.SymlinkToFile, + static_env.DNE}; for (const auto& p : TestCases) { const directory_entry e(p); std::error_code pec = GetTestEC(), eec = GetTestEC(1); diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy.pass.cpp index d01854c1e4037..5b7d709d9aece 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -43,7 +42,8 @@ TEST_CASE(test_copy_end_iterator) TEST_CASE(test_copy_valid_iterator) { - const path testDir = StaticEnv::Dir; + static_test_env static_env; + const path testDir = static_env.Dir; const directory_iterator endIt{}; const directory_iterator it(testDir); diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy_assign.pass.cpp 
b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy_assign.pass.cpp index 8d33240791052..920ded9523da0 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy_assign.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy_assign.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -36,7 +35,8 @@ TEST_CASE(test_assignment_signature) TEST_CASE(test_copy_to_end_iterator) { - const path testDir = StaticEnv::Dir; + static_test_env static_env; + const path testDir = static_env.Dir; const directory_iterator from(testDir); TEST_REQUIRE(from != directory_iterator{}); @@ -52,7 +52,8 @@ TEST_CASE(test_copy_to_end_iterator) TEST_CASE(test_copy_from_end_iterator) { - const path testDir = StaticEnv::Dir; + static_test_env static_env; + const path testDir = static_env.Dir; const directory_iterator from{}; @@ -66,7 +67,8 @@ TEST_CASE(test_copy_from_end_iterator) TEST_CASE(test_copy_valid_iterator) { - const path testDir = StaticEnv::Dir; + static_test_env static_env; + const path testDir = static_env.Dir; const directory_iterator endIt{}; directory_iterator it_obj(testDir); diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/ctor.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/ctor.pass.cpp index a06c4a64c7ffa..206e7860f4877 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/ctor.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/ctor.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: 
../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -60,11 +59,12 @@ TEST_CASE(test_constructor_signatures) TEST_CASE(test_construction_from_bad_path) { + static_test_env static_env; std::error_code ec; directory_options opts = directory_options::none; const directory_iterator endIt; - const path testPaths[] = { StaticEnv::DNE, StaticEnv::BadSymlink }; + const path testPaths[] = { static_env.DNE, static_env.BadSymlink }; for (path const& testPath : testPaths) { { @@ -169,9 +169,10 @@ TEST_CASE(test_open_on_empty_directory_equals_end) TEST_CASE(test_open_on_directory_succeeds) { - const path testDir = StaticEnv::Dir; - std::set dir_contents(std::begin(StaticEnv::DirIterationList), - std::end( StaticEnv::DirIterationList)); + static_test_env static_env; + const path testDir = static_env.Dir; + std::set dir_contents(static_env.DirIterationList.begin(), + static_env.DirIterationList.end()); const directory_iterator endIt{}; { @@ -190,7 +191,8 @@ TEST_CASE(test_open_on_directory_succeeds) TEST_CASE(test_open_on_file_fails) { - const path testFile = StaticEnv::File; + static_test_env static_env; + const path testFile = static_env.File; const directory_iterator endIt{}; { std::error_code ec; @@ -225,9 +227,10 @@ TEST_CASE(test_open_on_dot_dir) TEST_CASE(test_open_on_symlink) { - const path symlinkToDir = StaticEnv::SymlinkToDir; + static_test_env static_env; + const path symlinkToDir = static_env.SymlinkToDir; std::set dir_contents; - for (path const& p : StaticEnv::DirIterationList) { + for (path const& p : static_env.DirIterationList) { dir_contents.insert(p.filename()); } const directory_iterator endIt{}; diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/increment.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/increment.pass.cpp index c527013237ac6..f1f9a27a45f08 100644 --- 
a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/increment.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/increment.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -44,9 +43,10 @@ TEST_CASE(test_increment_signatures) TEST_CASE(test_prefix_increment) { - const path testDir = StaticEnv::Dir; - const std::set dir_contents(std::begin(StaticEnv::DirIterationList), - std::end( StaticEnv::DirIterationList)); + static_test_env static_env; + const path testDir = static_env.Dir; + const std::set dir_contents(static_env.DirIterationList.begin(), + static_env.DirIterationList.end()); const directory_iterator endIt{}; std::error_code ec; @@ -67,9 +67,10 @@ TEST_CASE(test_prefix_increment) TEST_CASE(test_postfix_increment) { - const path testDir = StaticEnv::Dir; - const std::set dir_contents(std::begin(StaticEnv::DirIterationList), - std::end( StaticEnv::DirIterationList)); + static_test_env static_env; + const path testDir = static_env.Dir; + const std::set dir_contents(static_env.DirIterationList.begin(), + static_env.DirIterationList.end()); const directory_iterator endIt{}; std::error_code ec; @@ -91,9 +92,10 @@ TEST_CASE(test_postfix_increment) TEST_CASE(test_increment_method) { - const path testDir = StaticEnv::Dir; - const std::set dir_contents(std::begin(StaticEnv::DirIterationList), - std::end( StaticEnv::DirIterationList)); + static_test_env static_env; + const path testDir = static_env.Dir; + const std::set dir_contents(static_env.DirIterationList.begin(), + static_env.DirIterationList.end()); const directory_iterator endIt{}; std::error_code ec; diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move.pass.cpp 
b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move.pass.cpp index c4d0e23b46f8d..8b17383fe6c1e 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -46,7 +45,8 @@ TEST_CASE(test_move_end_iterator) TEST_CASE(test_move_valid_iterator) { - const path testDir = StaticEnv::Dir; + static_test_env static_env; + const path testDir = static_env.Dir; const directory_iterator endIt{}; directory_iterator it(testDir); diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move_assign.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move_assign.pass.cpp index bf737146d585c..93fa67b4230e3 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move_assign.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move_assign.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -42,7 +41,8 @@ TEST_CASE(test_assignment_signature) TEST_CASE(test_move_to_end_iterator) { - const path testDir = StaticEnv::Dir; + static_test_env static_env; + const path testDir = static_env.Dir; directory_iterator from(testDir); TEST_REQUIRE(from != directory_iterator{}); @@ -57,7 +57,8 @@ TEST_CASE(test_move_to_end_iterator) TEST_CASE(test_move_from_end_iterator) { - const path testDir = StaticEnv::Dir; + static_test_env static_env; + const path testDir 
= static_env.Dir; directory_iterator from{}; @@ -71,7 +72,8 @@ TEST_CASE(test_move_from_end_iterator) TEST_CASE(test_move_valid_iterator) { - const path testDir = StaticEnv::Dir; + static_test_env static_env; + const path testDir = static_env.Dir; const directory_iterator endIt{}; directory_iterator it(testDir); @@ -101,9 +103,10 @@ TEST_CASE(test_returns_reference_to_self) TEST_CASE(test_self_move) { + static_test_env static_env; // Create two non-equal iterators that have exactly the same state. - directory_iterator it(StaticEnv::Dir); - directory_iterator it2(StaticEnv::Dir); + directory_iterator it(static_env.Dir); + directory_iterator it2(static_env.Dir); ++it; ++it2; TEST_CHECK(it != it2); TEST_CHECK(*it2 == *it); diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.nonmembers/begin_end.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.nonmembers/begin_end.pass.cpp index 5b1205edc6dbc..c80458eeab8b4 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.nonmembers/begin_end.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.nonmembers/begin_end.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -43,9 +42,10 @@ TEST_CASE(test_function_signatures) TEST_CASE(test_ranged_for_loop) { - const path testDir = StaticEnv::Dir; - std::set dir_contents(std::begin(StaticEnv::DirIterationList), - std::end( StaticEnv::DirIterationList)); + static_test_env static_env; + const path testDir = static_env.Dir; + std::set dir_contents(static_env.DirIterationList.begin(), + static_env.DirIterationList.end()); std::error_code ec; directory_iterator it(testDir, ec); diff --git 
a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy.pass.cpp index bff2a18dd70a5..09a53e46e263a 100644 --- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -44,7 +43,8 @@ TEST_CASE(test_copy_end_iterator) TEST_CASE(test_copy_valid_iterator) { - const path testDir = StaticEnv::Dir; + static_test_env static_env; + const path testDir = static_env.Dir; const recursive_directory_iterator endIt{}; // build 'it' up with "interesting" non-default state so we can test diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy_assign.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy_assign.pass.cpp index d7468a21c4ed9..e03e9f3880b83 100644 --- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy_assign.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy_assign.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -28,7 +27,7 @@ using namespace fs; TEST_SUITE(recursive_directory_iterator_copy_assign_tests) -recursive_directory_iterator createInterestingIterator() +recursive_directory_iterator createInterestingIterator(const static_test_env &static_env) // Create an "interesting" iterator where all fields are // in a non-default state. 
The returned 'it' is in a // state such that: @@ -36,7 +35,7 @@ recursive_directory_iterator createInterestingIterator() // it.depth() == 1 // it.recursion_pending() == true { - const path testDir = StaticEnv::Dir; + const path testDir = static_env.Dir; const recursive_directory_iterator endIt; recursive_directory_iterator it(testDir, directory_options::skip_permission_denied); @@ -51,7 +50,7 @@ recursive_directory_iterator createInterestingIterator() } -recursive_directory_iterator createDifferentInterestingIterator() +recursive_directory_iterator createDifferentInterestingIterator(const static_test_env &static_env) // Create an "interesting" iterator where all fields are // in a non-default state. The returned 'it' is in a // state such that: @@ -59,7 +58,7 @@ recursive_directory_iterator createDifferentInterestingIterator() // it.depth() == 2 // it.recursion_pending() == false { - const path testDir = StaticEnv::Dir; + const path testDir = static_env.Dir; const recursive_directory_iterator endIt; recursive_directory_iterator it(testDir, directory_options::follow_directory_symlink); @@ -79,9 +78,10 @@ TEST_CASE(test_assignment_signature) { TEST_CASE(test_copy_to_end_iterator) { + static_test_env static_env; const recursive_directory_iterator endIt; - const recursive_directory_iterator from = createInterestingIterator(); + const recursive_directory_iterator from = createInterestingIterator(static_env); const path entry = *from; recursive_directory_iterator to; @@ -96,8 +96,9 @@ TEST_CASE(test_copy_to_end_iterator) TEST_CASE(test_copy_from_end_iterator) { + static_test_env static_env; const recursive_directory_iterator from; - recursive_directory_iterator to = createInterestingIterator(); + recursive_directory_iterator to = createInterestingIterator(static_env); to = from; TEST_REQUIRE(to == from); @@ -106,12 +107,13 @@ TEST_CASE(test_copy_from_end_iterator) TEST_CASE(test_copy_valid_iterator) { + static_test_env static_env; const recursive_directory_iterator 
endIt; - const recursive_directory_iterator it = createInterestingIterator(); + const recursive_directory_iterator it = createInterestingIterator(static_env); const path entry = *it; - recursive_directory_iterator it2 = createDifferentInterestingIterator(); + recursive_directory_iterator it2 = createDifferentInterestingIterator(static_env); TEST_REQUIRE(it2 != it); TEST_CHECK(it2.options() != it.options()); TEST_CHECK(it2.depth() != it.depth()); @@ -136,9 +138,10 @@ TEST_CASE(test_returns_reference_to_self) TEST_CASE(test_self_copy) { + static_test_env static_env; // Create two non-equal iterators that have exactly the same state. - recursive_directory_iterator it = createInterestingIterator(); - recursive_directory_iterator it2 = createInterestingIterator(); + recursive_directory_iterator it = createInterestingIterator(static_env); + recursive_directory_iterator it2 = createInterestingIterator(static_env); TEST_CHECK(it != it2); TEST_CHECK(it2.options() == it.options()); TEST_CHECK(it2.depth() == it.depth()); diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/ctor.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/ctor.pass.cpp index 177edb46e058d..40f89df4e81a8 100644 --- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/ctor.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/ctor.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -61,11 +60,12 @@ TEST_CASE(test_constructor_signatures) TEST_CASE(test_construction_from_bad_path) { + static_test_env static_env; std::error_code ec; directory_options opts = directory_options::none; const RDI endIt; - const path testPaths[] = { StaticEnv::DNE, StaticEnv::BadSymlink }; + const path testPaths[] = { static_env.DNE, 
static_env.BadSymlink }; for (path const& testPath : testPaths) { { @@ -171,9 +171,10 @@ TEST_CASE(test_open_on_empty_directory_equals_end) TEST_CASE(test_open_on_directory_succeeds) { - const path testDir = StaticEnv::Dir; - std::set dir_contents(std::begin(StaticEnv::DirIterationList), - std::end( StaticEnv::DirIterationList)); + static_test_env static_env; + const path testDir = static_env.Dir; + std::set dir_contents(static_env.DirIterationList.begin(), + static_env.DirIterationList.end()); const RDI endIt{}; { @@ -192,7 +193,8 @@ TEST_CASE(test_open_on_directory_succeeds) TEST_CASE(test_open_on_file_fails) { - const path testFile = StaticEnv::File; + static_test_env static_env; + const path testFile = static_env.File; const RDI endIt{}; { std::error_code ec; @@ -207,8 +209,9 @@ TEST_CASE(test_open_on_file_fails) TEST_CASE(test_options_post_conditions) { - const path goodDir = StaticEnv::Dir; - const path badDir = StaticEnv::DNE; + static_test_env static_env; + const path goodDir = static_env.Dir; + const path badDir = static_env.DNE; { std::error_code ec; diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/depth.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/depth.pass.cpp index da6c10a88cc99..bd12ec8885668 100644 --- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/depth.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/depth.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -30,9 +29,10 @@ TEST_SUITE(recursive_directory_iterator_depth_tests) TEST_CASE(test_depth) { - const path testDir = StaticEnv::Dir; - const path DirDepth1 = StaticEnv::Dir2; - const path DirDepth2 = StaticEnv::Dir3; + static_test_env static_env; + const path testDir = static_env.Dir; + 
const path DirDepth1 = static_env.Dir2; + const path DirDepth2 = static_env.Dir3; const recursive_directory_iterator endIt{}; std::error_code ec; diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/disable_recursion_pending.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/disable_recursion_pending.pass.cpp index 5d29b7e32f60f..568563984dd49 100644 --- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/disable_recursion_pending.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/disable_recursion_pending.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -32,7 +31,8 @@ TEST_SUITE(recursive_directory_iterator_disable_recursion_pending_tests) // in the 'recursion_pending()' tests. TEST_CASE(basic_test) { - recursive_directory_iterator it(StaticEnv::Dir); + static_test_env static_env; + recursive_directory_iterator it(static_env.Dir); TEST_REQUIRE(it.recursion_pending() == true); it.disable_recursion_pending(); TEST_CHECK(it.recursion_pending() == false); diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/increment.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/increment.pass.cpp index 1c7650543396b..3c0127e2a3189 100644 --- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/increment.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/increment.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -43,9 +42,10 @@ TEST_CASE(test_increment_signatures) TEST_CASE(test_prefix_increment) { 
- const path testDir = StaticEnv::Dir; - const std::set dir_contents(std::begin(StaticEnv::RecDirIterationList), - std::end( StaticEnv::RecDirIterationList)); + static_test_env static_env; + const path testDir = static_env.Dir; + const std::set dir_contents(static_env.RecDirIterationList.begin(), + static_env.RecDirIterationList.end()); const recursive_directory_iterator endIt{}; std::error_code ec; @@ -66,9 +66,10 @@ TEST_CASE(test_prefix_increment) TEST_CASE(test_postfix_increment) { - const path testDir = StaticEnv::Dir; - const std::set dir_contents(std::begin(StaticEnv::RecDirIterationList), - std::end( StaticEnv::RecDirIterationList)); + static_test_env static_env; + const path testDir = static_env.Dir; + const std::set dir_contents(static_env.RecDirIterationList.begin(), + static_env.RecDirIterationList.end()); const recursive_directory_iterator endIt{}; std::error_code ec; @@ -89,9 +90,10 @@ TEST_CASE(test_postfix_increment) TEST_CASE(test_increment_method) { - const path testDir = StaticEnv::Dir; - const std::set dir_contents(std::begin(StaticEnv::RecDirIterationList), - std::end( StaticEnv::RecDirIterationList)); + static_test_env static_env; + const path testDir = static_env.Dir; + const std::set dir_contents(static_env.RecDirIterationList.begin(), + static_env.RecDirIterationList.end()); const recursive_directory_iterator endIt{}; std::error_code ec; @@ -113,10 +115,11 @@ TEST_CASE(test_increment_method) TEST_CASE(test_follow_symlinks) { - const path testDir = StaticEnv::Dir; - auto const& IterList = StaticEnv::RecDirFollowSymlinksIterationList; + static_test_env static_env; + const path testDir = static_env.Dir; + auto const& IterList = static_env.RecDirFollowSymlinksIterationList; - const std::set dir_contents(std::begin(IterList), std::end(IterList)); + const std::set dir_contents(IterList.begin(), IterList.end()); const recursive_directory_iterator endIt{}; std::error_code ec; diff --git 
a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move.pass.cpp index d709012bef8cc..a944478284e62 100644 --- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -46,7 +45,8 @@ TEST_CASE(test_move_end_iterator) TEST_CASE(test_move_valid_iterator) { - const path testDir = StaticEnv::Dir; + static_test_env static_env; + const path testDir = static_env.Dir; const recursive_directory_iterator endIt{}; // build 'it' up with "interesting" non-default state so we can test diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move_assign.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move_assign.pass.cpp index 2dee77c71731e..3dc2bb758b899 100644 --- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move_assign.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move_assign.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -34,7 +33,7 @@ using namespace fs; TEST_SUITE(recursive_directory_iterator_move_assign_tests) -recursive_directory_iterator createInterestingIterator() +recursive_directory_iterator createInterestingIterator(const static_test_env &static_env) // Create an "interesting" iterator where all fields are // in a non-default state. 
The returned 'it' is in a // state such that: @@ -42,7 +41,7 @@ recursive_directory_iterator createInterestingIterator() // it.depth() == 1 // it.recursion_pending() == true { - const path testDir = StaticEnv::Dir; + const path testDir = static_env.Dir; const recursive_directory_iterator endIt; recursive_directory_iterator it(testDir, directory_options::skip_permission_denied); @@ -56,7 +55,7 @@ recursive_directory_iterator createInterestingIterator() return it; } -recursive_directory_iterator createDifferentInterestingIterator() +recursive_directory_iterator createDifferentInterestingIterator(const static_test_env &static_env) // Create an "interesting" iterator where all fields are // in a non-default state. The returned 'it' is in a // state such that: @@ -64,7 +63,7 @@ recursive_directory_iterator createDifferentInterestingIterator() // it.depth() == 2 // it.recursion_pending() == false { - const path testDir = StaticEnv::Dir; + const path testDir = static_env.Dir; const recursive_directory_iterator endIt; recursive_directory_iterator it(testDir, directory_options::follow_directory_symlink); @@ -87,9 +86,10 @@ TEST_CASE(test_assignment_signature) TEST_CASE(test_move_to_end_iterator) { + static_test_env static_env; const recursive_directory_iterator endIt; - recursive_directory_iterator from = createInterestingIterator(); + recursive_directory_iterator from = createInterestingIterator(static_env); const recursive_directory_iterator from_copy(from); const path entry = *from; @@ -106,8 +106,9 @@ TEST_CASE(test_move_to_end_iterator) TEST_CASE(test_move_from_end_iterator) { + static_test_env static_env; recursive_directory_iterator from; - recursive_directory_iterator to = createInterestingIterator(); + recursive_directory_iterator to = createInterestingIterator(static_env); to = std::move(from); TEST_REQUIRE(to == from); @@ -116,13 +117,14 @@ TEST_CASE(test_move_from_end_iterator) TEST_CASE(test_move_valid_iterator) { + static_test_env static_env; const 
recursive_directory_iterator endIt; - recursive_directory_iterator it = createInterestingIterator(); + recursive_directory_iterator it = createInterestingIterator(static_env); const recursive_directory_iterator it_copy(it); const path entry = *it; - recursive_directory_iterator it2 = createDifferentInterestingIterator(); + recursive_directory_iterator it2 = createDifferentInterestingIterator(static_env); const recursive_directory_iterator it2_copy(it2); TEST_REQUIRE(it2 != it); TEST_CHECK(it2.options() != it.options()); @@ -149,9 +151,10 @@ TEST_CASE(test_returns_reference_to_self) TEST_CASE(test_self_move) { + static_test_env static_env; // Create two non-equal iterators that have exactly the same state. - recursive_directory_iterator it = createInterestingIterator(); - recursive_directory_iterator it2 = createInterestingIterator(); + recursive_directory_iterator it = createInterestingIterator(static_env); + recursive_directory_iterator it2 = createInterestingIterator(static_env); TEST_CHECK(it != it2); TEST_CHECK(it2.options() == it.options()); TEST_CHECK(it2.depth() == it.depth()); diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/pop.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/pop.pass.cpp index c6ecdae56db3c..2d4edc97c549d 100644 --- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/pop.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/pop.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -40,16 +39,17 @@ TEST_CASE(signature_tests) // seen files at each depth to determine the new depth after a 'pop()' operation. 
TEST_CASE(test_depth) { + static_test_env static_env; const recursive_directory_iterator endIt{}; - auto& DE0 = StaticEnv::DirIterationList; - std::set notSeenDepth0(std::begin(DE0), std::end(DE0)); + auto& DE0 = static_env.DirIterationList; + std::set notSeenDepth0(DE0.begin(), DE0.end()); - auto& DE1 = StaticEnv::DirIterationListDepth1; - std::set notSeenDepth1(std::begin(DE1), std::end(DE1)); + auto& DE1 = static_env.DirIterationListDepth1; + std::set notSeenDepth1(DE1.begin(), DE1.end()); std::error_code ec; - recursive_directory_iterator it(StaticEnv::Dir, ec); + recursive_directory_iterator it(static_env.Dir, ec); TEST_REQUIRE(it != endIt); TEST_CHECK(it.depth() == 0); diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/recursion_pending.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/recursion_pending.pass.cpp index 6bb7b939554a1..731766e262b38 100644 --- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/recursion_pending.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/recursion_pending.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -30,14 +29,16 @@ TEST_SUITE(recursive_directory_iterator_recursion_pending_tests) TEST_CASE(initial_value_test) { - recursive_directory_iterator it(StaticEnv::Dir); + static_test_env static_env; + recursive_directory_iterator it(static_env.Dir); TEST_REQUIRE(it.recursion_pending() == true); } TEST_CASE(value_after_copy_construction_and_assignment_test) { - recursive_directory_iterator rec_pending_it(StaticEnv::Dir); - recursive_directory_iterator no_rec_pending_it(StaticEnv::Dir); + static_test_env static_env; + recursive_directory_iterator rec_pending_it(static_env.Dir); + recursive_directory_iterator 
no_rec_pending_it(static_env.Dir); no_rec_pending_it.disable_recursion_pending(); { // copy construction @@ -50,14 +51,14 @@ TEST_CASE(value_after_copy_construction_and_assignment_test) TEST_CHECK(it2.recursion_pending() == false); } { // copy assignment - recursive_directory_iterator it(StaticEnv::Dir); + recursive_directory_iterator it(static_env.Dir); it.disable_recursion_pending(); it = rec_pending_it; TEST_CHECK(it.recursion_pending() == true); it.disable_recursion_pending(); TEST_REQUIRE(rec_pending_it.recursion_pending() == true); - recursive_directory_iterator it2(StaticEnv::Dir); + recursive_directory_iterator it2(static_env.Dir); it2 = no_rec_pending_it; TEST_CHECK(it2.recursion_pending() == false); } @@ -68,8 +69,9 @@ TEST_CASE(value_after_copy_construction_and_assignment_test) TEST_CASE(value_after_move_construction_and_assignment_test) { - recursive_directory_iterator rec_pending_it(StaticEnv::Dir); - recursive_directory_iterator no_rec_pending_it(StaticEnv::Dir); + static_test_env static_env; + recursive_directory_iterator rec_pending_it(static_env.Dir); + recursive_directory_iterator no_rec_pending_it(static_env.Dir); no_rec_pending_it.disable_recursion_pending(); { // move construction @@ -82,13 +84,13 @@ TEST_CASE(value_after_move_construction_and_assignment_test) TEST_CHECK(it2.recursion_pending() == false); } { // copy assignment - recursive_directory_iterator it(StaticEnv::Dir); + recursive_directory_iterator it(static_env.Dir); it.disable_recursion_pending(); recursive_directory_iterator it_cp(rec_pending_it); it = std::move(it_cp); TEST_CHECK(it.recursion_pending() == true); - recursive_directory_iterator it2(StaticEnv::Dir); + recursive_directory_iterator it2(static_env.Dir); recursive_directory_iterator it_cp2(no_rec_pending_it); it2 = std::move(it_cp2); TEST_CHECK(it2.recursion_pending() == false); @@ -99,9 +101,10 @@ TEST_CASE(value_after_move_construction_and_assignment_test) TEST_CASE(increment_resets_value) { + static_test_env 
static_env; const recursive_directory_iterator endIt; { - recursive_directory_iterator it(StaticEnv::Dir); + recursive_directory_iterator it(static_env.Dir); it.disable_recursion_pending(); TEST_CHECK(it.recursion_pending() == false); ++it; @@ -109,7 +112,7 @@ TEST_CASE(increment_resets_value) TEST_CHECK(it.depth() == 0); } { - recursive_directory_iterator it(StaticEnv::Dir); + recursive_directory_iterator it(static_env.Dir); it.disable_recursion_pending(); TEST_CHECK(it.recursion_pending() == false); it++; @@ -117,7 +120,7 @@ TEST_CASE(increment_resets_value) TEST_CHECK(it.depth() == 0); } { - recursive_directory_iterator it(StaticEnv::Dir); + recursive_directory_iterator it(static_env.Dir); it.disable_recursion_pending(); TEST_CHECK(it.recursion_pending() == false); std::error_code ec; @@ -129,12 +132,13 @@ TEST_CASE(increment_resets_value) TEST_CASE(pop_does_not_reset_value) { + static_test_env static_env; const recursive_directory_iterator endIt; - auto& DE0 = StaticEnv::DirIterationList; - std::set notSeenDepth0(std::begin(DE0), std::end(DE0)); + auto& DE0 = static_env.DirIterationList; + std::set notSeenDepth0(DE0.begin(), DE0.end()); - recursive_directory_iterator it(StaticEnv::Dir); + recursive_directory_iterator it(static_env.Dir); TEST_REQUIRE(it != endIt); while (it.depth() == 0) { diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.nonmembers/begin_end.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.nonmembers/begin_end.pass.cpp index 24eaf84c2931a..1a076f3a3ed58 100644 --- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.nonmembers/begin_end.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.nonmembers/begin_end.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -43,9 +42,10 @@ 
TEST_CASE(test_function_signatures) TEST_CASE(test_ranged_for_loop) { - const path testDir = StaticEnv::Dir; - std::set dir_contents(std::begin(StaticEnv::RecDirIterationList), - std::end( StaticEnv::RecDirIterationList)); + static_test_env static_env; + const path testDir = static_env.Dir; + std::set dir_contents(static_env.RecDirIterationList.begin(), + static_env.RecDirIterationList.end()); std::error_code ec; recursive_directory_iterator it(testDir, ec); diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.canonical/canonical.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.canonical/canonical.pass.cpp index de2fa54aca482..bc3f581b828ec 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.canonical/canonical.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.canonical/canonical.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -24,15 +23,6 @@ using namespace fs; -struct CWDGuard { - path OldCWD; - CWDGuard() : OldCWD(fs::current_path()) { } - ~CWDGuard() { fs::current_path(OldCWD); } - - CWDGuard(CWDGuard const&) = delete; - CWDGuard& operator=(CWDGuard const&) = delete; -}; - TEST_SUITE(filesystem_canonical_path_test_suite) TEST_CASE(signature_test) @@ -47,29 +37,32 @@ TEST_CASE(signature_test) // Each scope tests one of the cases. 
TEST_CASE(test_canonical) { + static_test_env static_env; CWDGuard guard; // has_root_name() && has_root_directory() - const path Root = StaticEnv::Root; + const path Root = static_env.Root; const path RootName = Root.filename(); - const path DirName = StaticEnv::Dir.filename(); - const path SymlinkName = StaticEnv::SymlinkToFile.filename(); + const path DirName = static_env.Dir.filename(); + const path SymlinkName = static_env.SymlinkToFile.filename(); struct TestCase { path p; path expect; path base; - TestCase(path p1, path e, path b = StaticEnv::Root) + TestCase(path p1, path e, path b) : p(p1), expect(e), base(b) {} }; const TestCase testCases[] = { - { ".", Root, Root}, - { DirName / ".." / "." / DirName, StaticEnv::Dir, Root}, - { StaticEnv::Dir2 / "..", StaticEnv::Dir }, - { StaticEnv::Dir3 / "../..", StaticEnv::Dir }, - { StaticEnv::Dir / ".", StaticEnv::Dir }, - { Root / "." / DirName / ".." / DirName, StaticEnv::Dir}, - { path("..") / "." / RootName / DirName / ".." / DirName, StaticEnv::Dir, Root}, - { StaticEnv::SymlinkToFile, StaticEnv::File }, - { SymlinkName, StaticEnv::File, StaticEnv::Root} + { ".", Root, Root }, + { DirName / ".." / "." / DirName, static_env.Dir, Root }, + { static_env.Dir2 / "..", static_env.Dir, Root }, + { static_env.Dir3 / "../..", static_env.Dir, Root }, + { static_env.Dir / ".", static_env.Dir, Root }, + { Root / "." / DirName / ".." / DirName, static_env.Dir, Root }, + { path("..") / "." / RootName / DirName / ".." 
/ DirName, + static_env.Dir, + Root }, + { static_env.SymlinkToFile, static_env.File, Root }, + { SymlinkName, static_env.File, Root} }; for (auto& TC : testCases) { std::error_code ec = GetTestEC(); @@ -85,21 +78,23 @@ TEST_CASE(test_canonical) TEST_CASE(test_dne_path) { + static_test_env static_env; std::error_code ec = GetTestEC(); { - const path ret = canonical(StaticEnv::DNE, ec); + const path ret = canonical(static_env.DNE, ec); TEST_CHECK(ec != GetTestEC()); TEST_REQUIRE(ec); TEST_CHECK(ret == path{}); } { - TEST_CHECK_THROW(filesystem_error, canonical(StaticEnv::DNE)); + TEST_CHECK_THROW(filesystem_error, canonical(static_env.DNE)); } } TEST_CASE(test_exception_contains_paths) { #ifndef TEST_HAS_NO_EXCEPTIONS + static_test_env static_env; CWDGuard guard; const path p = "blabla/dne"; try { @@ -110,13 +105,13 @@ TEST_CASE(test_exception_contains_paths) // libc++ provides the current path as the second path in the exception LIBCPP_ONLY(TEST_CHECK(err.path2() == current_path())); } - fs::current_path(StaticEnv::Dir); + fs::current_path(static_env.Dir); try { canonical(p); TEST_REQUIRE(false); } catch (filesystem_error const& err) { TEST_CHECK(err.path1() == p); - LIBCPP_ONLY(TEST_CHECK(err.path2() == StaticEnv::Dir)); + LIBCPP_ONLY(TEST_CHECK(err.path2() == static_env.Dir)); } #endif } diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy/copy.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy/copy.pass.cpp index 82565433e3105..5ffbe1a51c15c 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy/copy.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy/copy.pass.cpp @@ -63,6 +63,7 @@ TEST_CASE(test_error_reporting) #endif }; + static_test_env static_env; scoped_test_env env; const path file = env.create_file("file1", 42); const path dir = env.create_dir("dir"); @@ -74,7 +75,7 @@ TEST_CASE(test_error_reporting) // !exists(f) { std::error_code ec = test_ec; - const path 
f = StaticEnv::DNE; + const path f = static_env.DNE; const path t = env.test_root; fs::copy(f, t, ec); TEST_REQUIRE(ec); diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.current_path/current_path.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.current_path/current_path.pass.cpp index cae78c2f56140..f5ca9023d96b8 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.current_path/current_path.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.current_path/current_path.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -52,14 +51,18 @@ TEST_CASE(current_path_test) TEST_CASE(current_path_after_change_test) { - const path new_path = StaticEnv::Dir; + CWDGuard guard; + static_test_env static_env; + const path new_path = static_env.Dir; current_path(new_path); TEST_CHECK(current_path() == new_path); } TEST_CASE(current_path_is_file_test) { - const path p = StaticEnv::File; + CWDGuard guard; + static_test_env static_env; + const path p = static_env.File; std::error_code ec; const path old_p = current_path(); current_path(p, ec); @@ -69,14 +72,16 @@ TEST_CASE(current_path_is_file_test) TEST_CASE(set_to_non_absolute_path) { - const path base = StaticEnv::Dir; + CWDGuard guard; + static_test_env static_env; + const path base = static_env.Dir; current_path(base); - const path p = StaticEnv::Dir2.filename(); + const path p = static_env.Dir2.filename(); std::error_code ec; current_path(p, ec); TEST_CHECK(!ec); const path new_cwd = current_path(); - TEST_CHECK(new_cwd == StaticEnv::Dir2); + TEST_CHECK(new_cwd == static_env.Dir2); TEST_CHECK(new_cwd.is_absolute()); } diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.equivalent/equivalent.pass.cpp 
b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.equivalent/equivalent.pass.cpp index 71fce47266612..35f337d0b0a76 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.equivalent/equivalent.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.equivalent/equivalent.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -36,18 +35,19 @@ TEST_CASE(signature_test) { } TEST_CASE(equivalent_test) { + static_test_env static_env; struct TestCase { path lhs; path rhs; bool expect; }; const TestCase testCases[] = { - {StaticEnv::Dir, StaticEnv::Dir, true}, - {StaticEnv::File, StaticEnv::Dir, false}, - {StaticEnv::Dir, StaticEnv::SymlinkToDir, true}, - {StaticEnv::Dir, StaticEnv::SymlinkToFile, false}, - {StaticEnv::File, StaticEnv::File, true}, - {StaticEnv::File, StaticEnv::SymlinkToFile, true}, + {static_env.Dir, static_env.Dir, true}, + {static_env.File, static_env.Dir, false}, + {static_env.Dir, static_env.SymlinkToDir, true}, + {static_env.Dir, static_env.SymlinkToFile, false}, + {static_env.File, static_env.File, true}, + {static_env.File, static_env.SymlinkToFile, true}, }; for (auto& TC : testCases) { std::error_code ec; @@ -57,8 +57,9 @@ TEST_CASE(equivalent_test) { } TEST_CASE(equivalent_reports_error_if_input_dne) { - const path E = StaticEnv::File; - const path DNE = StaticEnv::DNE; + static_test_env static_env; + const path E = static_env.File; + const path DNE = static_env.DNE; { // Test that an error is reported when either of the paths don't exist std::error_code ec = GetTestEC(); TEST_CHECK(equivalent(E, DNE, ec) == false); diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.exists/exists.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.exists/exists.pass.cpp index 45270d4cd0ec8..b51182b64b4f8 100644 --- 
a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.exists/exists.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.exists/exists.pass.cpp @@ -62,7 +62,8 @@ TEST_CASE(exists_status_test) TEST_CASE(test_exist_not_found) { - const path p = StaticEnv::DNE; + static_test_env static_env; + const path p = static_env.DNE; TEST_CHECK(exists(p) == false); std::error_code ec = GetTestEC(); diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.file_size/file_size.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.file_size/file_size.pass.cpp index 6f27a4cb57124..0573aa5470d25 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.file_size/file_size.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.file_size/file_size.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -38,7 +37,8 @@ TEST_CASE(signature_test) TEST_CASE(file_size_empty_test) { - const path p = StaticEnv::EmptyFile; + static_test_env static_env; + const path p = static_env.EmptyFile; TEST_CHECK(file_size(p) == 0); std::error_code ec; TEST_CHECK(file_size(p, ec) == 0); @@ -55,21 +55,23 @@ TEST_CASE(file_size_non_empty) TEST_CASE(symlink_test_case) { - const path p = StaticEnv::File; - const path p2 = StaticEnv::SymlinkToFile; + static_test_env static_env; + const path p = static_env.File; + const path p2 = static_env.SymlinkToFile; TEST_CHECK(file_size(p) == file_size(p2)); } TEST_CASE(file_size_error_cases) { + static_test_env static_env; struct { path p; std::errc expected_err; } TestCases[] = { - {StaticEnv::Dir, std::errc::is_a_directory}, - {StaticEnv::SymlinkToDir, std::errc::is_a_directory}, - {StaticEnv::BadSymlink, std::errc::no_such_file_or_directory}, - {StaticEnv::DNE, std::errc::no_such_file_or_directory}, + {static_env.Dir, 
std::errc::is_a_directory}, + {static_env.SymlinkToDir, std::errc::is_a_directory}, + {static_env.BadSymlink, std::errc::no_such_file_or_directory}, + {static_env.DNE, std::errc::no_such_file_or_directory}, {"", std::errc::no_such_file_or_directory}}; const uintmax_t expect = static_cast(-1); for (auto& TC : TestCases) { diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.hard_lk_ct/hard_link_count.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.hard_lk_ct/hard_link_count.pass.cpp index 96da42be0bc76..82ac572071719 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.hard_lk_ct/hard_link_count.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.hard_lk_ct/hard_link_count.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -38,13 +37,15 @@ TEST_CASE(signature_test) TEST_CASE(hard_link_count_for_file) { - TEST_CHECK(hard_link_count(StaticEnv::File) == 1); + static_test_env static_env; + TEST_CHECK(hard_link_count(static_env.File) == 1); std::error_code ec; - TEST_CHECK(hard_link_count(StaticEnv::File, ec) == 1); + TEST_CHECK(hard_link_count(static_env.File, ec) == 1); } TEST_CASE(hard_link_count_for_directory) { + static_test_env static_env; uintmax_t DirExpect = 3; // hard link from . .. and Dir2 uintmax_t Dir3Expect = 2; // hard link from . .. uintmax_t DirExpectAlt = DirExpect; @@ -56,20 +57,20 @@ TEST_CASE(hard_link_count_for_directory) DirExpectAlt = 5; // . .. Dir2 file1 file2 Dir3Expect = 3; // . .. 
file5 #endif - TEST_CHECK(hard_link_count(StaticEnv::Dir) == DirExpect || - hard_link_count(StaticEnv::Dir) == DirExpectAlt || - hard_link_count(StaticEnv::Dir) == 1); - TEST_CHECK(hard_link_count(StaticEnv::Dir3) == Dir3Expect || - hard_link_count(StaticEnv::Dir3) == Dir3ExpectAlt || - hard_link_count(StaticEnv::Dir3) == 1); + TEST_CHECK(hard_link_count(static_env.Dir) == DirExpect || + hard_link_count(static_env.Dir) == DirExpectAlt || + hard_link_count(static_env.Dir) == 1); + TEST_CHECK(hard_link_count(static_env.Dir3) == Dir3Expect || + hard_link_count(static_env.Dir3) == Dir3ExpectAlt || + hard_link_count(static_env.Dir3) == 1); std::error_code ec; - TEST_CHECK(hard_link_count(StaticEnv::Dir, ec) == DirExpect || - hard_link_count(StaticEnv::Dir, ec) == DirExpectAlt || - hard_link_count(StaticEnv::Dir) == 1); - TEST_CHECK(hard_link_count(StaticEnv::Dir3, ec) == Dir3Expect || - hard_link_count(StaticEnv::Dir3, ec) == Dir3ExpectAlt || - hard_link_count(StaticEnv::Dir3) == 1); + TEST_CHECK(hard_link_count(static_env.Dir, ec) == DirExpect || + hard_link_count(static_env.Dir, ec) == DirExpectAlt || + hard_link_count(static_env.Dir) == 1); + TEST_CHECK(hard_link_count(static_env.Dir3, ec) == Dir3Expect || + hard_link_count(static_env.Dir3, ec) == Dir3ExpectAlt || + hard_link_count(static_env.Dir3) == 1); } TEST_CASE(hard_link_count_increments_test) { @@ -84,9 +85,10 @@ TEST_CASE(hard_link_count_increments_test) TEST_CASE(hard_link_count_error_cases) { + static_test_env static_env; const path testCases[] = { - StaticEnv::BadSymlink, - StaticEnv::DNE + static_env.BadSymlink, + static_env.DNE }; const uintmax_t expect = static_cast(-1); for (auto& TC : testCases) { diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_block_file/is_block_file.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_block_file/is_block_file.pass.cpp index 6cfbc340c44f4..c584605d6381d 100644 --- 
a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_block_file/is_block_file.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_block_file/is_block_file.pass.cpp @@ -62,7 +62,8 @@ TEST_CASE(is_block_file_status_test) TEST_CASE(test_exist_not_found) { - const path p = StaticEnv::DNE; + static_test_env static_env; + const path p = static_env.DNE; TEST_CHECK(is_block_file(p) == false); } diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_char_file/is_character_file.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_char_file/is_character_file.pass.cpp index 12e66019b65c3..90690a211e6b1 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_char_file/is_character_file.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_char_file/is_character_file.pass.cpp @@ -62,7 +62,8 @@ TEST_CASE(is_character_file_status_test) TEST_CASE(test_exist_not_found) { - const path p = StaticEnv::DNE; + static_test_env static_env; + const path p = static_env.DNE; TEST_CHECK(is_character_file(p) == false); } diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_directory/is_directory.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_directory/is_directory.pass.cpp index 35dd5d40f7b0d..1059a648641e8 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_directory/is_directory.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_directory/is_directory.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -63,15 +62,17 @@ TEST_CASE(is_directory_status_test) TEST_CASE(test_exist_not_found) { - const path p = StaticEnv::DNE; + static_test_env static_env; + const path p = static_env.DNE; TEST_CHECK(is_directory(p) == false); } 
TEST_CASE(static_env_test) { - TEST_CHECK(is_directory(StaticEnv::Dir)); - TEST_CHECK(is_directory(StaticEnv::SymlinkToDir)); - TEST_CHECK(!is_directory(StaticEnv::File)); + static_test_env static_env; + TEST_CHECK(is_directory(static_env.Dir)); + TEST_CHECK(is_directory(static_env.SymlinkToDir)); + TEST_CHECK(!is_directory(static_env.File)); } TEST_CASE(test_is_directory_fails) diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_empty/is_empty.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_empty/is_empty.pass.cpp index e3393240aa406..606cebbf437e9 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_empty/is_empty.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_empty/is_empty.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -36,7 +35,8 @@ TEST_CASE(signature_test) TEST_CASE(test_exist_not_found) { - const path p = StaticEnv::DNE; + static_test_env static_env; + const path p = static_env.DNE; std::error_code ec; TEST_CHECK(is_empty(p, ec) == false); TEST_CHECK(ec); @@ -45,8 +45,9 @@ TEST_CASE(test_exist_not_found) TEST_CASE(test_is_empty_directory) { - TEST_CHECK(!is_empty(StaticEnv::Dir)); - TEST_CHECK(!is_empty(StaticEnv::SymlinkToDir)); + static_test_env static_env; + TEST_CHECK(!is_empty(static_env.Dir)); + TEST_CHECK(!is_empty(static_env.SymlinkToDir)); } TEST_CASE(test_is_empty_directory_dynamic) @@ -59,8 +60,9 @@ TEST_CASE(test_is_empty_directory_dynamic) TEST_CASE(test_is_empty_file) { - TEST_CHECK(is_empty(StaticEnv::EmptyFile)); - TEST_CHECK(!is_empty(StaticEnv::NonEmptyFile)); + static_test_env static_env; + TEST_CHECK(is_empty(static_env.EmptyFile)); + TEST_CHECK(!is_empty(static_env.NonEmptyFile)); } TEST_CASE(test_is_empty_fails) diff --git 
a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_fifo/is_fifo.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_fifo/is_fifo.pass.cpp index 4dcf043903641..1488a89165308 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_fifo/is_fifo.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_fifo/is_fifo.pass.cpp @@ -62,7 +62,8 @@ TEST_CASE(is_fifo_status_test) TEST_CASE(test_exist_not_found) { - const path p = StaticEnv::DNE; + static_test_env static_env; + const path p = static_env.DNE; TEST_CHECK(is_fifo(p) == false); } diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_other/is_other.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_other/is_other.pass.cpp index 894cc57aeba26..1381eb5ae6e47 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_other/is_other.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_other/is_other.pass.cpp @@ -62,7 +62,8 @@ TEST_CASE(is_other_status_test) TEST_CASE(test_exist_not_found) { - const path p = StaticEnv::DNE; + static_test_env static_env; + const path p = static_env.DNE; TEST_CHECK(is_other(p) == false); } diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_regular_file/is_regular_file.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_regular_file/is_regular_file.pass.cpp index 3fcf779b3213d..5c5e1ea07c750 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_regular_file/is_regular_file.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_regular_file/is_regular_file.pass.cpp @@ -62,7 +62,8 @@ TEST_CASE(is_regular_file_status_test) TEST_CASE(test_exist_not_found) { - const path p = StaticEnv::DNE; + static_test_env static_env; + const path p = static_env.DNE; TEST_CHECK(is_regular_file(p) == false); std::error_code ec; TEST_CHECK(is_regular_file(p, ec) == false); 
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_socket/is_socket.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_socket/is_socket.pass.cpp index ba727d6384c43..3f35c50f60117 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_socket/is_socket.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_socket/is_socket.pass.cpp @@ -62,7 +62,8 @@ TEST_CASE(is_socket_status_test) TEST_CASE(test_exist_not_found) { - const path p = StaticEnv::DNE; + static_test_env static_env; + const path p = static_env.DNE; TEST_CHECK(is_socket(p) == false); } diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_symlink/is_symlink.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_symlink/is_symlink.pass.cpp index 75ab6059f4d89..14da9cb7fa958 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_symlink/is_symlink.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_symlink/is_symlink.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -63,16 +62,17 @@ TEST_CASE(is_symlink_status_test) TEST_CASE(static_env_test) { + static_test_env static_env; struct TestCase { path p; bool expect; }; const TestCase testCases[] = { - {StaticEnv::File, false}, - {StaticEnv::Dir, false}, - {StaticEnv::SymlinkToFile, true}, - {StaticEnv::SymlinkToDir, true}, - {StaticEnv::BadSymlink, true} + {static_env.File, false}, + {static_env.Dir, false}, + {static_env.SymlinkToFile, true}, + {static_env.SymlinkToDir, true}, + {static_env.BadSymlink, true} }; for (auto& TC : testCases) { TEST_CHECK(is_symlink(TC.p) == TC.expect); @@ -81,7 +81,8 @@ TEST_CASE(static_env_test) TEST_CASE(test_exist_not_found) { - const path p = StaticEnv::DNE; + static_test_env static_env; + const path p = 
static_env.DNE; TEST_CHECK(is_symlink(p) == false); std::error_code ec; TEST_CHECK(is_symlink(p, ec) == false); diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.last_write_time/last_write_time.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.last_write_time/last_write_time.pass.cpp index 3ef4c53303ad8..e71da60a65e92 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.last_write_time/last_write_time.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.last_write_time/last_write_time.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -350,27 +349,28 @@ TEST_CASE(signature_test) TEST_CASE(read_last_write_time_static_env_test) { + static_test_env static_env; using C = file_time_type::clock; file_time_type min = file_time_type::min(); { - file_time_type ret = last_write_time(StaticEnv::File); + file_time_type ret = last_write_time(static_env.File); TEST_CHECK(ret != min); TEST_CHECK(ret < C::now()); - TEST_CHECK(CompareTime(ret, LastWriteTime(StaticEnv::File))); + TEST_CHECK(CompareTime(ret, LastWriteTime(static_env.File))); - file_time_type ret2 = last_write_time(StaticEnv::SymlinkToFile); + file_time_type ret2 = last_write_time(static_env.SymlinkToFile); TEST_CHECK(CompareTime(ret, ret2)); - TEST_CHECK(CompareTime(ret2, LastWriteTime(StaticEnv::SymlinkToFile))); + TEST_CHECK(CompareTime(ret2, LastWriteTime(static_env.SymlinkToFile))); } { - file_time_type ret = last_write_time(StaticEnv::Dir); + file_time_type ret = last_write_time(static_env.Dir); TEST_CHECK(ret != min); TEST_CHECK(ret < C::now()); - TEST_CHECK(CompareTime(ret, LastWriteTime(StaticEnv::Dir))); + TEST_CHECK(CompareTime(ret, LastWriteTime(static_env.Dir))); - file_time_type ret2 = last_write_time(StaticEnv::SymlinkToDir); + file_time_type ret2 = 
last_write_time(static_env.SymlinkToDir); TEST_CHECK(CompareTime(ret, ret2)); - TEST_CHECK(CompareTime(ret2, LastWriteTime(StaticEnv::SymlinkToDir))); + TEST_CHECK(CompareTime(ret2, LastWriteTime(static_env.SymlinkToDir))); } } @@ -564,7 +564,8 @@ TEST_CASE(test_write_max_time) { TEST_CASE(test_value_on_failure) { - const path p = StaticEnv::DNE; + static_test_env static_env; + const path p = static_env.DNE; std::error_code ec = GetTestEC(); TEST_CHECK(last_write_time(p, ec) == file_time_type::min()); TEST_CHECK(ErrorIs(ec, std::errc::no_such_file_or_directory)); diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.relative/relative.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.relative/relative.pass.cpp index 2a8d8296c861e..ffe2b3890f0f2 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.relative/relative.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.relative/relative.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -42,77 +41,89 @@ TEST_CASE(test_signature_1) { } TEST_CASE(test_signature_2) { - fs::path p(StaticEnv::File); + static_test_env static_env; + fs::path p(static_env.File); const fs::path output = fs::weakly_canonical(p); - TEST_CHECK(output == std::string(StaticEnv::File)); + TEST_CHECK(output == std::string(static_env.File)); } TEST_CASE(test_signature_3) { - fs::path p(StaticEnv::Dir); + static_test_env static_env; + fs::path p(static_env.Dir); const fs::path output = fs::weakly_canonical(p); - TEST_CHECK(output == std::string(StaticEnv::Dir)); + TEST_CHECK(output == std::string(static_env.Dir)); } TEST_CASE(test_signature_4) { - fs::path p(StaticEnv::SymlinkToDir); + static_test_env static_env; + fs::path p(static_env.SymlinkToDir); const fs::path output = fs::weakly_canonical(p); - TEST_CHECK(output == 
std::string(StaticEnv::Dir)); + TEST_CHECK(output == std::string(static_env.Dir)); } TEST_CASE(test_signature_5) { - fs::path p(StaticEnv::SymlinkToDir / "dir2/."); + static_test_env static_env; + fs::path p(static_env.SymlinkToDir / "dir2/."); const fs::path output = fs::weakly_canonical(p); - TEST_CHECK(output == std::string(StaticEnv::Dir / "dir2")); + TEST_CHECK(output == std::string(static_env.Dir / "dir2")); } TEST_CASE(test_signature_6) { + static_test_env static_env; // FIXME? If the trailing separator occurs in a part of the path that exists, // it is omitted. Otherwise it is added to the end of the result. - fs::path p(StaticEnv::SymlinkToDir / "dir2/./"); + fs::path p(static_env.SymlinkToDir / "dir2/./"); const fs::path output = fs::weakly_canonical(p); - TEST_CHECK(output == std::string(StaticEnv::Dir / "dir2")); + TEST_CHECK(output == std::string(static_env.Dir / "dir2")); } TEST_CASE(test_signature_7) { - fs::path p(StaticEnv::SymlinkToDir / "dir2/DNE/./"); + static_test_env static_env; + fs::path p(static_env.SymlinkToDir / "dir2/DNE/./"); const fs::path output = fs::weakly_canonical(p); - TEST_CHECK(output == std::string(StaticEnv::Dir / "dir2/DNE/")); + TEST_CHECK(output == std::string(static_env.Dir / "dir2/DNE/")); } TEST_CASE(test_signature_8) { - fs::path p(StaticEnv::SymlinkToDir / "dir2"); + static_test_env static_env; + fs::path p(static_env.SymlinkToDir / "dir2"); const fs::path output = fs::weakly_canonical(p); - TEST_CHECK(output == std::string(StaticEnv::Dir2)); + TEST_CHECK(output == std::string(static_env.Dir2)); } TEST_CASE(test_signature_9) { - fs::path p(StaticEnv::SymlinkToDir / "dir2/../dir2/DNE/.."); + static_test_env static_env; + fs::path p(static_env.SymlinkToDir / "dir2/../dir2/DNE/.."); const fs::path output = fs::weakly_canonical(p); - TEST_CHECK(output == std::string(StaticEnv::Dir2 / "")); + TEST_CHECK(output == std::string(static_env.Dir2 / "")); } TEST_CASE(test_signature_10) { - fs::path p(StaticEnv::SymlinkToDir / 
"dir2/dir3/../DNE/DNE2"); + static_test_env static_env; + fs::path p(static_env.SymlinkToDir / "dir2/dir3/../DNE/DNE2"); const fs::path output = fs::weakly_canonical(p); - TEST_CHECK(output == std::string(StaticEnv::Dir2 / "DNE/DNE2")); + TEST_CHECK(output == std::string(static_env.Dir2 / "DNE/DNE2")); } TEST_CASE(test_signature_11) { - fs::path p(StaticEnv::Dir / "../dir1"); + static_test_env static_env; + fs::path p(static_env.Dir / "../dir1"); const fs::path output = fs::weakly_canonical(p); - TEST_CHECK(output == std::string(StaticEnv::Dir)); + TEST_CHECK(output == std::string(static_env.Dir)); } TEST_CASE(test_signature_12) { - fs::path p(StaticEnv::Dir / "./."); + static_test_env static_env; + fs::path p(static_env.Dir / "./."); const fs::path output = fs::weakly_canonical(p); - TEST_CHECK(output == std::string(StaticEnv::Dir)); + TEST_CHECK(output == std::string(static_env.Dir)); } TEST_CASE(test_signature_13) { - fs::path p(StaticEnv::Dir / "DNE/../foo"); + static_test_env static_env; + fs::path p(static_env.Dir / "DNE/../foo"); const fs::path output = fs::weakly_canonical(p); - TEST_CHECK(output == std::string(StaticEnv::Dir / "foo")); + TEST_CHECK(output == std::string(static_env.Dir / "foo")); } TEST_SUITE_END() diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.space/space.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.space/space.pass.cpp index 4a0936b920db4..6de0b10c6e549 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.space/space.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.space/space.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -45,6 +44,7 @@ TEST_CASE(signature_test) TEST_CASE(test_error_reporting) { + static_test_env static_env; auto checkThrow = [](path const& f, const std::error_code& ec) { #ifndef 
TEST_HAS_NO_EXCEPTIONS @@ -63,8 +63,8 @@ TEST_CASE(test_error_reporting) }; const path cases[] = { "", - StaticEnv::DNE, - StaticEnv::BadSymlink + static_env.DNE, + static_env.BadSymlink }; for (auto& p : cases) { const auto expect = static_cast(-1); @@ -80,11 +80,13 @@ TEST_CASE(test_error_reporting) TEST_CASE(basic_space_test) { + static_test_env static_env; + // All the test cases should reside on the same filesystem and therefore // should have the same expected result. Compute this expected result // one and check that it looks semi-sane. struct statvfs expect; - TEST_REQUIRE(::statvfs(StaticEnv::Dir.c_str(), &expect) != -1); + TEST_REQUIRE(::statvfs(static_env.Dir.c_str(), &expect) != -1); TEST_CHECK(expect.f_bavail > 0); TEST_CHECK(expect.f_bfree > 0); TEST_CHECK(expect.f_bsize > 0); @@ -106,11 +108,11 @@ TEST_CASE(basic_space_test) // Currently 5% of capacity const std::uintmax_t delta = expect_capacity / 20; const path cases[] = { - StaticEnv::File, - StaticEnv::Dir, - StaticEnv::Dir2, - StaticEnv::SymlinkToFile, - StaticEnv::SymlinkToDir + static_env.File, + static_env.Dir, + static_env.Dir2, + static_env.SymlinkToFile, + static_env.SymlinkToDir }; for (auto& p : cases) { std::error_code ec = GetTestEC(); diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.status/status.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.status/status.pass.cpp index 60c99ee923d5a..af4ddb9c89061 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.status/status.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.status/status.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -34,11 +33,12 @@ TEST_CASE(signature_test) TEST_CASE(test_status_not_found) { + static_test_env static_env; const std::error_code expect_ec = 
std::make_error_code(std::errc::no_such_file_or_directory); const path cases[] { - StaticEnv::DNE, - StaticEnv::BadSymlink + static_env.DNE, + static_env.BadSymlink }; for (auto& p : cases) { std::error_code ec = std::make_error_code(std::errc::address_in_use); @@ -103,17 +103,18 @@ TEST_CASE(test_status_cannot_resolve) TEST_CASE(status_file_types_test) { + static_test_env static_env; scoped_test_env env; struct TestCase { path p; file_type expect_type; } cases[] = { - {StaticEnv::File, file_type::regular}, - {StaticEnv::SymlinkToFile, file_type::regular}, - {StaticEnv::Dir, file_type::directory}, - {StaticEnv::SymlinkToDir, file_type::directory}, + {static_env.File, file_type::regular}, + {static_env.SymlinkToFile, file_type::regular}, + {static_env.Dir, file_type::directory}, + {static_env.SymlinkToDir, file_type::directory}, // Block files tested elsewhere - {StaticEnv::CharFile, file_type::character}, + {static_env.CharFile, file_type::character}, #if !defined(__APPLE__) && !defined(__FreeBSD__) // No support for domain sockets {env.create_socket("socket"), file_type::socket}, #endif diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.symlink_status/symlink_status.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.symlink_status/symlink_status.pass.cpp index 350076fcf3709..e883739b7d5ba 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.symlink_status/symlink_status.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.symlink_status/symlink_status.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -34,10 +33,11 @@ TEST_CASE(signature_test) TEST_CASE(test_symlink_status_not_found) { + static_test_env static_env; const std::error_code expect_ec = std::make_error_code(std::errc::no_such_file_or_directory); const path cases[] { - 
StaticEnv::DNE + static_env.DNE }; for (auto& p : cases) { std::error_code ec = std::make_error_code(std::errc::address_in_use); @@ -110,18 +110,19 @@ TEST_CASE(test_symlink_status_cannot_resolve) TEST_CASE(symlink_status_file_types_test) { + static_test_env static_env; scoped_test_env env; struct TestCase { path p; file_type expect_type; } cases[] = { - {StaticEnv::BadSymlink, file_type::symlink}, - {StaticEnv::File, file_type::regular}, - {StaticEnv::SymlinkToFile, file_type::symlink}, - {StaticEnv::Dir, file_type::directory}, - {StaticEnv::SymlinkToDir, file_type::symlink}, + {static_env.BadSymlink, file_type::symlink}, + {static_env.File, file_type::regular}, + {static_env.SymlinkToFile, file_type::symlink}, + {static_env.Dir, file_type::directory}, + {static_env.SymlinkToDir, file_type::symlink}, // Block files tested elsewhere - {StaticEnv::CharFile, file_type::character}, + {static_env.CharFile, file_type::character}, #if !defined(__APPLE__) && !defined(__FreeBSD__) // No support for domain sockets {env.create_socket("socket"), file_type::socket}, #endif diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.weakly_canonical/weakly_canonical.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.weakly_canonical/weakly_canonical.pass.cpp index 35d97b8703d59..c2c86bc35a6b4 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.weakly_canonical/weakly_canonical.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.weakly_canonical/weakly_canonical.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// FILE_DEPENDENCIES: ../../Inputs/static_test_env // UNSUPPORTED: c++98, c++03 // @@ -27,6 +26,9 @@ int main(int, char**) { + + static_test_env static_env; + // clang-format off struct { std::string input; @@ -41,20 +43,20 @@ int main(int, char**) { {"a/b", fs::current_path() / "a/b"}, {"a", fs::current_path() / "a"}, {"a/b/", 
fs::current_path() / "a/b/"}, - {StaticEnv::File, StaticEnv::File}, - {StaticEnv::Dir, StaticEnv::Dir}, - {StaticEnv::SymlinkToDir, StaticEnv::Dir}, - {StaticEnv::SymlinkToDir / "dir2/.", StaticEnv::Dir / "dir2"}, + {static_env.File, static_env.File}, + {static_env.Dir, static_env.Dir}, + {static_env.SymlinkToDir, static_env.Dir}, + {static_env.SymlinkToDir / "dir2/.", static_env.Dir / "dir2"}, // FIXME? If the trailing separator occurs in a part of the path that exists, // it is omitted. Otherwise it is added to the end of the result. - {StaticEnv::SymlinkToDir / "dir2/./", StaticEnv::Dir / "dir2"}, - {StaticEnv::SymlinkToDir / "dir2/DNE/./", StaticEnv::Dir / "dir2/DNE/"}, - {StaticEnv::SymlinkToDir / "dir2", StaticEnv::Dir2}, - {StaticEnv::SymlinkToDir / "dir2/../dir2/DNE/..", StaticEnv::Dir2 / ""}, - {StaticEnv::SymlinkToDir / "dir2/dir3/../DNE/DNE2", StaticEnv::Dir2 / "DNE/DNE2"}, - {StaticEnv::Dir / "../dir1", StaticEnv::Dir}, - {StaticEnv::Dir / "./.", StaticEnv::Dir}, - {StaticEnv::Dir / "DNE/../foo", StaticEnv::Dir / "foo"} + {static_env.SymlinkToDir / "dir2/./", static_env.Dir / "dir2"}, + {static_env.SymlinkToDir / "dir2/DNE/./", static_env.Dir / "dir2/DNE/"}, + {static_env.SymlinkToDir / "dir2", static_env.Dir2}, + {static_env.SymlinkToDir / "dir2/../dir2/DNE/..", static_env.Dir2 / ""}, + {static_env.SymlinkToDir / "dir2/dir3/../DNE/DNE2", static_env.Dir2 / "DNE/DNE2"}, + {static_env.Dir / "../dir1", static_env.Dir}, + {static_env.Dir / "./.", static_env.Dir}, + {static_env.Dir / "DNE/../foo", static_env.Dir / "foo"} }; // clang-format on int ID = 0; diff --git a/libcxx/test/support/filesystem_test_helper.h b/libcxx/test/support/filesystem_test_helper.h index 8553317de76f6..5cccca9f0493e 100644 --- a/libcxx/test/support/filesystem_test_helper.h +++ b/libcxx/test/support/filesystem_test_helper.h @@ -3,7 +3,8 @@ #include "filesystem_include.h" -#include // for ftruncate +#include // for mkdir, mkfifo +#include // for ftruncate, link, symlink, getcwd, chdir 
#include #include // for printf @@ -24,89 +25,6 @@ # include #endif -// static test helpers - -namespace StaticEnv { - -// Tests that use these utilities should add '<...>/Inputs/static_test_env' -// to their FILE_DEPENDENCIES, to make sure the directory is made available -// to the test. Assuming that, the 'static_test_env' will be available in the -// directory where the test is run. -static const fs::path Root = fs::current_path() / "static_test_env"; - -inline fs::path makePath(fs::path const& p) { - // env_path is expected not to contain symlinks. - fs::path const& env_path = Root; - return env_path / p; -} - -static const fs::path TestFileList[] = { - makePath("empty_file"), - makePath("non_empty_file"), - makePath("dir1/file1"), - makePath("dir1/file2") -}; -const std::size_t TestFileListSize = sizeof(TestFileList) / sizeof(fs::path); - -static const fs::path TestDirList[] = { - makePath("dir1"), - makePath("dir1/dir2"), - makePath("dir1/dir2/dir3") -}; -const std::size_t TestDirListSize = sizeof(TestDirList) / sizeof(fs::path); - -static const fs::path File = TestFileList[0]; -static const fs::path Dir = TestDirList[0]; -static const fs::path Dir2 = TestDirList[1]; -static const fs::path Dir3 = TestDirList[2]; -static const fs::path SymlinkToFile = makePath("symlink_to_empty_file"); -static const fs::path SymlinkToDir = makePath("symlink_to_dir"); -static const fs::path BadSymlink = makePath("bad_symlink"); -static const fs::path DNE = makePath("DNE"); -static const fs::path EmptyFile = TestFileList[0]; -static const fs::path NonEmptyFile = TestFileList[1]; -static const fs::path CharFile = "/dev/null"; // Hopefully this exists - -static const fs::path DirIterationList[] = { - makePath("dir1/dir2"), - makePath("dir1/file1"), - makePath("dir1/file2") -}; -const std::size_t DirIterationListSize = sizeof(DirIterationList) - / sizeof(fs::path); - -static const fs::path DirIterationListDepth1[] = { - makePath("dir1/dir2/afile3"), - makePath("dir1/dir2/dir3"), - 
makePath("dir1/dir2/symlink_to_dir3"), - makePath("dir1/dir2/file4"), -}; - -static const fs::path RecDirIterationList[] = { - makePath("dir1/dir2"), - makePath("dir1/file1"), - makePath("dir1/file2"), - makePath("dir1/dir2/afile3"), - makePath("dir1/dir2/dir3"), - makePath("dir1/dir2/symlink_to_dir3"), - makePath("dir1/dir2/file4"), - makePath("dir1/dir2/dir3/file5") -}; - -static const fs::path RecDirFollowSymlinksIterationList[] = { - makePath("dir1/dir2"), - makePath("dir1/file1"), - makePath("dir1/file2"), - makePath("dir1/dir2/afile3"), - makePath("dir1/dir2/dir3"), - makePath("dir1/dir2/file4"), - makePath("dir1/dir2/dir3/file5"), - makePath("dir1/dir2/symlink_to_dir3"), - makePath("dir1/dir2/symlink_to_dir3/file5"), -}; - -} // namespace StaticEnv - namespace random_utils { inline char to_hex(int ch) { return ch < 10 ? static_cast('0' + ch) @@ -207,17 +125,18 @@ struct scoped_test_env std::string create_dir(std::string filename) { filename = sanitize_path(std::move(filename)); - std::string cmd = "mkdir " + filename; - int ret = std::system(cmd.c_str()); + int ret = ::mkdir(filename.c_str(), 0777); // rwxrwxrwx mode assert(ret == 0); return filename; } - std::string create_symlink(std::string source, std::string to) { - source = sanitize_path(std::move(source)); + std::string create_symlink(std::string source, + std::string to, + bool sanitize_source = true) { + if (sanitize_source) + source = sanitize_path(std::move(source)); to = sanitize_path(std::move(to)); - std::string cmd = "ln -s " + source + ' ' + to; - int ret = std::system(cmd.c_str()); + int ret = ::symlink(source.c_str(), to.c_str()); assert(ret == 0); return to; } @@ -225,16 +144,14 @@ struct scoped_test_env std::string create_hardlink(std::string source, std::string to) { source = sanitize_path(std::move(source)); to = sanitize_path(std::move(to)); - std::string cmd = "ln " + source + ' ' + to; - int ret = std::system(cmd.c_str()); + int ret = ::link(source.c_str(), to.c_str()); assert(ret == 
0); return to; } std::string create_fifo(std::string file) { file = sanitize_path(std::move(file)); - std::string cmd = "mkfifo " + file; - int ret = std::system(cmd.c_str()); + int ret = ::mkfifo(file.c_str(), 0666); // rw-rw-rw- mode assert(ret == 0); return file; } @@ -276,6 +193,131 @@ struct scoped_test_env } }; +/// This class generates the following tree: +/// +/// static_test_env +/// ├── bad_symlink -> dne +/// ├── dir1 +/// │   ├── dir2 +/// │   │   ├── afile3 +/// │   │   ├── dir3 +/// │   │   │   └── file5 +/// │   │   ├── file4 +/// │   │   └── symlink_to_dir3 -> dir3 +/// │   ├── file1 +/// │   └── file2 +/// ├── empty_file +/// ├── non_empty_file +/// ├── symlink_to_dir -> dir1 +/// └── symlink_to_empty_file -> empty_file +/// +class static_test_env { + scoped_test_env env_; +public: + static_test_env() { + env_.create_symlink("dne", "bad_symlink", false); + env_.create_dir("dir1"); + env_.create_dir("dir1/dir2"); + env_.create_file("dir1/dir2/afile3"); + env_.create_dir("dir1/dir2/dir3"); + env_.create_file("dir1/dir2/dir3/file5"); + env_.create_file("dir1/dir2/file4"); + env_.create_symlink("dir3", "dir1/dir2/symlink_to_dir3", false); + env_.create_file("dir1/file1"); + env_.create_file("dir1/file2", 42); + env_.create_file("empty_file"); + env_.create_file("non_empty_file", 42); + env_.create_symlink("dir1", "symlink_to_dir", false); + env_.create_symlink("empty_file", "symlink_to_empty_file", false); + } + + const fs::path Root = env_.test_root; + + fs::path makePath(fs::path const& p) const { + // env_path is expected not to contain symlinks. 
+ fs::path const& env_path = Root; + return env_path / p; + } + + const std::vector TestFileList = { + makePath("empty_file"), + makePath("non_empty_file"), + makePath("dir1/file1"), + makePath("dir1/file2") + }; + + const std::vector TestDirList = { + makePath("dir1"), + makePath("dir1/dir2"), + makePath("dir1/dir2/dir3") + }; + + const fs::path File = TestFileList[0]; + const fs::path Dir = TestDirList[0]; + const fs::path Dir2 = TestDirList[1]; + const fs::path Dir3 = TestDirList[2]; + const fs::path SymlinkToFile = makePath("symlink_to_empty_file"); + const fs::path SymlinkToDir = makePath("symlink_to_dir"); + const fs::path BadSymlink = makePath("bad_symlink"); + const fs::path DNE = makePath("DNE"); + const fs::path EmptyFile = TestFileList[0]; + const fs::path NonEmptyFile = TestFileList[1]; + const fs::path CharFile = "/dev/null"; // Hopefully this exists + + const std::vector DirIterationList = { + makePath("dir1/dir2"), + makePath("dir1/file1"), + makePath("dir1/file2") + }; + + const std::vector DirIterationListDepth1 = { + makePath("dir1/dir2/afile3"), + makePath("dir1/dir2/dir3"), + makePath("dir1/dir2/symlink_to_dir3"), + makePath("dir1/dir2/file4"), + }; + + const std::vector RecDirIterationList = { + makePath("dir1/dir2"), + makePath("dir1/file1"), + makePath("dir1/file2"), + makePath("dir1/dir2/afile3"), + makePath("dir1/dir2/dir3"), + makePath("dir1/dir2/symlink_to_dir3"), + makePath("dir1/dir2/file4"), + makePath("dir1/dir2/dir3/file5") + }; + + const std::vector RecDirFollowSymlinksIterationList = { + makePath("dir1/dir2"), + makePath("dir1/file1"), + makePath("dir1/file2"), + makePath("dir1/dir2/afile3"), + makePath("dir1/dir2/dir3"), + makePath("dir1/dir2/file4"), + makePath("dir1/dir2/dir3/file5"), + makePath("dir1/dir2/symlink_to_dir3"), + makePath("dir1/dir2/symlink_to_dir3/file5"), + }; +}; + +struct CWDGuard { + // Assume that path lengths are not greater than this. + // This should be fine for testing purposes. 
+ char OldCWD[4096]; + CWDGuard() { + char* ret = ::getcwd(OldCWD, sizeof(OldCWD)); + assert(ret && "getcwd failed"); + } + ~CWDGuard() { + int ret = ::chdir(OldCWD); + assert(ret == 0 && "chdir failed"); + } + + CWDGuard(CWDGuard const&) = delete; + CWDGuard& operator=(CWDGuard const&) = delete; +}; + // Misc test types #define MKSTR(Str) {Str, TEST_CONCAT(L, Str), TEST_CONCAT(u, Str), TEST_CONCAT(U, Str)} From 3873d0b3d899bb84a5983450dd2d98006c4527e2 Mon Sep 17 00:00:00 2001 From: Sergej Jaskiewicz Date: Wed, 6 May 2020 01:10:57 +0300 Subject: [PATCH 041/770] Re-commit "[cmake] Allow std::filesystem tests in CrossWinToARMLinux.cmake" https://reviews.llvm.org/D78200 has been re-committed, so we can now enable building std::filesystem and running tests for it. --- clang/cmake/caches/CrossWinToARMLinux.cmake | 3 --- 1 file changed, 3 deletions(-) diff --git a/clang/cmake/caches/CrossWinToARMLinux.cmake b/clang/cmake/caches/CrossWinToARMLinux.cmake index c01c31ae5a722..3d1e961ada8d0 100644 --- a/clang/cmake/caches/CrossWinToARMLinux.cmake +++ b/clang/cmake/caches/CrossWinToARMLinux.cmake @@ -89,9 +89,6 @@ set(LIBCXXABI_LINK_TESTS_WITH_SHARED_LIBCXX OFF CACHE BOOL "") set(LIBCXX_LINK_TESTS_WITH_SHARED_LIBCXXABI OFF CACHE BOOL "") set(LIBCXX_LINK_TESTS_WITH_SHARED_LIBCXX OFF CACHE BOOL "") -# FIXME: Remove this when https://reviews.llvm.org/D78200 is merged. -set(LIBCXX_ENABLE_FILESYSTEM OFF CACHE BOOL "") - set(LIBCXX_USE_COMPILER_RT ON CACHE BOOL "") set(LIBCXX_TARGET_TRIPLE "${CMAKE_C_COMPILER_TARGET}" CACHE STRING "") set(LIBCXX_SYSROOT "${DEFAULT_SYSROOT}" CACHE STRING "") From a9b5edc5e2c4ec9d506b2c30465ee9f2dc21e5cc Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sun, 24 May 2020 23:08:27 +0200 Subject: [PATCH 042/770] Make mlir::Value's bool conversion operator explicit This still allows `if (value)` while requiring an explicit cast when not in a boolean context. This means things like `std::set` will no longer compile. 
Differential Revision: https://reviews.llvm.org/D80497 --- mlir/include/mlir/EDSC/Builders.h | 2 +- mlir/include/mlir/IR/Value.h | 2 +- mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp | 3 ++- mlir/lib/Parser/Parser.cpp | 6 +++--- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/mlir/include/mlir/EDSC/Builders.h b/mlir/include/mlir/EDSC/Builders.h index a6045db3d9985..70443608a2513 100644 --- a/mlir/include/mlir/EDSC/Builders.h +++ b/mlir/include/mlir/EDSC/Builders.h @@ -303,7 +303,7 @@ struct StructuredIndexed { "MemRef, RankedTensor or Vector expected"); } - bool hasValue() const { return value; } + bool hasValue() const { return (bool)value; } Value getValue() const { assert(value && "StructuredIndexed Value not set."); return value; diff --git a/mlir/include/mlir/IR/Value.h b/mlir/include/mlir/IR/Value.h index 74f504c25156c..f5cb16f347ed3 100644 --- a/mlir/include/mlir/IR/Value.h +++ b/mlir/include/mlir/IR/Value.h @@ -92,7 +92,7 @@ class Value { return U(ownerAndKind); } - operator bool() const { return ownerAndKind.getPointer(); } + explicit operator bool() const { return ownerAndKind.getPointer(); } bool operator==(const Value &other) const { return ownerAndKind == other.ownerAndKind; } diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp index f703a8c621e69..9868a14c21651 100644 --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -281,7 +281,8 @@ LogicalResult NDTransferOpHelper::doReplace() { } }); - assert((!options.unroll ^ result) && "Expected resulting Value iff unroll"); + assert((!options.unroll ^ (bool)result) && + "Expected resulting Value iff unroll"); if (!result) result = std_load(vector_type_cast(MemRefType::get({}, vectorType), alloc)); rewriter.replaceOp(op, result); diff --git a/mlir/lib/Parser/Parser.cpp b/mlir/lib/Parser/Parser.cpp index f5ad1b65f1a1c..d5108a4ed29ee 100644 --- a/mlir/lib/Parser/Parser.cpp +++ 
b/mlir/lib/Parser/Parser.cpp @@ -3542,14 +3542,14 @@ ParseResult OperationParser::finalize() { // Check for any forward references that are left. If we find any, error // out. if (!forwardRefPlaceholders.empty()) { - SmallVector, 4> errors; + SmallVector errors; // Iteration over the map isn't deterministic, so sort by source location. for (auto entry : forwardRefPlaceholders) - errors.push_back({entry.second.getPointer(), entry.first}); + errors.push_back(entry.second.getPointer()); llvm::array_pod_sort(errors.begin(), errors.end()); for (auto entry : errors) { - auto loc = SMLoc::getFromPointer(entry.first); + auto loc = SMLoc::getFromPointer(entry); emitError(loc, "use of undeclared SSA value name"); } return failure(); From 77aec3b4c0e63b07d98dbb1aeb693d200e769a05 Mon Sep 17 00:00:00 2001 From: Dmitry Preobrazhensky Date: Mon, 25 May 2020 19:55:38 +0300 Subject: [PATCH 043/770] [AMDGPU][MC][GFX8+] Enabled clamp for v_add_u16, v_sub_u16 and v_subrev_u16 See https://bugs.llvm.org/show_bug.cgi?id=45926 Reviewers: arsenm, rampitec, vpykhtin Differential Revision: https://reviews.llvm.org/D80430 --- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 1 + llvm/lib/Target/AMDGPU/VOP2Instructions.td | 6 +++--- .../AMDGPU/GlobalISel/inst-select-add.s16.mir | 16 ++++++++-------- llvm/test/MC/AMDGPU/gfx10_asm_all.s | 6 ++++++ llvm/test/MC/AMDGPU/vop2.s | 15 +++++++++++++++ .../MC/Disassembler/AMDGPU/gfx10_dasm_all.txt | 6 ++++++ llvm/test/MC/Disassembler/AMDGPU/vop2_vi.txt | 9 +++++++++ 7 files changed, 48 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 397791677eda7..b988de596c648 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2266,6 +2266,7 @@ def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>; def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>; def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>; def VOP_I16_I16_I16 : 
VOPProfile <[i16, i16, i16, untyped]>; +def VOP_I16_I16_I16_ARITH : VOPProfile <[i16, i16, i16, untyped], 0, /*EnableClamp=*/1>; def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>; def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>; diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 594e11ec4f54c..c1ce1b755322d 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -626,9 +626,9 @@ defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, fmul>; def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">; } // End FPDPRounding = 1 -defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16, add>; -defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16, sub>; -defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16, null_frag, "v_sub_u16">; +defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16_ARITH, add>; +defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16_ARITH, sub>; +defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16_ARITH, null_frag, "v_sub_u16">; defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16, mul>; defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>; defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum_like>; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir index cc48e9126c9b7..ad8f5df33ab07 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir @@ -19,14 +19,14 @@ body: | ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], implicit $exec 
+ ; GFX6: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_ADD_U16_e64_]] ; GFX10-LABEL: name: add_s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX10: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX10: S_ENDPGM 0, implicit [[V_ADD_U16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -51,14 +51,14 @@ body: | ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_ADD_U16_e64_]] ; GFX10-LABEL: name: add_s16_zext_to_s32 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX10: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX10: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_ADD_U16_e64_]], 0, 16, implicit $exec ; GFX10: S_ENDPGM 0, implicit [[V_BFE_U32_]] %0:vgpr(s32) = COPY $vgpr0 @@ -84,13 +84,13 @@ body: | ; GFX6-LABEL: name: add_s16_neg_inline_const_64 ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, implicit $exec + ; GFX6: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, 0, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_SUB_U16_e64_]] ; 
GFX10-LABEL: name: add_s16_neg_inline_const_64 ; GFX10: liveins: $vgpr0 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, implicit $exec + ; GFX10: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, 0, implicit $exec ; GFX10: S_ENDPGM 0, implicit [[V_SUB_U16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 @@ -113,13 +113,13 @@ body: | ; GFX6-LABEL: name: add_s16_neg_inline_const_64_zext_to_s32 ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, implicit $exec + ; GFX6: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, 0, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_SUB_U16_e64_]] ; GFX10-LABEL: name: add_s16_neg_inline_const_64_zext_to_s32 ; GFX10: liveins: $vgpr0 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, implicit $exec + ; GFX10: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, 0, implicit $exec ; GFX10: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_SUB_U16_e64_]], 0, 16, implicit $exec ; GFX10: S_ENDPGM 0, implicit [[V_BFE_U32_]] %0:vgpr(s32) = COPY $vgpr0 diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_all.s b/llvm/test/MC/AMDGPU/gfx10_asm_all.s index 13072ad0c049e..aa09c30903ba2 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_all.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_all.s @@ -58995,6 +58995,9 @@ v_add_nc_u16 v5, v1, 0.5 v_add_nc_u16 v5, v1, -4.0 // GFX10: encoding: [0x05,0x00,0x03,0xd7,0x01,0xef,0x01,0x00] +v_add_nc_u16 v5, v1, -4.0 clamp +// GFX10: encoding: [0x05,0x80,0x03,0xd7,0x01,0xef,0x01,0x00] + v_sub_nc_u16 v5, v1, v2 // GFX10: encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00] @@ -59073,6 +59076,9 @@ v_sub_nc_u16 v5, v1, 0.5 v_sub_nc_u16 v5, v1, -4.0 // GFX10: encoding: 
[0x05,0x00,0x04,0xd7,0x01,0xef,0x01,0x00] +v_sub_nc_u16 v5, v1, -4.0 clamp +// GFX10: encoding: [0x05,0x80,0x04,0xd7,0x01,0xef,0x01,0x00] + v_mul_lo_u16 v5, v1, v2 // GFX10: encoding: [0x05,0x00,0x05,0xd7,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/vop2.s b/llvm/test/MC/AMDGPU/vop2.s index 552bc468b63d2..1505c8cfa44d9 100644 --- a/llvm/test/MC/AMDGPU/vop2.s +++ b/llvm/test/MC/AMDGPU/vop2.s @@ -435,16 +435,31 @@ v_madak_f16 v1, v2, v3, 64.0 // VI: v_add_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c] v_add_u16_e32 v1, v2, v3 +// NOSICI: error: invalid operand for instruction +// NOSICI: v_add_u16 v1, v2, v3 clamp +// VI: v_add_u16_e64 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x26,0xd1,0x02,0x07,0x02,0x00] +v_add_u16 v1, v2, v3 clamp + // NOSICI: error: instruction not supported on this GPU // NOSICI: v_sub_u16_e32 v1, v2, v3 // VI: v_sub_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e] v_sub_u16_e32 v1, v2, v3 +// NOSICI: error: invalid operand for instruction +// NOSICI: v_sub_u16 v1, v2, v3 clamp +// VI: v_sub_u16_e64 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x27,0xd1,0x02,0x07,0x02,0x00] +v_sub_u16 v1, v2, v3 clamp + // NOSICI: error: instruction not supported on this GPU // NOSICI: v_subrev_u16_e32 v1, v2, v3 // VI: v_subrev_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x50] v_subrev_u16_e32 v1, v2, v3 +// NOSICI: error: invalid operand for instruction +// NOSICI: v_subrev_u16 v1, v2, v3 clamp +// VI: v_subrev_u16_e64 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x28,0xd1,0x02,0x07,0x02,0x00] +v_subrev_u16 v1, v2, v3 clamp + // NOSICI: error: instruction not supported on this GPU // NOSICI: v_mul_lo_u16_e32 v1, v2, v3 // VI: v_mul_lo_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x52] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt index 6b920345d7b8a..89cbaa7f02998 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt +++ 
b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt @@ -21401,6 +21401,9 @@ # GFX10: v_add_nc_u16_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x03,0xd7,0x6a,0x04,0x02,0x00 +# GFX10: v_add_nc_u16_e64 v5, vcc_lo, v2 clamp ; encoding: [0x05,0x80,0x03,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x80,0x03,0xd7,0x6a,0x04,0x02,0x00 + # GFX10: v_add_nc_u32_e32 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x4b] 0x01,0x05,0xfe,0x4b @@ -95808,6 +95811,9 @@ # GFX10: v_sub_nc_u16_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x04,0xd7,0x6a,0x04,0x02,0x00 +# GFX10: v_sub_nc_u16_e64 v5, vcc_lo, v2 clamp ; encoding: [0x05,0x80,0x04,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x80,0x04,0xd7,0x6a,0x04,0x02,0x00 + # GFX10: v_sub_nc_u32_e32 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x4d] 0x01,0x05,0xfe,0x4d diff --git a/llvm/test/MC/Disassembler/AMDGPU/vop2_vi.txt b/llvm/test/MC/Disassembler/AMDGPU/vop2_vi.txt index 4f44326a69bf7..eafbece4d96cf 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/vop2_vi.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/vop2_vi.txt @@ -222,12 +222,21 @@ # VI: v_add_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c] 0x02 0x07 0x02 0x4c +# VI: v_add_u16_e64 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x26,0xd1,0x02,0x07,0x02,0x00] +0x01 0x80 0x26 0xd1 0x02 0x07 0x02 0x00 + # VI: v_sub_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e] 0x02 0x07 0x02 0x4e +# VI: v_sub_u16_e64 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x27,0xd1,0x02,0x07,0x02,0x00] +0x01 0x80 0x27 0xd1 0x02 0x07 0x02 0x00 + # VI: v_subrev_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x50] 0x02 0x07 0x02 0x50 +# VI: v_subrev_u16_e64 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x28,0xd1,0x02,0x07,0x02,0x00] +0x01 0x80 0x28 0xd1 0x02 0x07 0x02 0x00 + # VI: v_mul_lo_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x52] 0x02 0x07 0x02 0x52 From 5bf2409a4e4d23018ecffe4eff39988a957e76f7 Mon Sep 17 00:00:00 2001 From: stevewan Date: Mon, 25 May 2020 13:43:22 -0400 
Subject: [PATCH 044/770] [AIX] Add '-bcdtors:all:0:s' to linker to gather static init functions Summary: On AIX, add '-bcdtors:all:0:s' to the linker implicitly through the driver so that we can collect all static constructor and destructor functions. Reviewers: hubert.reinterpretcast, Xiangling_L, ZarkoCA, daltenty Reviewed By: hubert.reinterpretcast Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D80415 --- clang/lib/Driver/ToolChains/AIX.cpp | 7 +++++++ clang/test/Driver/aix-ld.c | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/clang/lib/Driver/ToolChains/AIX.cpp b/clang/lib/Driver/ToolChains/AIX.cpp index 6fbff61f76565..df2e30da32a8c 100644 --- a/clang/lib/Driver/ToolChains/AIX.cpp +++ b/clang/lib/Driver/ToolChains/AIX.cpp @@ -81,6 +81,7 @@ void aix::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const AIX &ToolChain = static_cast(getToolChain()); + const Driver &D = ToolChain.getDriver(); ArgStringList CmdArgs; const bool IsArch32Bit = ToolChain.getTriple().isArch32Bit(); @@ -129,6 +130,12 @@ void aix::Linker::ConstructJob(Compilation &C, const JobAction &JA, Args.MakeArgString(ToolChain.GetFilePath(getCrt0Basename()))); } + // Collect all static constructor and destructor functions in CXX mode. This + // has to come before AddLinkerInputs as the implied option needs to precede + // any other '-bcdtors' settings or '-bnocdtors' that '-Wl' might forward. + if (D.CCCIsCXX()) + CmdArgs.push_back("-bcdtors:all:0:s"); + // Specify linker input file(s). 
AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); diff --git a/clang/test/Driver/aix-ld.c b/clang/test/Driver/aix-ld.c index 95495718546ca..218fbd2bb3802 100644 --- a/clang/test/Driver/aix-ld.c +++ b/clang/test/Driver/aix-ld.c @@ -175,3 +175,21 @@ // CHECK-LD64-NO-DEFAULT-LIBS: "-L[[SYSROOT]]/usr/lib" // CHECK-LD64-NO-DEFAULT-LIBS-NOT: "-lpthreads" // CHECK-LD64-NO-DEFAULT-LIBS-NOT: "-lc" + +// Check powerpc-ibm-aix7.1.0.0, 32-bit. 'bcdtors' and argument order. +// RUN: %clangxx -no-canonical-prefixes %s 2>&1 -### \ +// RUN: -Wl,-bnocdtors \ +// RUN: -target powerpc-ibm-aix7.1.0.0 \ +// RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: | FileCheck --check-prefix=CHECK-LD32-CXX-ARG-ORDER %s + +// CHECK-LD32-CXX-ARG-ORDER: {{.*}}clang{{.*}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" +// CHECK-LD32-CXX-ARG-ORDER: "-isysroot" "[[SYSROOT:[^"]+]]" +// CHECK-LD32-CXX-ARG-ORDER: "{{.*}}ld{{(.exe)?}}" +// CHECK-LD32-CXX-ARG-ORDER-NOT: "-bnso" +// CHECK-LD32-CXX-ARG-ORDER: "-b32" +// CHECK-LD32-CXX-ARG-ORDER: "-bpT:0x10000000" "-bpD:0x20000000" +// CHECK-LD32-CXX-ARG-ORDER: "[[SYSROOT]]/usr/lib{{/|\\\\}}crt0.o" +// CHECK-LD32-CXX-ARG-ORDER: "-bcdtors:all:0:s" +// CHECK-LD32-CXX-ARG-ORDER: "-bnocdtors" +// CHECK-LD32-CXX-ARG-ORDER-NOT: "-bcdtors:all:0:s" From b321b429416ec51691a3c5372cb59912bded5f08 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Mon, 25 May 2020 10:59:39 -0700 Subject: [PATCH 045/770] [lldb/Test] Add a trace method to replace print statements. Many tests use (commented out) print statement for debugging the test itself. This patch adds a new trace method to lldbtest to reuse the existing tracing infrastructure and replace these print statements. 
Differential revision: https://reviews.llvm.org/D80448 --- .../Python/lldbsuite/test/lldbtest.py | 4 + .../benchmarks/stepping/TestSteppingSpeed.py | 4 +- .../target/basic/TestTargetCommand.py | 2 +- .../TestBreakpointConditions.py | 4 +- .../serialize/TestBreakpointSerialization.py | 2 +- .../TestDataFormatterSkipSummary.py | 6 +- .../load_unload/TestLoadUnload.py | 2 +- .../TestRegisterVariables.py | 164 +++++++++--------- .../class_types/TestClassTypesDisassembly.py | 4 +- .../lang/objc/blocks/TestObjCIvarsInBlocks.py | 2 +- .../lang/objc/foundation/TestSymbolTable.py | 16 -- .../breakpoint/TestBreakpointAPI.py | 4 +- lldb/test/API/python_api/event/TestEvents.py | 14 +- lldb/test/API/python_api/frame/TestFrames.py | 6 +- .../frame/inlines/TestInlinedFrame.py | 2 +- .../function_symbol/TestDisasmAPI.py | 18 +- .../function_symbol/TestSymbolAPI.py | 4 +- .../API/python_api/target/TestTargetAPI.py | 16 +- .../API/python_api/thread/TestThreadAPI.py | 2 +- .../lldb-server/TestGdbRemoteAuxvSupport.py | 7 +- .../TestGdbRemoteExpeditedRegisters.py | 5 +- .../lldb-server/TestGdbRemoteRegisterState.py | 15 +- .../tools/lldb-server/TestLldbGdbServer.py | 5 +- 23 files changed, 136 insertions(+), 172 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/lldbtest.py b/lldb/packages/Python/lldbsuite/test/lldbtest.py index b02181ae1ffc2..639f99463d927 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbtest.py +++ b/lldb/packages/Python/lldbsuite/test/lldbtest.py @@ -505,6 +505,10 @@ def TraceOn(self): """Returns True if we are in trace mode (tracing detailed test execution).""" return traceAlways + def trace(self, *args,**kwargs): + with recording(self, self.TraceOn()) as sbuf: + print(*args, **kwargs, file=sbuf) + @classmethod def setUpClass(cls): """ diff --git a/lldb/test/API/benchmarks/stepping/TestSteppingSpeed.py b/lldb/test/API/benchmarks/stepping/TestSteppingSpeed.py index e5a8f168b6468..60e4a42108ae3 100644 --- 
a/lldb/test/API/benchmarks/stepping/TestSteppingSpeed.py +++ b/lldb/test/API/benchmarks/stepping/TestSteppingSpeed.py @@ -22,8 +22,8 @@ def setUp(self): self.break_spec = '-n main' self.count = 50 - #print("self.exe=%s" % self.exe) - #print("self.break_spec=%s" % self.break_spec) + self.trace("self.exe=%s" % self.exe) + self.trace("self.break_spec=%s" % self.break_spec) @benchmarks_test @no_debug_info_test diff --git a/lldb/test/API/commands/target/basic/TestTargetCommand.py b/lldb/test/API/commands/target/basic/TestTargetCommand.py index 9bc9396e19ed4..83e27e2724642 100644 --- a/lldb/test/API/commands/target/basic/TestTargetCommand.py +++ b/lldb/test/API/commands/target/basic/TestTargetCommand.py @@ -82,7 +82,7 @@ def do_target_command(self): if match: # We will start from (index + 1) .... base = int(match.group(1), 10) + 1 - #print("base is:", base) + self.trace("base is:", base) break self.runCmd("target create " + exe_a, CURRENT_EXECUTABLE_SET) diff --git a/lldb/test/API/functionalities/breakpoint/breakpoint_conditions/TestBreakpointConditions.py b/lldb/test/API/functionalities/breakpoint/breakpoint_conditions/TestBreakpointConditions.py index de9a47d8c2022..a5f9458c05a0e 100644 --- a/lldb/test/API/functionalities/breakpoint/breakpoint_conditions/TestBreakpointConditions.py +++ b/lldb/test/API/functionalities/breakpoint/breakpoint_conditions/TestBreakpointConditions.py @@ -126,7 +126,7 @@ def breakpoint_conditions_python(self): # Now create a breakpoint on main.c by name 'c'. breakpoint = target.BreakpointCreateByName('c', 'a.out') - #print("breakpoint:", breakpoint) + self.trace("breakpoint:", breakpoint) self.assertTrue(breakpoint and breakpoint.GetNumLocations() == 1, VALID_BREAKPOINT) @@ -194,7 +194,7 @@ def breakpoint_invalid_conditions_python(self): # Now create a breakpoint on main.c by name 'c'. 
breakpoint = target.BreakpointCreateByName('c', 'a.out') - #print("breakpoint:", breakpoint) + self.trace("breakpoint:", breakpoint) self.assertTrue(breakpoint and breakpoint.GetNumLocations() == 1, VALID_BREAKPOINT) diff --git a/lldb/test/API/functionalities/breakpoint/serialize/TestBreakpointSerialization.py b/lldb/test/API/functionalities/breakpoint/serialize/TestBreakpointSerialization.py index afeccbef3bae7..6a3f40ff3a35b 100644 --- a/lldb/test/API/functionalities/breakpoint/serialize/TestBreakpointSerialization.py +++ b/lldb/test/API/functionalities/breakpoint/serialize/TestBreakpointSerialization.py @@ -118,7 +118,7 @@ def check_equivalence(self, source_bps, do_write = True): copy_text = copy_desc.GetData() # These two should be identical. - # print ("Source text for %d is %s."%(i, source_text)) + self.trace("Source text for %d is %s."%(i, source_text)) self.assertTrue (source_text == copy_text, "Source and dest breakpoints are not identical: \nsource: %s\ndest: %s"%(source_text, copy_text)) def do_check_resolvers(self): diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-skip-summary/TestDataFormatterSkipSummary.py b/lldb/test/API/functionalities/data-formatter/data-formatter-skip-summary/TestDataFormatterSkipSummary.py index fa13e922ce4ae..f5cf525427c8f 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-skip-summary/TestDataFormatterSkipSummary.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-skip-summary/TestDataFormatterSkipSummary.py @@ -147,13 +147,13 @@ def cleanup(): import re gcc_version_output = system( [[lldbutil.which(self.getCompiler()), "-v"]])[1] - #print("my output:", gcc_version_output) + self.trace("my output:", gcc_version_output) for line in gcc_version_output.split(os.linesep): m = re.search('\(Apple Inc\. 
build ([0-9]+)\)', line) - #print("line:", line) + self.trace("line:", line) if m: gcc_build = int(m.group(1)) - #print("gcc build:", gcc_build) + self.trace("gcc build:", gcc_build) if gcc_build >= 5666: # rdar://problem/9804600" self.skipTest( diff --git a/lldb/test/API/functionalities/load_unload/TestLoadUnload.py b/lldb/test/API/functionalities/load_unload/TestLoadUnload.py index e0013ccd93fa6..853c0b2cea201 100644 --- a/lldb/test/API/functionalities/load_unload/TestLoadUnload.py +++ b/lldb/test/API/functionalities/load_unload/TestLoadUnload.py @@ -267,7 +267,7 @@ def run_lldb_process_load_and_unload_commands(self): output = self.res.GetOutput() pattern = re.compile("Image ([0-9]+) loaded") for l in output.split(os.linesep): - #print("l:", l) + self.trace("l:", l) match = pattern.search(l) if match: break diff --git a/lldb/test/API/lang/c/register_variables/TestRegisterVariables.py b/lldb/test/API/lang/c/register_variables/TestRegisterVariables.py index af0ad2a08719d..51b728be2fe6a 100644 --- a/lldb/test/API/lang/c/register_variables/TestRegisterVariables.py +++ b/lldb/test/API/lang/c/register_variables/TestRegisterVariables.py @@ -1,91 +1,11 @@ """Check that compiler-generated register values work correctly""" -from __future__ import print_function - import re import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * from lldbsuite.test import lldbutil -# This method attempts to figure out if a given variable -# is in a register. -# -# Return: -# True if the value has a readable value and is in a register -# False otherwise - - -def is_variable_in_register(frame, var_name): - # Ensure we can lookup the variable. - var = frame.FindVariable(var_name) - # print("\nchecking {}...".format(var_name)) - if var is None or not var.IsValid(): - # print("{} cannot be found".format(var_name)) - return False - - # Check that we can get its value. If not, this - # may be a variable that is just out of scope at this point. 
- value = var.GetValue() - # print("checking value...") - if value is None: - # print("value is invalid") - return False - # else: - # print("value is {}".format(value)) - - # We have a variable and we can get its value. The variable is in - # a register if we cannot get an address for it, assuming it is - # not a struct pointer. (This is an approximation - compilers can - # do other things with spitting up a value into multiple parts of - # multiple registers, but what we're verifying here is much more - # than it was doing before). - var_addr = var.GetAddress() - # print("checking address...") - if var_addr.IsValid(): - # We have an address, it must not be in a register. - # print("var {} is not in a register: has a valid address {}".format(var_name, var_addr)) - return False - else: - # We don't have an address but we can read the value. - # It is likely stored in a register. - # print("var {} is in a register (we don't have an address for it)".format(var_name)) - return True - - -def is_struct_pointer_in_register(frame, var_name, trace): - # Ensure we can lookup the variable. - var = frame.FindVariable(var_name) - if trace: - print("\nchecking {}...".format(var_name)) - - if var is None or not var.IsValid(): - # print("{} cannot be found".format(var_name)) - return False - - # Check that we can get its value. If not, this - # may be a variable that is just out of scope at this point. - value = var.GetValue() - # print("checking value...") - if value is None: - if trace: - print("value is invalid") - return False - else: - if trace: - print("value is {}".format(value)) - - var_loc = var.GetLocation() - if trace: - print("checking location: {}".format(var_loc)) - if var_loc is None or var_loc.startswith("0x"): - # The frame var is not in a register but rather a memory location. 
- # print("frame var {} is not in a register".format(var_name)) - return False - else: - # print("frame var {} is in a register".format(var_name)) - return True - def re_expr_equals(val_type, val): # Match ({val_type}) ${sum_digits} = {val} @@ -136,12 +56,12 @@ def test_and_run_command(self): # Try some variables that should be visible frame = self.dbg.GetSelectedTarget().GetProcess( ).GetSelectedThread().GetSelectedFrame() - if is_variable_in_register(frame, 'a'): + if self.is_variable_in_register(frame, 'a'): register_variables_count += 1 self.expect("expr a", VARIABLES_DISPLAYED_CORRECTLY, patterns=[re_expr_equals('int', 2)]) - if is_struct_pointer_in_register(frame, 'b', self.TraceOn()): + if self.is_struct_pointer_in_register(frame, 'b', self.TraceOn()): register_variables_count += 1 self.expect("expr b->m1", VARIABLES_DISPLAYED_CORRECTLY, patterns=[re_expr_equals('int', 3)]) @@ -163,12 +83,12 @@ def test_and_run_command(self): # Try some variables that should be visible frame = self.dbg.GetSelectedTarget().GetProcess( ).GetSelectedThread().GetSelectedFrame() - if is_struct_pointer_in_register(frame, 'b', self.TraceOn()): + if self.is_struct_pointer_in_register(frame, 'b', self.TraceOn()): register_variables_count += 1 self.expect("expr b->m2", VARIABLES_DISPLAYED_CORRECTLY, patterns=[re_expr_equals('int', 5)]) - if is_variable_in_register(frame, 'c'): + if self.is_variable_in_register(frame, 'c'): register_variables_count += 1 self.expect("expr c", VARIABLES_DISPLAYED_CORRECTLY, patterns=[re_expr_equals('int', 5)]) @@ -190,7 +110,7 @@ def test_and_run_command(self): # Try some variables that should be visible frame = self.dbg.GetSelectedTarget().GetProcess( ).GetSelectedThread().GetSelectedFrame() - if is_variable_in_register(frame, 'f'): + if self.is_variable_in_register(frame, 'f'): register_variables_count += 1 self.expect("expr f", VARIABLES_DISPLAYED_CORRECTLY, patterns=[re_expr_equals('float', '3.1')]) @@ -199,6 +119,78 @@ def 
test_and_run_command(self): self.assertTrue( register_variables_count > 0, "expected to verify at least one variable in a register") - # print("executed {} expressions with values in registers".format(register_variables_count)) + self.trace("executed {} expressions with values in registers".format(register_variables_count)) self.runCmd("kill") + + + def is_variable_in_register(self, frame, var_name): + # Ensure we can lookup the variable. + var = frame.FindVariable(var_name) + self.trace("\nchecking {}...".format(var_name)) + if var is None or not var.IsValid(): + self.trace("{} cannot be found".format(var_name)) + return False + + # Check that we can get its value. If not, this + # may be a variable that is just out of scope at this point. + value = var.GetValue() + self.trace("checking value...") + if value is None: + self.trace("value is invalid") + return False + else: + self.trace("value is {}".format(value)) + + # We have a variable and we can get its value. The variable is in a + # register if we cannot get an address for it, assuming it is not a + # struct pointer. (This is an approximation - compilers can do other + # things with spitting up a value into multiple parts of multiple + # registers, but what we're verifying here is much more than it was + # doing before). + var_addr = var.GetAddress() + self.trace("checking address...") + if var_addr.IsValid(): + # We have an address, it must not be in a register. + self.trace("var {} is not in a register: has a valid address {}".format(var_name, var_addr)) + return False + else: + # We don't have an address but we can read the value. + # It is likely stored in a register. + self.trace("var {} is in a register (we don't have an address for it)".format(var_name)) + return True + + + def is_struct_pointer_in_register(self, frame, var_name, trace): + # Ensure we can lookup the variable. 
+ var = frame.FindVariable(var_name) + if trace: + print("\nchecking {}...".format(var_name)) + + if var is None or not var.IsValid(): + self.trace("{} cannot be found".format(var_name)) + return False + + # Check that we can get its value. If not, this + # may be a variable that is just out of scope at this point. + value = var.GetValue() + self.trace("checking value...") + if value is None: + if trace: + print("value is invalid") + return False + else: + if trace: + print("value is {}".format(value)) + + var_loc = var.GetLocation() + if trace: + print("checking location: {}".format(var_loc)) + if var_loc is None or var_loc.startswith("0x"): + # The frame var is not in a register but rather a memory location. + self.trace("frame var {} is not in a register".format(var_name)) + return False + else: + self.trace("frame var {} is in a register".format(var_name)) + return True + diff --git a/lldb/test/API/lang/cpp/class_types/TestClassTypesDisassembly.py b/lldb/test/API/lang/cpp/class_types/TestClassTypesDisassembly.py index ad187d0394b6d..9f3d6806451e7 100644 --- a/lldb/test/API/lang/cpp/class_types/TestClassTypesDisassembly.py +++ b/lldb/test/API/lang/cpp/class_types/TestClassTypesDisassembly.py @@ -33,8 +33,8 @@ def test_and_run_command(self): match = frameRE.search(line) if match: function = match.group(1) - #print("line:", line) - #print("function:", function) + self.trace("line:", line) + self.trace("function:", function) self.runCmd("disassemble -n '%s'" % function) @add_test_categories(['pyapi']) diff --git a/lldb/test/API/lang/objc/blocks/TestObjCIvarsInBlocks.py b/lldb/test/API/lang/objc/blocks/TestObjCIvarsInBlocks.py index e790e6e9d96e4..9eb8931fb4a0e 100644 --- a/lldb/test/API/lang/objc/blocks/TestObjCIvarsInBlocks.py +++ b/lldb/test/API/lang/objc/blocks/TestObjCIvarsInBlocks.py @@ -125,7 +125,7 @@ def test_with_python_api(self): expr, "Successfully got a local variable in a block in a class method.") ret_value_signed = expr.GetValueAsSigned(error) - # 
print('ret_value_signed = %i' % (ret_value_signed)) + self.trace('ret_value_signed = %i' % (ret_value_signed)) self.assertTrue( ret_value_signed == 5, "The local variable in the block was what we expected.") diff --git a/lldb/test/API/lang/objc/foundation/TestSymbolTable.py b/lldb/test/API/lang/objc/foundation/TestSymbolTable.py index abfc7621e2e7d..b77a8dfc0ed90 100644 --- a/lldb/test/API/lang/objc/foundation/TestSymbolTable.py +++ b/lldb/test/API/lang/objc/foundation/TestSymbolTable.py @@ -50,19 +50,3 @@ def test_with_python_api(self): module = target.FindModule(filespec) self.assertTrue(module, VALID_MODULE) - # Create the set of known symbols. As we iterate through the symbol - # table, remove the symbol from the set if it is a known symbol. - expected_symbols = set(self.symbols_list) - for symbol in module: - self.assertTrue(symbol, VALID_SYMBOL) - #print("symbol:", symbol) - name = symbol.GetName() - if name in expected_symbols: - #print("Removing %s from known_symbols %s" % (name, expected_symbols)) - expected_symbols.remove(name) - - # At this point, the known_symbols set should have become an empty set. - # If not, raise an error. - #print("symbols unaccounted for:", expected_symbols) - self.assertTrue(len(expected_symbols) == 0, - "All the known symbols are accounted for") diff --git a/lldb/test/API/python_api/breakpoint/TestBreakpointAPI.py b/lldb/test/API/python_api/breakpoint/TestBreakpointAPI.py index dd57846963438..1c0c334fbeebf 100644 --- a/lldb/test/API/python_api/breakpoint/TestBreakpointAPI.py +++ b/lldb/test/API/python_api/breakpoint/TestBreakpointAPI.py @@ -25,7 +25,7 @@ def test_breakpoint_is_valid(self): # Now create a breakpoint on main.c by name 'AFunction'. 
breakpoint = target.BreakpointCreateByName('AFunction', 'a.out') - #print("breakpoint:", breakpoint) + self.trace("breakpoint:", breakpoint) self.assertTrue(breakpoint and breakpoint.GetNumLocations() == 1, VALID_BREAKPOINT) @@ -59,7 +59,7 @@ def test_target_delete(self): # Now create a breakpoint on main.c by name 'AFunction'. breakpoint = target.BreakpointCreateByName('AFunction', 'a.out') - #print("breakpoint:", breakpoint) + self.trace("breakpoint:", breakpoint) self.assertTrue(breakpoint and breakpoint.GetNumLocations() == 1, VALID_BREAKPOINT) diff --git a/lldb/test/API/python_api/event/TestEvents.py b/lldb/test/API/python_api/event/TestEvents.py index 97ebe8ffc03d6..62ed195729f00 100644 --- a/lldb/test/API/python_api/event/TestEvents.py +++ b/lldb/test/API/python_api/event/TestEvents.py @@ -135,7 +135,7 @@ def test_wait_for_event(self): # Now create a breakpoint on main.c by name 'c'. breakpoint = target.BreakpointCreateByName('c', 'a.out') - #print("breakpoint:", breakpoint) + self.trace("breakpoint:", breakpoint) self.assertTrue(breakpoint and breakpoint.GetNumLocations() == 1, VALID_BREAKPOINT) @@ -171,9 +171,9 @@ def run(self): # Let's only try at most 3 times to retrieve any kind of event. while not count > 3: if listener.WaitForEvent(5, event): - #print("Got a valid event:", event) - #print("Event data flavor:", event.GetDataFlavor()) - #print("Event type:", lldbutil.state_type_to_str(event.GetType())) + self.trace("Got a valid event:", event) + self.trace("Event data flavor:", event.GetDataFlavor()) + self.trace("Event type:", lldbutil.state_type_to_str(event.GetType())) listener.Clear() return count = count + 1 @@ -215,7 +215,7 @@ def test_add_listener_to_broadcaster(self): # Now create a breakpoint on main.c by name 'c'. 
breakpoint = target.BreakpointCreateByName('c', 'a.out') - #print("breakpoint:", breakpoint) + self.trace("breakpoint:", breakpoint) self.assertTrue(breakpoint and breakpoint.GetNumLocations() == 1, VALID_BREAKPOINT) @@ -256,7 +256,7 @@ def test_add_listener_to_broadcaster(self): class MyListeningThread(threading.Thread): def run(self): - #print("Running MyListeningThread:", self) + self.trace("Running MyListeningThread:", self) # Regular expression pattern for the event description. pattern = re.compile("data = {.*, state = (.*)}$") @@ -266,7 +266,7 @@ def run(self): while True: if listener.WaitForEvent(5, event): desc = lldbutil.get_description(event) - #print("Event description:", desc) + self.trace("Event description:", desc) match = pattern.search(desc) if not match: break diff --git a/lldb/test/API/python_api/frame/TestFrames.py b/lldb/test/API/python_api/frame/TestFrames.py index 6d4b7b51f4263..1ec66a3ddbeb1 100644 --- a/lldb/test/API/python_api/frame/TestFrames.py +++ b/lldb/test/API/python_api/frame/TestFrames.py @@ -28,7 +28,7 @@ def test_get_arg_vals_for_call_stack(self): # Now create a breakpoint on main.c by name 'c'. breakpoint = target.BreakpointCreateByName('c', 'a.out') - #print("breakpoint:", breakpoint) + self.trace("breakpoint:", breakpoint) self.assertTrue(breakpoint and breakpoint.GetNumLocations() == 1, VALID_BREAKPOINT) @@ -131,7 +131,7 @@ def test_frame_api_boundary_condition(self): # Now create a breakpoint on main.c by name 'c'. breakpoint = target.BreakpointCreateByName('c', 'a.out') - #print("breakpoint:", breakpoint) + self.trace("breakpoint:", breakpoint) self.assertTrue(breakpoint and breakpoint.GetNumLocations() == 1, VALID_BREAKPOINT) @@ -173,7 +173,7 @@ def test_frame_api_IsEqual(self): # Now create a breakpoint on main.c by name 'c'. 
breakpoint = target.BreakpointCreateByName('c', 'a.out') - #print("breakpoint:", breakpoint) + self.trace("breakpoint:", breakpoint) self.assertTrue(breakpoint and breakpoint.GetNumLocations() == 1, VALID_BREAKPOINT) diff --git a/lldb/test/API/python_api/frame/inlines/TestInlinedFrame.py b/lldb/test/API/python_api/frame/inlines/TestInlinedFrame.py index da4e9cb06e7b5..eb40b4c4993e7 100644 --- a/lldb/test/API/python_api/frame/inlines/TestInlinedFrame.py +++ b/lldb/test/API/python_api/frame/inlines/TestInlinedFrame.py @@ -37,7 +37,7 @@ def test_stop_at_outer_inline(self): # Now create a breakpoint on main.c by the name of 'inner_inline'. breakpoint = target.BreakpointCreateByName('inner_inline', 'a.out') - #print("breakpoint:", breakpoint) + self.trace("breakpoint:", breakpoint) self.assertTrue(breakpoint and breakpoint.GetNumLocations() > 1, VALID_BREAKPOINT) diff --git a/lldb/test/API/python_api/function_symbol/TestDisasmAPI.py b/lldb/test/API/python_api/function_symbol/TestDisasmAPI.py index 2278d69fbbe3b..01d26da060d2b 100644 --- a/lldb/test/API/python_api/function_symbol/TestDisasmAPI.py +++ b/lldb/test/API/python_api/function_symbol/TestDisasmAPI.py @@ -38,8 +38,8 @@ def test(self): # Now create the two breakpoints inside function 'a'. breakpoint1 = target.BreakpointCreateByLocation('main.c', self.line1) breakpoint2 = target.BreakpointCreateByLocation('main.c', self.line2) - #print("breakpoint1:", breakpoint1) - #print("breakpoint2:", breakpoint2) + self.trace("breakpoint1:", breakpoint1) + self.trace("breakpoint2:", breakpoint2) self.assertTrue(breakpoint1 and breakpoint1.GetNumLocations() == 1, VALID_BREAKPOINT) @@ -64,7 +64,7 @@ def test(self): self.assertTrue(lineEntry.GetLine() == self.line1) address1 = lineEntry.GetStartAddress() - #print("address1:", address1) + self.trace("address1:", address1) # Now call SBTarget.ResolveSymbolContextForAddress() with address1. 
context1 = target.ResolveSymbolContextForAddress( @@ -103,15 +103,11 @@ def test(self): print("disassembly=>\n", disasm_output) sa1 = symbol.GetStartAddress() - #print("sa1:", sa1) - #print("sa1.GetFileAddress():", hex(sa1.GetFileAddress())) - #ea1 = symbol.GetEndAddress() - #print("ea1:", ea1) + self.trace("sa1:", sa1) + self.trace("sa1.GetFileAddress():", hex(sa1.GetFileAddress())) sa2 = function.GetStartAddress() - #print("sa2:", sa2) - #print("sa2.GetFileAddress():", hex(sa2.GetFileAddress())) - #ea2 = function.GetEndAddress() - #print("ea2:", ea2) + self.trace("sa2:", sa2) + self.trace("sa2.GetFileAddress():", hex(sa2.GetFileAddress())) self.assertTrue(sa1 and sa2 and sa1 == sa2, "The two starting addresses should be the same") diff --git a/lldb/test/API/python_api/function_symbol/TestSymbolAPI.py b/lldb/test/API/python_api/function_symbol/TestSymbolAPI.py index 56fa73c84ad66..c5bcb152beb0c 100644 --- a/lldb/test/API/python_api/function_symbol/TestSymbolAPI.py +++ b/lldb/test/API/python_api/function_symbol/TestSymbolAPI.py @@ -38,8 +38,8 @@ def test(self): # Now create the two breakpoints inside function 'a'. breakpoint1 = target.BreakpointCreateByLocation('main.c', self.line1) breakpoint2 = target.BreakpointCreateByLocation('main.c', self.line2) - #print("breakpoint1:", breakpoint1) - #print("breakpoint2:", breakpoint2) + self.trace("breakpoint1:", breakpoint1) + self.trace("breakpoint2:", breakpoint2) self.assertTrue(breakpoint1 and breakpoint1.GetNumLocations() == 1, VALID_BREAKPOINT) diff --git a/lldb/test/API/python_api/target/TestTargetAPI.py b/lldb/test/API/python_api/target/TestTargetAPI.py index c5b960528d4b5..016754720c8ce 100644 --- a/lldb/test/API/python_api/target/TestTargetAPI.py +++ b/lldb/test/API/python_api/target/TestTargetAPI.py @@ -359,8 +359,8 @@ def resolve_symbol_context_with_address(self): # Now create the two breakpoints inside function 'a'. 
breakpoint1 = target.BreakpointCreateByLocation('main.c', self.line1) breakpoint2 = target.BreakpointCreateByLocation('main.c', self.line2) - #print("breakpoint1:", breakpoint1) - #print("breakpoint2:", breakpoint2) + self.trace("breakpoint1:", breakpoint1) + self.trace("breakpoint2:", breakpoint2) self.assertTrue(breakpoint1 and breakpoint1.GetNumLocations() == 1, VALID_BREAKPOINT) @@ -402,8 +402,8 @@ def resolve_symbol_context_with_address(self): address2 = lineEntry.GetStartAddress() - #print("address1:", address1) - #print("address2:", address2) + self.trace("address1:", address1) + self.trace("address2:", address2) # Now call SBTarget.ResolveSymbolContextForAddress() with the addresses # from our line entry. @@ -413,15 +413,15 @@ def resolve_symbol_context_with_address(self): address2, lldb.eSymbolContextEverything) self.assertTrue(context1 and context2) - #print("context1:", context1) - #print("context2:", context2) + self.trace("context1:", context1) + self.trace("context2:", context2) # Verify that the context point to the same function 'a'. 
symbol1 = context1.GetSymbol() symbol2 = context2.GetSymbol() self.assertTrue(symbol1 and symbol2) - #print("symbol1:", symbol1) - #print("symbol2:", symbol2) + self.trace("symbol1:", symbol1) + self.trace("symbol2:", symbol2) from lldbsuite.test.lldbutil import get_description desc1 = get_description(symbol1) diff --git a/lldb/test/API/python_api/thread/TestThreadAPI.py b/lldb/test/API/python_api/thread/TestThreadAPI.py index 144f062846aee..2101527dee6be 100644 --- a/lldb/test/API/python_api/thread/TestThreadAPI.py +++ b/lldb/test/API/python_api/thread/TestThreadAPI.py @@ -100,7 +100,7 @@ def get_process(self): self.runCmd("process status") proc_of_thread = thread.GetProcess() - #print("proc_of_thread:", proc_of_thread) + self.trace("proc_of_thread:", proc_of_thread) self.assertTrue(proc_of_thread.GetProcessID() == process.GetProcessID()) diff --git a/lldb/test/API/tools/lldb-server/TestGdbRemoteAuxvSupport.py b/lldb/test/API/tools/lldb-server/TestGdbRemoteAuxvSupport.py index 1a3a2b2936504..b89448fd5ba6a 100644 --- a/lldb/test/API/tools/lldb-server/TestGdbRemoteAuxvSupport.py +++ b/lldb/test/API/tools/lldb-server/TestGdbRemoteAuxvSupport.py @@ -1,6 +1,3 @@ -from __future__ import print_function - - import gdbremote_testcase from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * @@ -119,7 +116,7 @@ def auxv_data_is_correct_size(self): # Ensure auxv data is a multiple of 2*word_size (there should be two # unsigned long fields per auxv entry). 
self.assertEqual(len(auxv_data) % (2 * word_size), 0) - # print("auxv contains {} entries".format(len(auxv_data) / (2*word_size))) + self.trace("auxv contains {} entries".format(len(auxv_data) / (2*word_size))) @debugserver_test def test_auxv_data_is_correct_size_debugserver(self): @@ -159,7 +156,7 @@ def auxv_keys_look_valid(self): for auxv_key in auxv_dict: self.assertTrue(auxv_key >= 1) self.assertTrue(auxv_key <= 1000) - # print("auxv dict: {}".format(auxv_dict)) + self.trace("auxv dict: {}".format(auxv_dict)) @debugserver_test def test_auxv_keys_look_valid_debugserver(self): diff --git a/lldb/test/API/tools/lldb-server/TestGdbRemoteExpeditedRegisters.py b/lldb/test/API/tools/lldb-server/TestGdbRemoteExpeditedRegisters.py index 7d8e28c745c94..f74143a3cceec 100644 --- a/lldb/test/API/tools/lldb-server/TestGdbRemoteExpeditedRegisters.py +++ b/lldb/test/API/tools/lldb-server/TestGdbRemoteExpeditedRegisters.py @@ -1,6 +1,3 @@ -from __future__ import print_function - - import gdbremote_testcase from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * @@ -59,7 +56,7 @@ def stop_notification_contains_generic_register( # Ensure the expedited registers contained it. 
self.assertTrue(reg_info["lldb_register_index"] in expedited_registers) - # print("{} reg_info:{}".format(generic_register_name, reg_info)) + self.trace("{} reg_info:{}".format(generic_register_name, reg_info)) def stop_notification_contains_any_registers(self): # Generate a stop reply, parse out expedited registers from stop diff --git a/lldb/test/API/tools/lldb-server/TestGdbRemoteRegisterState.py b/lldb/test/API/tools/lldb-server/TestGdbRemoteRegisterState.py index 2543ed6e90299..e20948ba38af6 100644 --- a/lldb/test/API/tools/lldb-server/TestGdbRemoteRegisterState.py +++ b/lldb/test/API/tools/lldb-server/TestGdbRemoteRegisterState.py @@ -1,6 +1,3 @@ -from __future__ import print_function - - import gdbremote_testcase from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * @@ -50,7 +47,7 @@ def grp_register_save_restore_works(self, with_suffix): self.assertIsNotNone(threads) thread_id = threads[0] self.assertIsNotNone(thread_id) - # print("Running on thread: 0x{:x}".format(thread_id)) + self.trace("Running on thread: 0x{:x}".format(thread_id)) else: thread_id = None @@ -64,22 +61,22 @@ def grp_register_save_restore_works(self, with_suffix): (success, state_id) = self.parse_QSaveRegisterState_response(context) self.assertTrue(success) self.assertIsNotNone(state_id) - # print("saved register state id: {}".format(state_id)) + self.trace("saved register state id: {}".format(state_id)) # Remember initial register values. initial_reg_values = self.read_register_values( gpr_reg_infos, endian, thread_id=thread_id) - # print("initial_reg_values: {}".format(initial_reg_values)) + self.trace("initial_reg_values: {}".format(initial_reg_values)) # Flip gpr register values. 
(successful_writes, failed_writes) = self.flip_all_bits_in_each_register_value( gpr_reg_infos, endian, thread_id=thread_id) - # print("successful writes: {}, failed writes: {}".format(successful_writes, failed_writes)) + self.trace("successful writes: {}, failed writes: {}".format(successful_writes, failed_writes)) self.assertTrue(successful_writes > 0) flipped_reg_values = self.read_register_values( gpr_reg_infos, endian, thread_id=thread_id) - # print("flipped_reg_values: {}".format(flipped_reg_values)) + self.trace("flipped_reg_values: {}".format(flipped_reg_values)) # Restore register values. self.reset_test_sequence() @@ -91,7 +88,7 @@ def grp_register_save_restore_works(self, with_suffix): # Verify registers match initial register values. final_reg_values = self.read_register_values( gpr_reg_infos, endian, thread_id=thread_id) - # print("final_reg_values: {}".format(final_reg_values)) + self.trace("final_reg_values: {}".format(final_reg_values)) self.assertIsNotNone(final_reg_values) self.assertEqual(final_reg_values, initial_reg_values) diff --git a/lldb/test/API/tools/lldb-server/TestLldbGdbServer.py b/lldb/test/API/tools/lldb-server/TestLldbGdbServer.py index 2b7f28a3aefbc..d46123e337c80 100644 --- a/lldb/test/API/tools/lldb-server/TestLldbGdbServer.py +++ b/lldb/test/API/tools/lldb-server/TestLldbGdbServer.py @@ -10,9 +10,6 @@ the initial set of tests implemented. """ -from __future__ import division, print_function - - import unittest2 import gdbremote_testcase import lldbgdbserverutils @@ -1442,7 +1439,7 @@ def P_writes_all_gpr_registers(self): # Write flipped bit pattern of existing value to each register. 
(successful_writes, failed_writes) = self.flip_all_bits_in_each_register_value( gpr_reg_infos, endian) - # print("successful writes: {}, failed writes: {}".format(successful_writes, failed_writes)) + self.trace("successful writes: {}, failed writes: {}".format(successful_writes, failed_writes)) self.assertTrue(successful_writes > 0) # Note: as of this moment, a hefty number of the GPR writes are failing with E32 (everything except rax-rdx, rdi, rsi, rbp). From 2b8d6fa0acacba4dee31ed618a5596414b2279d5 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 25 May 2020 20:13:03 +0200 Subject: [PATCH 046/770] Revert "[PGO] Fix computation of function Hash" This reverts commit 7c298c104bfe725d4315926a656263e8a5ac3054. Fails make check-clang. Failing Tests (8): Clang :: Profile/c-counter-overflows.c Clang :: Profile/c-general.c Clang :: Profile/c-unprofiled-blocks.c Clang :: Profile/cxx-rangefor.cpp Clang :: Profile/cxx-throws.cpp Clang :: Profile/misexpect-switch-default.c Clang :: Profile/misexpect-switch-nonconst.c Clang :: Profile/misexpect-switch.c --- clang/lib/CodeGen/CodeGenPGO.cpp | 8 +++----- clang/test/Profile/c-collision.c | 22 ---------------------- 2 files changed, 3 insertions(+), 27 deletions(-) delete mode 100644 clang/test/Profile/c-collision.c diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 98827bc3eec5e..3c91a04d54642 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -747,15 +747,13 @@ uint64_t PGOHash::finalize() { return Working; // Check for remaining work in Working. - if (Working) { - using namespace llvm::support; - uint64_t Swapped = endian::byte_swap(Working); - MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped))); - } + if (Working) + MD5.update(Working); // Finalize the MD5 and return the hash. 
llvm::MD5::MD5Result Result; MD5.final(Result); + using namespace llvm::support; return Result.low(); } diff --git a/clang/test/Profile/c-collision.c b/clang/test/Profile/c-collision.c deleted file mode 100644 index fabecd752b4ef..0000000000000 --- a/clang/test/Profile/c-collision.c +++ /dev/null @@ -1,22 +0,0 @@ -// Test that a slight change in the code leads to a different hash. -// RUN: %clang_cc1 -UEXTRA -triple x86_64-unknown-linux-gnu -main-file-name c-collision.c %s -o - -emit-llvm -fprofile-instrument=clang | FileCheck %s --check-prefix=CHECK-NOEXTRA -// RUN: %clang_cc1 -DEXTRA -triple x86_64-unknown-linux-gnu -main-file-name c-collision.c %s -o - -emit-llvm -fprofile-instrument=clang | FileCheck %s --check-prefix=CHECK-EXTRA - -// CHECK-NOEXTRA: @__profd_foo = private global { {{.*}} } { i64 6699318081062747564, i64 7156072912471487002, -// CHECK-EXTRA: @__profd_foo = private global { {{.*}} } { i64 6699318081062747564, i64 -4383447408116050035, - -extern int bar; -void foo() { - if (bar) { - } - if (bar) { - } - if (bar) { - if (bar) { -#ifdef EXTRA - if (bar) { - } -#endif - } - } -} From e0aefaedb617766f4667118911fccb4a14abfb94 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 25 May 2020 18:14:50 +0000 Subject: [PATCH 047/770] [gn build] Port ba92b274225 --- llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn index fb171a4a48cb2..da6b850cd0d47 100644 --- a/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn @@ -14,6 +14,7 @@ unittest("StaticAnalysisTests") { "AnalyzerOptionsTest.cpp", "CallDescriptionTest.cpp", "CallEventTest.cpp", + "RangeSetTest.cpp", "RegisterCustomCheckersTest.cpp", "StoreTest.cpp", "SymbolReaperTest.cpp", From 37ef15143a5d77a0fba0ece4c26a72cfb9e050a0 Mon 
Sep 17 00:00:00 2001 From: zoecarver Date: Sat, 23 May 2020 14:43:12 -0700 Subject: [PATCH 048/770] [libcxx] Fix C++14 and up constexpr members in MoveOnly. Summary: a4b8ee6 made all MoveOnly members constexpr but, some members and constructors contain expressions that are only valid in C++14 and later. This patch prefixes those methods and constructors with TEST_CONSTEXPR_CXX14. Reviewers: ldionne, #libc! Subscribers: dexonsmith, libcxx-commits Tags: #libc Differential Revision: https://reviews.llvm.org/D80482 --- libcxx/test/support/MoveOnly.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/libcxx/test/support/MoveOnly.h b/libcxx/test/support/MoveOnly.h index f9e9298097f12..adcae2542bd3c 100644 --- a/libcxx/test/support/MoveOnly.h +++ b/libcxx/test/support/MoveOnly.h @@ -24,17 +24,19 @@ class MoveOnly int data_; public: constexpr MoveOnly(int data = 1) : data_(data) {} - constexpr MoveOnly(MoveOnly&& x) + TEST_CONSTEXPR_CXX14 MoveOnly(MoveOnly&& x) : data_(x.data_) {x.data_ = 0;} - constexpr MoveOnly& operator=(MoveOnly&& x) + TEST_CONSTEXPR_CXX14 MoveOnly& operator=(MoveOnly&& x) {data_ = x.data_; x.data_ = 0; return *this;} constexpr int get() const {return data_;} constexpr bool operator==(const MoveOnly& x) const {return data_ == x.data_;} constexpr bool operator< (const MoveOnly& x) const {return data_ < x.data_;} - constexpr MoveOnly operator+(const MoveOnly& x) const { return MoveOnly{data_ + x.data_}; } - constexpr MoveOnly operator*(const MoveOnly& x) const { return MoveOnly{data_ * x.data_}; } + TEST_CONSTEXPR_CXX14 MoveOnly operator+(const MoveOnly& x) const + { return MoveOnly{data_ + x.data_}; } + TEST_CONSTEXPR_CXX14 MoveOnly operator*(const MoveOnly& x) const + { return MoveOnly{data_ * x.data_}; } }; namespace std { From 51a276c759c90c844bbabf5066195aaf42fb0c6e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 25 May 2020 11:34:09 -0700 Subject: [PATCH 049/770] [X86] Teach combineTruncatedArithmetic to push 
truncate through subtracts where only one of the inputs is free to truncate. Fix combineSubToSubus to handle the new DAG to avoid a regression. There are still regressions in test14/test15/test16, where it looks like we were trying to set up cases we could match to umin+trunc+subus, but the handling was never finished. The regression here isn't unique to sub. It's a lost opportunity for taking an AND with two truncated inputs and producing a larger AND with a single truncate. The same thing could happen with any other node we handle in combineTruncatedArithmetic since we are moving the truncate up the DAG. Differential Revision: https://reviews.llvm.org/D80483 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 45 +- llvm/test/CodeGen/X86/psubus.ll | 886 ++++++++++++------------ 2 files changed, 470 insertions(+), 461 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 5101977a68edc..54a80151eb69a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -43588,21 +43588,12 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG, case ISD::AND: case ISD::XOR: case ISD::OR: - case ISD::ADD: { - SDValue Op0 = Src.getOperand(0); - SDValue Op1 = Src.getOperand(1); - if (TLI.isOperationLegal(SrcOpcode, VT) && - (Op0 == Op1 || IsFreeTruncation(Op0) || IsFreeTruncation(Op1))) - return TruncateArithmetic(Op0, Op1); - break; - } + case ISD::ADD: case ISD::SUB: { - // TODO: ISD::SUB We are conservative and require both sides to be freely - // truncatable to avoid interfering with combineSubToSubus. 
SDValue Op0 = Src.getOperand(0); SDValue Op1 = Src.getOperand(1); if (TLI.isOperationLegal(SrcOpcode, VT) && - (Op0 == Op1 || (IsFreeTruncation(Op0) && IsFreeTruncation(Op1)))) + (Op0 == Op1 || IsFreeTruncation(Op0) || IsFreeTruncation(Op1))) return TruncateArithmetic(Op0, Op1); break; } @@ -46698,6 +46689,38 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG, SubusRHS = MinLHS; else return SDValue(); + } else if (Op1.getOpcode() == ISD::TRUNCATE && + Op1.getOperand(0).getOpcode() == ISD::UMIN && + (EltVT == MVT::i8 || EltVT == MVT::i16)) { + // Special case where the UMIN has been truncated. Try to push the truncate + // further up. This is similar to the i32/i64 special processing. + SubusLHS = Op0; + SDValue MinLHS = Op1.getOperand(0).getOperand(0); + SDValue MinRHS = Op1.getOperand(0).getOperand(1); + EVT TruncVT = Op1.getOperand(0).getValueType(); + if (!(Subtarget.hasSSSE3() && (TruncVT == MVT::v8i32 || + TruncVT == MVT::v8i64)) && + !(Subtarget.useBWIRegs() && (TruncVT == MVT::v16i32))) + return SDValue(); + SDValue OpToSaturate; + if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && + MinLHS.getOperand(0) == Op0) + OpToSaturate = MinRHS; + else if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && + MinRHS.getOperand(0) == Op0) + OpToSaturate = MinLHS; + else + return SDValue(); + + // Saturate the non-extended input and then truncate it. 
+ SDLoc DL(N); + SDValue SaturationConst = + DAG.getConstant(APInt::getLowBitsSet(TruncVT.getScalarSizeInBits(), + VT.getScalarSizeInBits()), + DL, TruncVT); + SDValue UMin = DAG.getNode(ISD::UMIN, DL, TruncVT, OpToSaturate, + SaturationConst); + SubusRHS = DAG.getNode(ISD::TRUNCATE, DL, VT, UMin); } else return SDValue(); diff --git a/llvm/test/CodeGen/X86/psubus.ll b/llvm/test/CodeGen/X86/psubus.ll index 21c63da1d1eea..a51893ade0217 100644 --- a/llvm/test/CodeGen/X86/psubus.ll +++ b/llvm/test/CodeGen/X86/psubus.ll @@ -465,33 +465,33 @@ vector.ph: ret <32 x i8> %res } +; FIXME: match this to UMIN+TRUNC+PSUBUS define <8 x i16> @test13(<8 x i16> %x, <8 x i32> %y) nounwind { ; SSE2-LABEL: test13: ; SSE2: # %bb.0: # %vector.ph -; SSE2-NEXT: pxor %xmm4, %xmm4 -; SSE2-NEXT: movdqa %xmm0, %xmm3 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3] -; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: pxor %xmm3, %xmm3 +; SSE2-NEXT: movdqa %xmm0, %xmm4 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] ; SSE2-NEXT: movdqa %xmm0, %xmm5 -; SSE2-NEXT: psubd %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm2, %xmm6 -; SSE2-NEXT: pxor %xmm4, %xmm6 -; SSE2-NEXT: por %xmm4, %xmm5 -; SSE2-NEXT: pcmpgtd %xmm5, %xmm6 -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: pxor %xmm4, %xmm2 -; SSE2-NEXT: por %xmm3, %xmm4 -; SSE2-NEXT: pcmpgtd %xmm4, %xmm2 -; SSE2-NEXT: packssdw %xmm6, %xmm2 -; SSE2-NEXT: psubd %xmm1, %xmm3 -; SSE2-NEXT: pslld $16, %xmm0 -; SSE2-NEXT: psrad $16, %xmm0 -; SSE2-NEXT: pslld $16, %xmm3 -; SSE2-NEXT: psrad $16, %xmm3 -; SSE2-NEXT: packssdw %xmm0, %xmm3 -; SSE2-NEXT: pandn %xmm3, %xmm2 -; SSE2-NEXT: movdqa %xmm2, %xmm0 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm3[4],xmm5[5],xmm3[5],xmm5[6],xmm3[6],xmm5[7],xmm3[7] +; 
SSE2-NEXT: movdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: movdqa %xmm2, %xmm7 +; SSE2-NEXT: pxor %xmm6, %xmm7 +; SSE2-NEXT: por %xmm6, %xmm5 +; SSE2-NEXT: pcmpgtd %xmm5, %xmm7 +; SSE2-NEXT: movdqa %xmm1, %xmm3 +; SSE2-NEXT: pxor %xmm6, %xmm3 +; SSE2-NEXT: por %xmm6, %xmm4 +; SSE2-NEXT: pcmpgtd %xmm4, %xmm3 +; SSE2-NEXT: packssdw %xmm7, %xmm3 +; SSE2-NEXT: pslld $16, %xmm2 +; SSE2-NEXT: psrad $16, %xmm2 +; SSE2-NEXT: pslld $16, %xmm1 +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: packssdw %xmm2, %xmm1 +; SSE2-NEXT: psubw %xmm1, %xmm0 +; SSE2-NEXT: pandn %xmm0, %xmm3 +; SSE2-NEXT: movdqa %xmm3, %xmm0 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: test13: @@ -499,98 +499,96 @@ define <8 x i16> @test13(<8 x i16> %x, <8 x i32> %y) nounwind { ; SSSE3-NEXT: pxor %xmm3, %xmm3 ; SSSE3-NEXT: movdqa %xmm0, %xmm4 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] -; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648] ; SSSE3-NEXT: movdqa %xmm0, %xmm5 -; SSSE3-NEXT: psubd %xmm2, %xmm0 -; SSSE3-NEXT: movdqa %xmm2, %xmm6 -; SSSE3-NEXT: pxor %xmm3, %xmm6 -; SSSE3-NEXT: por %xmm3, %xmm5 -; SSSE3-NEXT: pcmpgtd %xmm5, %xmm6 -; SSSE3-NEXT: movdqa %xmm1, %xmm2 -; SSSE3-NEXT: pxor %xmm3, %xmm2 -; SSSE3-NEXT: por %xmm4, %xmm3 -; SSSE3-NEXT: pcmpgtd %xmm3, %xmm2 -; SSSE3-NEXT: packssdw %xmm6, %xmm2 -; SSSE3-NEXT: psubd %xmm1, %xmm4 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] -; SSSE3-NEXT: pshufb %xmm1, %xmm0 -; SSSE3-NEXT: pshufb %xmm1, %xmm4 -; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm0[0] -; SSSE3-NEXT: pandn %xmm4, %xmm2 -; SSSE3-NEXT: movdqa %xmm2, %xmm0 +; SSSE3-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm3[4],xmm5[5],xmm3[5],xmm5[6],xmm3[6],xmm5[7],xmm3[7] +; SSSE3-NEXT: movdqa {{.*#+}} xmm6 = 
[2147483648,2147483648,2147483648,2147483648] +; SSSE3-NEXT: movdqa %xmm2, %xmm7 +; SSSE3-NEXT: pxor %xmm6, %xmm7 +; SSSE3-NEXT: por %xmm6, %xmm5 +; SSSE3-NEXT: pcmpgtd %xmm5, %xmm7 +; SSSE3-NEXT: movdqa %xmm1, %xmm3 +; SSSE3-NEXT: pxor %xmm6, %xmm3 +; SSSE3-NEXT: por %xmm6, %xmm4 +; SSSE3-NEXT: pcmpgtd %xmm4, %xmm3 +; SSSE3-NEXT: packssdw %xmm7, %xmm3 +; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; SSSE3-NEXT: pshufb %xmm4, %xmm2 +; SSSE3-NEXT: pshufb %xmm4, %xmm1 +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; SSSE3-NEXT: psubw %xmm1, %xmm0 +; SSSE3-NEXT: pandn %xmm0, %xmm3 +; SSSE3-NEXT: movdqa %xmm3, %xmm0 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: test13: ; SSE41: # %bb.0: # %vector.ph ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1] -; SSE41-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero -; SSE41-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; SSE41-NEXT: movdqa %xmm4, %xmm0 -; SSE41-NEXT: pmaxud %xmm1, %xmm0 -; SSE41-NEXT: pcmpeqd %xmm4, %xmm0 +; SSE41-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero +; SSE41-NEXT: pmovzxwd {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; SSE41-NEXT: movdqa %xmm5, %xmm3 +; SSE41-NEXT: pmaxud %xmm1, %xmm3 +; SSE41-NEXT: pcmpeqd %xmm5, %xmm3 ; SSE41-NEXT: pcmpeqd %xmm5, %xmm5 -; SSE41-NEXT: pxor %xmm5, %xmm0 -; SSE41-NEXT: movdqa %xmm3, %xmm6 +; SSE41-NEXT: pxor %xmm5, %xmm3 +; SSE41-NEXT: movdqa %xmm4, %xmm6 ; SSE41-NEXT: pmaxud %xmm2, %xmm6 -; SSE41-NEXT: pcmpeqd %xmm3, %xmm6 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm6 ; SSE41-NEXT: pxor %xmm5, %xmm6 -; SSE41-NEXT: packssdw %xmm6, %xmm0 -; SSE41-NEXT: psubd %xmm2, %xmm3 -; SSE41-NEXT: psubd %xmm1, %xmm4 -; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0],xmm1[1],xmm4[2],xmm1[3],xmm4[4],xmm1[5],xmm4[6],xmm1[7] -; SSE41-NEXT: pblendw {{.*#+}} xmm3 = 
xmm3[0],xmm1[1],xmm3[2],xmm1[3],xmm3[4],xmm1[5],xmm3[6],xmm1[7] -; SSE41-NEXT: packusdw %xmm3, %xmm4 -; SSE41-NEXT: pandn %xmm4, %xmm0 +; SSE41-NEXT: packssdw %xmm6, %xmm3 +; SSE41-NEXT: pxor %xmm4, %xmm4 +; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1],xmm2[2],xmm4[3],xmm2[4],xmm4[5],xmm2[6],xmm4[7] +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1],xmm1[2],xmm4[3],xmm1[4],xmm4[5],xmm1[6],xmm4[7] +; SSE41-NEXT: packusdw %xmm2, %xmm1 +; SSE41-NEXT: psubw %xmm1, %xmm0 +; SSE41-NEXT: pandn %xmm0, %xmm3 +; SSE41-NEXT: movdqa %xmm3, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: test13: ; AVX1: # %bb.0: # %vector.ph ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vpmaxud %xmm3, %xmm2, %xmm4 -; AVX1-NEXT: vpcmpeqd %xmm4, %xmm2, %xmm4 +; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 +; AVX1-NEXT: vpmaxud %xmm4, %xmm2, %xmm5 +; AVX1-NEXT: vpcmpeqd %xmm5, %xmm2, %xmm2 ; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 -; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm6 -; AVX1-NEXT: vpcmpeqd %xmm6, %xmm0, %xmm6 -; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm5 -; AVX1-NEXT: vpackssdw %xmm4, %xmm5, %xmm4 -; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsubd %xmm3, %xmm2, %xmm1 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] -; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; AVX1-NEXT: vpandn %xmm0, %xmm4, %xmm0 +; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vpmaxud %xmm1, %xmm3, %xmm6 +; AVX1-NEXT: vpcmpeqd %xmm6, %xmm3, %xmm3 +; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vpackssdw 
%xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm4 +; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm4[0] +; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpandn %xmm0, %xmm2, %xmm0 ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: test13: ; AVX2: # %bb.0: # %vector.ph -; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm2 +; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX2-NEXT: vpmaxud %ymm1, %ymm2, %ymm3 +; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2 ; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 ; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2 ; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3 ; AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 -; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31] +; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] +; AVX2-NEXT: vpsubw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpandn %xmm0, %xmm2, %xmm0 ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512-LABEL: test13: ; AVX512: # %bb.0: # %vector.ph -; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX512-NEXT: vpcmpnltud %ymm1, %ymm0, %k1 -; AVX512-NEXT: vpsubd %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpmovdw %ymm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm2 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512-NEXT: vpcmpnltud %ymm1, %ymm2, %k1 +; AVX512-NEXT: vpmovdw %ymm1, %xmm1 +; AVX512-NEXT: vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq vector.ph: @@ -602,186 +600,172 @@ vector.ph: ret <8 x i16> %res } +; FIXME: match this to UMIN+TRUNC+PSUBUS define <16 x i8> @test14(<16 x i8> %x, <16 x i32> %y) nounwind { ; SSE2-LABEL: test14: ; SSE2: # %bb.0: # %vector.ph -; SSE2-NEXT: movdqa %xmm0, %xmm5 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: movdqa %xmm5, %xmm6 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1],xmm6[2],xmm0[2],xmm6[3],xmm0[3],xmm6[4],xmm0[4],xmm6[5],xmm0[5],xmm6[6],xmm0[6],xmm6[7],xmm0[7] -; SSE2-NEXT: movdqa %xmm6, %xmm8 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm0[0],xmm8[1],xmm0[1],xmm8[2],xmm0[2],xmm8[3],xmm0[3] -; SSE2-NEXT: punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm0[4],xmm6[5],xmm0[5],xmm6[6],xmm0[6],xmm6[7],xmm0[7] -; SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15] -; SSE2-NEXT: movdqa %xmm5, %xmm10 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm10 = xmm10[0],xmm0[0],xmm10[1],xmm0[1],xmm10[2],xmm0[2],xmm10[3],xmm0[3] -; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7] -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: pxor %xmm8, %xmm8 +; SSE2-NEXT: movdqa %xmm0, %xmm6 ; SSE2-NEXT: movdqa %xmm4, %xmm9 -; SSE2-NEXT: pxor %xmm0, %xmm9 -; SSE2-NEXT: psubd %xmm5, %xmm4 -; SSE2-NEXT: por %xmm0, %xmm5 -; SSE2-NEXT: pcmpgtd %xmm9, %xmm5 -; SSE2-NEXT: movdqa %xmm3, %xmm7 -; SSE2-NEXT: pxor %xmm0, %xmm7 -; SSE2-NEXT: psubd %xmm10, %xmm3 -; SSE2-NEXT: por %xmm0, %xmm10 -; SSE2-NEXT: pcmpgtd %xmm7, %xmm10 -; SSE2-NEXT: packssdw %xmm5, %xmm10 -; SSE2-NEXT: movdqa %xmm2, 
%xmm5 -; SSE2-NEXT: pxor %xmm0, %xmm5 -; SSE2-NEXT: psubd %xmm6, %xmm2 -; SSE2-NEXT: por %xmm0, %xmm6 -; SSE2-NEXT: pcmpgtd %xmm5, %xmm6 -; SSE2-NEXT: movdqa %xmm1, %xmm5 -; SSE2-NEXT: pxor %xmm0, %xmm5 -; SSE2-NEXT: por %xmm8, %xmm0 -; SSE2-NEXT: pcmpgtd %xmm5, %xmm0 -; SSE2-NEXT: packssdw %xmm6, %xmm0 -; SSE2-NEXT: packsswb %xmm10, %xmm0 -; SSE2-NEXT: psubd %xmm8, %xmm1 +; SSE2-NEXT: movdqa %xmm3, %xmm10 +; SSE2-NEXT: movdqa %xmm2, %xmm7 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] ; SSE2-NEXT: pand %xmm5, %xmm4 ; SSE2-NEXT: pand %xmm5, %xmm3 ; SSE2-NEXT: packuswb %xmm4, %xmm3 +; SSE2-NEXT: movdqa %xmm1, %xmm4 ; SSE2-NEXT: pand %xmm5, %xmm2 ; SSE2-NEXT: pand %xmm5, %xmm1 ; SSE2-NEXT: packuswb %xmm2, %xmm1 ; SSE2-NEXT: packuswb %xmm3, %xmm1 +; SSE2-NEXT: psubb %xmm0, %xmm1 +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3],xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7] +; SSE2-NEXT: movdqa %xmm2, %xmm0 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm8[0],xmm0[1],xmm8[1],xmm0[2],xmm8[2],xmm0[3],xmm8[3] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm8[8],xmm6[9],xmm8[9],xmm6[10],xmm8[10],xmm6[11],xmm8[11],xmm6[12],xmm8[12],xmm6[13],xmm8[13],xmm6[14],xmm8[14],xmm6[15],xmm8[15] +; SSE2-NEXT: movdqa %xmm6, %xmm3 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm8[4],xmm6[5],xmm8[5],xmm6[6],xmm8[6],xmm6[7],xmm8[7] +; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: pxor %xmm5, %xmm9 +; SSE2-NEXT: por %xmm5, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm9, %xmm6 +; SSE2-NEXT: pxor %xmm5, %xmm10 +; SSE2-NEXT: por %xmm5, %xmm3 +; SSE2-NEXT: pcmpgtd %xmm10, %xmm3 +; SSE2-NEXT: packssdw %xmm6, 
%xmm3 +; SSE2-NEXT: pxor %xmm5, %xmm7 +; SSE2-NEXT: por %xmm5, %xmm2 +; SSE2-NEXT: pcmpgtd %xmm7, %xmm2 +; SSE2-NEXT: pxor %xmm5, %xmm4 +; SSE2-NEXT: por %xmm5, %xmm0 +; SSE2-NEXT: pcmpgtd %xmm4, %xmm0 +; SSE2-NEXT: packssdw %xmm2, %xmm0 +; SSE2-NEXT: packsswb %xmm3, %xmm0 ; SSE2-NEXT: pandn %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: test14: ; SSSE3: # %bb.0: # %vector.ph -; SSSE3-NEXT: movdqa %xmm0, %xmm5 -; SSSE3-NEXT: pxor %xmm0, %xmm0 -; SSSE3-NEXT: movdqa %xmm5, %xmm6 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1],xmm6[2],xmm0[2],xmm6[3],xmm0[3],xmm6[4],xmm0[4],xmm6[5],xmm0[5],xmm6[6],xmm0[6],xmm6[7],xmm0[7] -; SSSE3-NEXT: movdqa %xmm6, %xmm8 -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm0[0],xmm8[1],xmm0[1],xmm8[2],xmm0[2],xmm8[3],xmm0[3] -; SSSE3-NEXT: punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm0[4],xmm6[5],xmm0[5],xmm6[6],xmm0[6],xmm6[7],xmm0[7] -; SSSE3-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15] -; SSSE3-NEXT: movdqa %xmm5, %xmm10 -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm10 = xmm10[0],xmm0[0],xmm10[1],xmm0[1],xmm10[2],xmm0[2],xmm10[3],xmm0[3] -; SSSE3-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7] -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648] +; SSSE3-NEXT: pxor %xmm8, %xmm8 +; SSSE3-NEXT: movdqa %xmm0, %xmm6 ; SSSE3-NEXT: movdqa %xmm4, %xmm9 -; SSSE3-NEXT: pxor %xmm0, %xmm9 -; SSSE3-NEXT: psubd %xmm5, %xmm4 -; SSSE3-NEXT: por %xmm0, %xmm5 -; SSSE3-NEXT: pcmpgtd %xmm9, %xmm5 -; SSSE3-NEXT: movdqa %xmm3, %xmm7 -; SSSE3-NEXT: pxor %xmm0, %xmm7 -; SSSE3-NEXT: psubd %xmm10, %xmm3 -; SSSE3-NEXT: por %xmm0, %xmm10 -; SSSE3-NEXT: pcmpgtd %xmm7, %xmm10 -; SSSE3-NEXT: packssdw %xmm5, %xmm10 -; SSSE3-NEXT: movdqa %xmm2, %xmm5 -; SSSE3-NEXT: pxor %xmm0, %xmm5 -; SSSE3-NEXT: psubd %xmm6, %xmm2 -; SSSE3-NEXT: por %xmm0, %xmm6 -; 
SSSE3-NEXT: pcmpgtd %xmm5, %xmm6 -; SSSE3-NEXT: movdqa %xmm1, %xmm5 -; SSSE3-NEXT: pxor %xmm0, %xmm5 -; SSSE3-NEXT: por %xmm8, %xmm0 -; SSSE3-NEXT: pcmpgtd %xmm5, %xmm0 -; SSSE3-NEXT: packssdw %xmm6, %xmm0 -; SSSE3-NEXT: packsswb %xmm10, %xmm0 -; SSSE3-NEXT: psubd %xmm8, %xmm1 +; SSSE3-NEXT: movdqa %xmm3, %xmm10 +; SSSE3-NEXT: movdqa %xmm2, %xmm7 ; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] ; SSSE3-NEXT: pand %xmm5, %xmm4 ; SSSE3-NEXT: pand %xmm5, %xmm3 ; SSSE3-NEXT: packuswb %xmm4, %xmm3 +; SSSE3-NEXT: movdqa %xmm1, %xmm4 ; SSSE3-NEXT: pand %xmm5, %xmm2 ; SSSE3-NEXT: pand %xmm5, %xmm1 ; SSSE3-NEXT: packuswb %xmm2, %xmm1 ; SSSE3-NEXT: packuswb %xmm3, %xmm1 +; SSSE3-NEXT: psubb %xmm0, %xmm1 +; SSSE3-NEXT: movdqa %xmm0, %xmm2 +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3],xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7] +; SSSE3-NEXT: movdqa %xmm2, %xmm0 +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm8[0],xmm0[1],xmm8[1],xmm0[2],xmm8[2],xmm0[3],xmm8[3] +; SSSE3-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7] +; SSSE3-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm8[8],xmm6[9],xmm8[9],xmm6[10],xmm8[10],xmm6[11],xmm8[11],xmm6[12],xmm8[12],xmm6[13],xmm8[13],xmm6[14],xmm8[14],xmm6[15],xmm8[15] +; SSSE3-NEXT: movdqa %xmm6, %xmm3 +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3] +; SSSE3-NEXT: punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm8[4],xmm6[5],xmm8[5],xmm6[6],xmm8[6],xmm6[7],xmm8[7] +; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648] +; SSSE3-NEXT: pxor %xmm5, %xmm9 +; SSSE3-NEXT: por %xmm5, %xmm6 +; SSSE3-NEXT: pcmpgtd %xmm9, %xmm6 +; SSSE3-NEXT: pxor %xmm5, %xmm10 +; SSSE3-NEXT: por %xmm5, %xmm3 +; SSSE3-NEXT: pcmpgtd %xmm10, %xmm3 +; SSSE3-NEXT: packssdw %xmm6, %xmm3 +; SSSE3-NEXT: pxor %xmm5, %xmm7 +; SSSE3-NEXT: por %xmm5, %xmm2 
+; SSSE3-NEXT: pcmpgtd %xmm7, %xmm2 +; SSSE3-NEXT: pxor %xmm5, %xmm4 +; SSSE3-NEXT: por %xmm5, %xmm0 +; SSSE3-NEXT: pcmpgtd %xmm4, %xmm0 +; SSSE3-NEXT: packssdw %xmm2, %xmm0 +; SSSE3-NEXT: packsswb %xmm3, %xmm0 ; SSSE3-NEXT: pandn %xmm1, %xmm0 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: test14: ; SSE41: # %bb.0: # %vector.ph ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,2,3] -; SSE41-NEXT: pmovzxbd {{.*#+}} xmm11 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero -; SSE41-NEXT: pmovzxbd {{.*#+}} xmm8 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[2,3,0,1] -; SSE41-NEXT: pmovzxbd {{.*#+}} xmm9 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] -; SSE41-NEXT: pmovzxbd {{.*#+}} xmm10 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; SSE41-NEXT: movdqa %xmm4, %xmm0 -; SSE41-NEXT: pmaxud %xmm10, %xmm0 -; SSE41-NEXT: pcmpeqd %xmm4, %xmm0 -; SSE41-NEXT: pcmpeqd %xmm6, %xmm6 -; SSE41-NEXT: pxor %xmm6, %xmm0 -; SSE41-NEXT: movdqa %xmm3, %xmm7 -; SSE41-NEXT: pmaxud %xmm9, %xmm7 +; SSE41-NEXT: pmovzxbd {{.*#+}} xmm8 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero +; SSE41-NEXT: pmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[2,3,0,1] +; SSE41-NEXT: pmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero +; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[3,1,2,3] +; SSE41-NEXT: pmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero +; SSE41-NEXT: pmaxud %xmm4, %xmm6 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm6 +; SSE41-NEXT: pcmpeqd %xmm9, %xmm9 +; 
SSE41-NEXT: pxor %xmm9, %xmm6 +; SSE41-NEXT: pmaxud %xmm3, %xmm7 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm7 -; SSE41-NEXT: pxor %xmm6, %xmm7 -; SSE41-NEXT: packssdw %xmm0, %xmm7 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pmaxud %xmm8, %xmm0 -; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE41-NEXT: pxor %xmm6, %xmm0 -; SSE41-NEXT: movdqa %xmm2, %xmm5 -; SSE41-NEXT: pmaxud %xmm11, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm2, %xmm5 -; SSE41-NEXT: pxor %xmm6, %xmm5 -; SSE41-NEXT: packssdw %xmm5, %xmm0 -; SSE41-NEXT: packsswb %xmm7, %xmm0 -; SSE41-NEXT: psubd %xmm11, %xmm2 -; SSE41-NEXT: psubd %xmm8, %xmm1 -; SSE41-NEXT: psubd %xmm9, %xmm3 -; SSE41-NEXT: psubd %xmm10, %xmm4 -; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] -; SSE41-NEXT: pand %xmm5, %xmm4 -; SSE41-NEXT: pand %xmm5, %xmm3 +; SSE41-NEXT: pxor %xmm9, %xmm7 +; SSE41-NEXT: packssdw %xmm6, %xmm7 +; SSE41-NEXT: pmaxud %xmm1, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm5 +; SSE41-NEXT: pxor %xmm9, %xmm5 +; SSE41-NEXT: pmaxud %xmm2, %xmm8 +; SSE41-NEXT: pcmpeqd %xmm2, %xmm8 +; SSE41-NEXT: pxor %xmm9, %xmm8 +; SSE41-NEXT: packssdw %xmm8, %xmm5 +; SSE41-NEXT: packsswb %xmm7, %xmm5 +; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; SSE41-NEXT: pand %xmm6, %xmm4 +; SSE41-NEXT: pand %xmm6, %xmm3 ; SSE41-NEXT: packusdw %xmm4, %xmm3 -; SSE41-NEXT: pand %xmm5, %xmm1 -; SSE41-NEXT: pand %xmm5, %xmm2 +; SSE41-NEXT: pand %xmm6, %xmm2 +; SSE41-NEXT: pand %xmm6, %xmm1 ; SSE41-NEXT: packusdw %xmm2, %xmm1 ; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: pandn %xmm1, %xmm0 +; SSE41-NEXT: psubb %xmm0, %xmm1 +; SSE41-NEXT: pandn %xmm1, %xmm5 +; SSE41-NEXT: movdqa %xmm5, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: test14: ; AVX1: # %bb.0: # %vector.ph ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,2,3] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm8 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm9 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero +; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm0[2,3,0,1] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm11 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero -; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,0,1] -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6 -; AVX1-NEXT: vpmaxud %xmm0, %xmm6, %xmm7 -; AVX1-NEXT: vpcmpeqd %xmm7, %xmm6, %xmm7 -; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpxor %xmm3, %xmm7, %xmm7 -; AVX1-NEXT: vpmaxud %xmm11, %xmm2, %xmm4 -; AVX1-NEXT: vpcmpeqd %xmm4, %xmm2, %xmm4 -; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4 -; AVX1-NEXT: vpackssdw %xmm7, %xmm4, %xmm10 -; AVX1-NEXT: vpmaxud %xmm9, %xmm1, %xmm7 -; AVX1-NEXT: vpcmpeqd %xmm7, %xmm1, %xmm7 -; AVX1-NEXT: vpxor %xmm3, %xmm7, %xmm7 +; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero +; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm0[3,3,0,1] +; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm7 +; AVX1-NEXT: vpmaxud %xmm6, %xmm7, %xmm6 +; AVX1-NEXT: vpcmpeqd %xmm6, %xmm7, %xmm6 +; AVX1-NEXT: vpcmpeqd %xmm7, %xmm7, %xmm7 +; AVX1-NEXT: vpxor %xmm7, %xmm6, %xmm6 +; AVX1-NEXT: vpmaxud %xmm5, %xmm2, %xmm5 +; AVX1-NEXT: vpcmpeqd %xmm5, %xmm2, %xmm5 +; AVX1-NEXT: vpxor %xmm7, %xmm5, %xmm5 +; AVX1-NEXT: vpackssdw %xmm6, %xmm5, %xmm5 +; AVX1-NEXT: vpmaxud %xmm4, %xmm1, %xmm4 +; AVX1-NEXT: vpcmpeqd 
%xmm4, %xmm1, %xmm4 +; AVX1-NEXT: vpxor %xmm7, %xmm4, %xmm4 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6 +; AVX1-NEXT: vpmaxud %xmm3, %xmm6, %xmm3 +; AVX1-NEXT: vpcmpeqd %xmm3, %xmm6, %xmm3 +; AVX1-NEXT: vpxor %xmm7, %xmm3, %xmm3 +; AVX1-NEXT: vpackssdw %xmm3, %xmm4, %xmm3 +; AVX1-NEXT: vpacksswb %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vmovaps {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255] +; AVX1-NEXT: vandps %ymm4, %ymm2, %ymm2 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5 +; AVX1-NEXT: vpackusdw %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vandps %ymm4, %ymm1, %ymm1 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 -; AVX1-NEXT: vpmaxud %xmm8, %xmm4, %xmm5 -; AVX1-NEXT: vpcmpeqd %xmm5, %xmm4, %xmm5 -; AVX1-NEXT: vpxor %xmm3, %xmm5, %xmm3 -; AVX1-NEXT: vpackssdw %xmm3, %xmm7, %xmm3 -; AVX1-NEXT: vpacksswb %xmm10, %xmm3, %xmm3 -; AVX1-NEXT: vpsubd %xmm8, %xmm4, %xmm4 -; AVX1-NEXT: vpsubd %xmm9, %xmm1, %xmm1 -; AVX1-NEXT: vpsubd %xmm11, %xmm2, %xmm2 -; AVX1-NEXT: vpsubd %xmm0, %xmm6, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [255,255,255,255] -; AVX1-NEXT: vpand %xmm5, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm5, %xmm2, %xmm2 -; AVX1-NEXT: vpackusdw %xmm0, %xmm2, %xmm0 -; AVX1-NEXT: vpand %xmm5, %xmm1, %xmm1 -; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm2 -; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpackuswb %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpackusdw %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpsubb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpandn %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -790,40 +774,39 @@ define <16 x i8> @test14(<16 x i8> %x, <16 x i32> %y) nounwind { ; AVX2: # %bb.0: # %vector.ph ; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1] ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX2-NEXT: vpmaxud %ymm0, %ymm1, %ymm4 +; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; AVX2-NEXT: vpmaxud %ymm4, %ymm1, %ymm4 ; AVX2-NEXT: vpcmpeqd %ymm4, %ymm1, %ymm4 ; AVX2-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5 ; AVX2-NEXT: vpxor %ymm5, %ymm4, %ymm4 ; AVX2-NEXT: vextracti128 $1, %ymm4, %xmm6 ; AVX2-NEXT: vpackssdw %xmm6, %xmm4, %xmm4 -; AVX2-NEXT: vpmaxud %ymm3, %ymm2, %ymm6 -; AVX2-NEXT: vpcmpeqd %ymm6, %ymm2, %ymm6 -; AVX2-NEXT: vpxor %ymm5, %ymm6, %ymm5 -; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm6 -; AVX2-NEXT: vpackssdw %xmm6, %xmm5, %xmm5 -; AVX2-NEXT: vpacksswb %xmm5, %xmm4, %xmm4 -; AVX2-NEXT: vpsubd %ymm3, %ymm2, %ymm2 -; AVX2-NEXT: vpsubd %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31] -; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] -; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb %ymm1, %ymm2, %ymm1 +; AVX2-NEXT: vpmaxud %ymm3, %ymm2, %ymm3 +; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm3 +; AVX2-NEXT: vpxor %ymm5, %ymm3, %ymm3 +; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm5 +; AVX2-NEXT: vpackssdw %xmm5, %xmm3, %xmm3 +; AVX2-NEXT: vpacksswb %xmm3, %xmm4, %xmm3 +; AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31] +; AVX2-NEXT: vpshufb %ymm4, %ymm2, %ymm2 +; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255] +; AVX2-NEXT: vpand %xmm5, %xmm2, 
%xmm2 +; AVX2-NEXT: vpshufb %ymm4, %ymm1, %ymm1 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] -; AVX2-NEXT: vpand %xmm3, %xmm1, %xmm1 -; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpandn %xmm0, %xmm4, %xmm0 +; AVX2-NEXT: vpand %xmm5, %xmm1, %xmm1 +; AVX2-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpsubb %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpandn %xmm0, %xmm3, %xmm0 ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512-LABEL: test14: ; AVX512: # %bb.0: # %vector.ph -; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512-NEXT: vpcmpnltud %zmm0, %zmm1, %k1 -; AVX512-NEXT: vpsubd %zmm0, %zmm1, %zmm0 -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512-NEXT: vpcmpnltud %zmm2, %zmm1, %k1 +; AVX512-NEXT: vpmovdb %zmm1, %xmm1 +; AVX512-NEXT: vpsubb %xmm0, %xmm1, %xmm0 {%k1} {z} ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq vector.ph: @@ -835,123 +818,127 @@ vector.ph: ret <16 x i8> %res } +; FIXME: match this to UMIN+TRUNC+PSUBUS define <8 x i16> @test15(<8 x i16> %x, <8 x i32> %y) nounwind { ; SSE2-LABEL: test15: ; SSE2: # %bb.0: # %vector.ph -; SSE2-NEXT: movdqa %xmm0, %xmm3 -; SSE2-NEXT: pxor %xmm4, %xmm4 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = 
xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] -; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] -; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] -; SSE2-NEXT: movdqa %xmm3, %xmm5 -; SSE2-NEXT: psubd %xmm2, %xmm3 -; SSE2-NEXT: pxor %xmm4, %xmm2 -; SSE2-NEXT: por %xmm4, %xmm5 -; SSE2-NEXT: pcmpgtd %xmm2, %xmm5 -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: pxor %xmm4, %xmm2 -; SSE2-NEXT: por %xmm0, %xmm4 -; SSE2-NEXT: pcmpgtd %xmm2, %xmm4 +; SSE2-NEXT: pxor %xmm3, %xmm3 +; SSE2-NEXT: movdqa %xmm0, %xmm4 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] +; SSE2-NEXT: movdqa %xmm0, %xmm5 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm3[4],xmm5[5],xmm3[5],xmm5[6],xmm3[6],xmm5[7],xmm3[7] +; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: movdqa %xmm2, %xmm6 +; SSE2-NEXT: pxor %xmm3, %xmm6 +; SSE2-NEXT: por %xmm3, %xmm5 +; SSE2-NEXT: pcmpgtd %xmm6, %xmm5 +; SSE2-NEXT: movdqa %xmm1, %xmm6 +; SSE2-NEXT: pxor %xmm3, %xmm6 +; SSE2-NEXT: por %xmm3, %xmm4 +; SSE2-NEXT: pcmpgtd %xmm6, %xmm4 ; SSE2-NEXT: packssdw %xmm5, %xmm4 -; SSE2-NEXT: psubd %xmm1, %xmm0 -; SSE2-NEXT: pslld $16, %xmm3 -; SSE2-NEXT: psrad $16, %xmm3 -; SSE2-NEXT: pslld $16, %xmm0 -; SSE2-NEXT: psrad $16, %xmm0 -; SSE2-NEXT: packssdw %xmm3, %xmm0 +; SSE2-NEXT: pslld $16, %xmm2 +; SSE2-NEXT: psrad $16, %xmm2 +; SSE2-NEXT: pslld $16, %xmm1 +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: packssdw %xmm2, %xmm1 +; SSE2-NEXT: psubw %xmm1, %xmm0 ; SSE2-NEXT: pand %xmm4, %xmm0 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: test15: ; SSSE3: # %bb.0: # %vector.ph -; SSSE3-NEXT: pxor %xmm4, %xmm4 -; SSSE3-NEXT: movdqa %xmm0, %xmm3 -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3] -; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = 
xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] +; SSSE3-NEXT: pxor %xmm3, %xmm3 +; SSSE3-NEXT: movdqa %xmm0, %xmm4 +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] ; SSSE3-NEXT: movdqa %xmm0, %xmm5 -; SSSE3-NEXT: psubd %xmm2, %xmm0 -; SSSE3-NEXT: pxor %xmm4, %xmm2 -; SSSE3-NEXT: por %xmm4, %xmm5 -; SSSE3-NEXT: pcmpgtd %xmm2, %xmm5 -; SSSE3-NEXT: movdqa %xmm1, %xmm2 -; SSSE3-NEXT: pxor %xmm4, %xmm2 +; SSSE3-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm3[4],xmm5[5],xmm3[5],xmm5[6],xmm3[6],xmm5[7],xmm3[7] +; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648] +; SSSE3-NEXT: movdqa %xmm2, %xmm6 +; SSSE3-NEXT: pxor %xmm3, %xmm6 +; SSSE3-NEXT: por %xmm3, %xmm5 +; SSSE3-NEXT: pcmpgtd %xmm6, %xmm5 +; SSSE3-NEXT: movdqa %xmm1, %xmm6 +; SSSE3-NEXT: pxor %xmm3, %xmm6 ; SSSE3-NEXT: por %xmm3, %xmm4 -; SSSE3-NEXT: pcmpgtd %xmm2, %xmm4 +; SSSE3-NEXT: pcmpgtd %xmm6, %xmm4 ; SSSE3-NEXT: packssdw %xmm5, %xmm4 -; SSSE3-NEXT: psubd %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] -; SSSE3-NEXT: pshufb %xmm1, %xmm0 -; SSSE3-NEXT: pshufb %xmm1, %xmm3 -; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0] -; SSSE3-NEXT: pand %xmm4, %xmm3 -; SSSE3-NEXT: movdqa %xmm3, %xmm0 +; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; SSSE3-NEXT: pshufb %xmm3, %xmm2 +; SSSE3-NEXT: pshufb %xmm3, %xmm1 +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; SSSE3-NEXT: psubw %xmm1, %xmm0 +; SSSE3-NEXT: pand %xmm4, %xmm0 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: test15: ; SSE41: # %bb.0: # %vector.ph ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1] ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero -; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; SSE41-NEXT: 
movdqa %xmm0, %xmm4 -; SSE41-NEXT: pminud %xmm1, %xmm4 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 -; SSE41-NEXT: pcmpeqd %xmm5, %xmm5 -; SSE41-NEXT: pxor %xmm5, %xmm4 +; SSE41-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; SSE41-NEXT: movdqa %xmm4, %xmm5 +; SSE41-NEXT: pminud %xmm1, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm4 +; SSE41-NEXT: pxor %xmm4, %xmm5 ; SSE41-NEXT: movdqa %xmm3, %xmm6 ; SSE41-NEXT: pminud %xmm2, %xmm6 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm6 -; SSE41-NEXT: pxor %xmm5, %xmm6 -; SSE41-NEXT: packssdw %xmm6, %xmm4 -; SSE41-NEXT: psubd %xmm2, %xmm3 -; SSE41-NEXT: psubd %xmm1, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] -; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm1[1],xmm3[2],xmm1[3],xmm3[4],xmm1[5],xmm3[6],xmm1[7] -; SSE41-NEXT: packusdw %xmm3, %xmm0 -; SSE41-NEXT: pand %xmm4, %xmm0 +; SSE41-NEXT: pxor %xmm4, %xmm6 +; SSE41-NEXT: packssdw %xmm6, %xmm5 +; SSE41-NEXT: pxor %xmm3, %xmm3 +; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3],xmm2[4],xmm3[5],xmm2[6],xmm3[7] +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1],xmm1[2],xmm3[3],xmm1[4],xmm3[5],xmm1[6],xmm3[7] +; SSE41-NEXT: packusdw %xmm2, %xmm1 +; SSE41-NEXT: psubw %xmm1, %xmm0 +; SSE41-NEXT: pand %xmm5, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: test15: ; AVX1: # %bb.0: # %vector.ph ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vpminud %xmm3, %xmm2, %xmm4 -; AVX1-NEXT: vpcmpeqd %xmm4, %xmm2, %xmm4 -; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm5 -; AVX1-NEXT: vpcmpeqd %xmm5, %xmm0, %xmm5 -; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpandn %xmm0, %xmm5, %xmm0 -; 
AVX1-NEXT: vpsubd %xmm3, %xmm2, %xmm1 -; AVX1-NEXT: vpandn %xmm1, %xmm4, %xmm1 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] -; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 +; AVX1-NEXT: vpminud %xmm4, %xmm2, %xmm5 +; AVX1-NEXT: vpcmpeqd %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vpminud %xmm1, %xmm3, %xmm6 +; AVX1-NEXT: vpcmpeqd %xmm6, %xmm3, %xmm3 +; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vpackssdw %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm4 +; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm4[0] +; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0 ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: test15: ; AVX2: # %bb.0: # %vector.ph -; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm2 -; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpandn %ymm0, %ymm2, %ymm0 -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] -; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX2-NEXT: vpminud %ymm1, %ymm2, %ymm3 +; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2 +; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 +; 
AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2 +; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3 +; AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 +; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31] +; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] +; AVX2-NEXT: vpsubw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpand %xmm0, %xmm2, %xmm0 ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512-LABEL: test15: ; AVX512: # %bb.0: # %vector.ph -; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX512-NEXT: vpcmpnleud %ymm1, %ymm0, %k1 -; AVX512-NEXT: vpsubd %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpmovdw %ymm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512-NEXT: vpcmpnleud %ymm1, %ymm2, %k1 +; AVX512-NEXT: vpmovdw %ymm1, %xmm1 +; AVX512-NEXT: vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq vector.ph: @@ -963,80 +950,77 @@ vector.ph: ret <8 x i16> %res } +; FIXME: match this to UMIN+TRUNC+PSUBUS define <8 x i16> @test16(<8 x i16> %x, <8 x i32> %y) nounwind { ; SSE2-LABEL: test16: ; SSE2: # %bb.0: # %vector.ph -; SSE2-NEXT: movdqa %xmm0, %xmm3 -; SSE2-NEXT: pxor %xmm4, %xmm4 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] -; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] -; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] -; SSE2-NEXT: movdqa %xmm3, %xmm5 -; SSE2-NEXT: psubd %xmm2, %xmm3 -; SSE2-NEXT: pxor %xmm4, %xmm2 -; SSE2-NEXT: por %xmm4, %xmm5 -; SSE2-NEXT: pcmpgtd %xmm2, %xmm5 -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: pxor %xmm4, %xmm2 -; SSE2-NEXT: por %xmm0, %xmm4 -; SSE2-NEXT: pcmpgtd %xmm2, %xmm4 +; SSE2-NEXT: pxor %xmm3, 
%xmm3 +; SSE2-NEXT: movdqa %xmm0, %xmm4 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] +; SSE2-NEXT: movdqa %xmm0, %xmm5 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm3[4],xmm5[5],xmm3[5],xmm5[6],xmm3[6],xmm5[7],xmm3[7] +; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: movdqa %xmm2, %xmm6 +; SSE2-NEXT: pxor %xmm3, %xmm6 +; SSE2-NEXT: por %xmm3, %xmm5 +; SSE2-NEXT: pcmpgtd %xmm6, %xmm5 +; SSE2-NEXT: movdqa %xmm1, %xmm6 +; SSE2-NEXT: pxor %xmm3, %xmm6 +; SSE2-NEXT: por %xmm3, %xmm4 +; SSE2-NEXT: pcmpgtd %xmm6, %xmm4 ; SSE2-NEXT: packssdw %xmm5, %xmm4 -; SSE2-NEXT: psubd %xmm1, %xmm0 -; SSE2-NEXT: pslld $16, %xmm3 -; SSE2-NEXT: psrad $16, %xmm3 -; SSE2-NEXT: pslld $16, %xmm0 -; SSE2-NEXT: psrad $16, %xmm0 -; SSE2-NEXT: packssdw %xmm3, %xmm0 +; SSE2-NEXT: pslld $16, %xmm2 +; SSE2-NEXT: psrad $16, %xmm2 +; SSE2-NEXT: pslld $16, %xmm1 +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: packssdw %xmm2, %xmm1 +; SSE2-NEXT: psubw %xmm1, %xmm0 ; SSE2-NEXT: pand %xmm4, %xmm0 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: test16: ; SSSE3: # %bb.0: # %vector.ph -; SSSE3-NEXT: pxor %xmm4, %xmm4 -; SSSE3-NEXT: movdqa %xmm0, %xmm3 -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3] -; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] +; SSSE3-NEXT: pxor %xmm3, %xmm3 +; SSSE3-NEXT: movdqa %xmm0, %xmm4 +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] ; SSSE3-NEXT: movdqa %xmm0, %xmm5 -; SSSE3-NEXT: psubd %xmm2, %xmm0 -; SSSE3-NEXT: pxor %xmm4, %xmm2 -; SSSE3-NEXT: por %xmm4, %xmm5 -; SSSE3-NEXT: pcmpgtd %xmm2, %xmm5 -; SSSE3-NEXT: movdqa %xmm1, %xmm2 -; SSSE3-NEXT: pxor %xmm4, %xmm2 +; SSSE3-NEXT: punpckhwd {{.*#+}} xmm5 = 
xmm5[4],xmm3[4],xmm5[5],xmm3[5],xmm5[6],xmm3[6],xmm5[7],xmm3[7] +; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648] +; SSSE3-NEXT: movdqa %xmm2, %xmm6 +; SSSE3-NEXT: pxor %xmm3, %xmm6 +; SSSE3-NEXT: por %xmm3, %xmm5 +; SSSE3-NEXT: pcmpgtd %xmm6, %xmm5 +; SSSE3-NEXT: movdqa %xmm1, %xmm6 +; SSSE3-NEXT: pxor %xmm3, %xmm6 ; SSSE3-NEXT: por %xmm3, %xmm4 -; SSSE3-NEXT: pcmpgtd %xmm2, %xmm4 +; SSSE3-NEXT: pcmpgtd %xmm6, %xmm4 ; SSSE3-NEXT: packssdw %xmm5, %xmm4 -; SSSE3-NEXT: psubd %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] -; SSSE3-NEXT: pshufb %xmm1, %xmm0 -; SSSE3-NEXT: pshufb %xmm1, %xmm3 -; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0] -; SSSE3-NEXT: pand %xmm4, %xmm3 -; SSSE3-NEXT: movdqa %xmm3, %xmm0 +; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; SSSE3-NEXT: pshufb %xmm3, %xmm2 +; SSSE3-NEXT: pshufb %xmm3, %xmm1 +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; SSSE3-NEXT: psubw %xmm1, %xmm0 +; SSSE3-NEXT: pand %xmm4, %xmm0 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: test16: ; SSE41: # %bb.0: # %vector.ph ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1] ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero -; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; SSE41-NEXT: movdqa %xmm0, %xmm4 -; SSE41-NEXT: pminud %xmm1, %xmm4 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 +; SSE41-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; SSE41-NEXT: pmaxud %xmm1, %xmm4 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm4 ; SSE41-NEXT: pcmpeqd %xmm5, %xmm5 ; SSE41-NEXT: pxor %xmm5, %xmm4 -; SSE41-NEXT: movdqa %xmm3, %xmm6 -; SSE41-NEXT: pminud %xmm2, %xmm6 -; SSE41-NEXT: pcmpeqd %xmm3, %xmm6 -; SSE41-NEXT: pxor %xmm5, %xmm6 -; SSE41-NEXT: packssdw %xmm6, %xmm4 -; SSE41-NEXT: psubd %xmm2, %xmm3 -; SSE41-NEXT: psubd %xmm1, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm1 
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] -; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm1[1],xmm3[2],xmm1[3],xmm3[4],xmm1[5],xmm3[6],xmm1[7] -; SSE41-NEXT: packusdw %xmm3, %xmm0 +; SSE41-NEXT: pmaxud %xmm2, %xmm3 +; SSE41-NEXT: pcmpeqd %xmm2, %xmm3 +; SSE41-NEXT: pxor %xmm5, %xmm3 +; SSE41-NEXT: packssdw %xmm3, %xmm4 +; SSE41-NEXT: pxor %xmm3, %xmm3 +; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3],xmm2[4],xmm3[5],xmm2[6],xmm3[7] +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1],xmm1[2],xmm3[3],xmm1[4],xmm3[5],xmm1[6],xmm3[7] +; SSE41-NEXT: packusdw %xmm2, %xmm1 +; SSE41-NEXT: psubw %xmm1, %xmm0 ; SSE41-NEXT: pand %xmm4, %xmm0 ; SSE41-NEXT: retq ; @@ -1044,42 +1028,47 @@ define <8 x i16> @test16(<8 x i16> %x, <8 x i32> %y) nounwind { ; AVX1: # %bb.0: # %vector.ph ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vpminud %xmm3, %xmm2, %xmm4 -; AVX1-NEXT: vpcmpeqd %xmm4, %xmm2, %xmm4 -; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm5 -; AVX1-NEXT: vpcmpeqd %xmm5, %xmm0, %xmm5 -; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpandn %xmm0, %xmm5, %xmm0 -; AVX1-NEXT: vpsubd %xmm3, %xmm2, %xmm1 -; AVX1-NEXT: vpandn %xmm1, %xmm4, %xmm1 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] -; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 +; AVX1-NEXT: vpmaxud %xmm2, %xmm4, %xmm2 +; AVX1-NEXT: vpcmpeqd %xmm2, %xmm4, %xmm2 +; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX1-NEXT: vpxor %xmm5, %xmm2, 
%xmm2 +; AVX1-NEXT: vpmaxud %xmm3, %xmm1, %xmm3 +; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm3 +; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vpackssdw %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm4 +; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm4[0] +; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0 ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: test16: ; AVX2: # %bb.0: # %vector.ph -; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm2 -; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpandn %ymm0, %ymm2, %ymm0 -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] -; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX2-NEXT: vpmaxud %ymm2, %ymm1, %ymm2 +; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm2 +; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 +; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2 +; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3 +; AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 +; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31] +; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] +; AVX2-NEXT: vpsubw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpand %xmm0, %xmm2, %xmm0 ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512-LABEL: test16: ; AVX512: # %bb.0: # %vector.ph -; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX512-NEXT: vpcmpnleud %ymm1, %ymm0, %k1 -; AVX512-NEXT: vpsubd %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpmovdw %ymm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512-NEXT: vpcmpltud %ymm2, %ymm1, %k1 +; AVX512-NEXT: vpmovdw %ymm1, %xmm1 +; AVX512-NEXT: vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq vector.ph: @@ -2057,60 +2046,57 @@ vector.ph: define <8 x i16> @psubus_i16_i32_min(<8 x i16> %x, <8 x i32> %y) nounwind { ; SSE2-LABEL: psubus_i16_i32_min: ; SSE2: # %bb.0: # %vector.ph -; SSE2-NEXT: pxor %xmm4, %xmm4 -; SSE2-NEXT: movdqa %xmm0, %xmm3 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3] -; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] -; SSE2-NEXT: movdqa %xmm2, %xmm5 -; SSE2-NEXT: pxor %xmm4, %xmm5 -; SSE2-NEXT: movdqa %xmm0, %xmm6 +; SSE2-NEXT: pxor %xmm3, %xmm3 +; SSE2-NEXT: movdqa %xmm0, %xmm4 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] +; SSE2-NEXT: movdqa %xmm0, %xmm5 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3] +; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: movdqa %xmm1, %xmm3 +; SSE2-NEXT: pxor %xmm6, %xmm3 +; SSE2-NEXT: movdqa %xmm5, %xmm7 +; SSE2-NEXT: por %xmm6, %xmm7 +; SSE2-NEXT: pcmpgtd %xmm7, %xmm3 +; SSE2-NEXT: pand %xmm3, %xmm5 +; SSE2-NEXT: pandn %xmm1, %xmm3 +; SSE2-NEXT: por %xmm5, %xmm3 +; SSE2-NEXT: movdqa %xmm2, %xmm1 +; SSE2-NEXT: pxor %xmm6, %xmm1 ; SSE2-NEXT: por %xmm4, %xmm6 -; SSE2-NEXT: pcmpgtd %xmm6, %xmm5 -; 
SSE2-NEXT: movdqa %xmm0, %xmm6 -; SSE2-NEXT: pand %xmm5, %xmm6 -; SSE2-NEXT: pandn %xmm2, %xmm5 -; SSE2-NEXT: por %xmm6, %xmm5 -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: pxor %xmm4, %xmm2 -; SSE2-NEXT: por %xmm3, %xmm4 -; SSE2-NEXT: pcmpgtd %xmm4, %xmm2 -; SSE2-NEXT: movdqa %xmm3, %xmm4 -; SSE2-NEXT: pand %xmm2, %xmm4 -; SSE2-NEXT: pandn %xmm1, %xmm2 -; SSE2-NEXT: por %xmm4, %xmm2 -; SSE2-NEXT: psubd %xmm2, %xmm3 -; SSE2-NEXT: psubd %xmm5, %xmm0 -; SSE2-NEXT: pslld $16, %xmm0 -; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: pcmpgtd %xmm6, %xmm1 +; SSE2-NEXT: pand %xmm1, %xmm4 +; SSE2-NEXT: pandn %xmm2, %xmm1 +; SSE2-NEXT: por %xmm4, %xmm1 +; SSE2-NEXT: pslld $16, %xmm1 +; SSE2-NEXT: psrad $16, %xmm1 ; SSE2-NEXT: pslld $16, %xmm3 ; SSE2-NEXT: psrad $16, %xmm3 -; SSE2-NEXT: packssdw %xmm0, %xmm3 -; SSE2-NEXT: movdqa %xmm3, %xmm0 +; SSE2-NEXT: packssdw %xmm1, %xmm3 +; SSE2-NEXT: psubw %xmm3, %xmm0 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: psubus_i16_i32_min: ; SSSE3: # %bb.0: # %vector.ph -; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] -; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] -; SSSE3-NEXT: movdqa %xmm2, %xmm5 -; SSSE3-NEXT: pxor %xmm4, %xmm5 -; SSSE3-NEXT: movdqa {{.*#+}} xmm6 = [2147549183,2147549183,2147549183,2147549183] -; SSSE3-NEXT: movdqa %xmm6, %xmm7 -; SSSE3-NEXT: pcmpgtd %xmm5, %xmm7 -; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [65535,65535,65535,65535] -; SSSE3-NEXT: pand %xmm7, %xmm2 -; SSSE3-NEXT: pandn %xmm5, %xmm7 -; SSSE3-NEXT: por %xmm2, %xmm7 -; SSSE3-NEXT: pshufb %xmm3, %xmm7 -; SSSE3-NEXT: pxor %xmm1, %xmm4 +; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648] +; SSSE3-NEXT: movdqa %xmm2, %xmm4 +; SSSE3-NEXT: pxor %xmm3, %xmm4 +; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147549183,2147549183,2147549183,2147549183] +; SSSE3-NEXT: movdqa %xmm5, %xmm6 ; SSSE3-NEXT: pcmpgtd %xmm4, %xmm6 -; SSSE3-NEXT: pand %xmm6, %xmm1 -; SSSE3-NEXT: pandn %xmm5, %xmm6 -; 
SSSE3-NEXT: por %xmm1, %xmm6 -; SSSE3-NEXT: pshufb %xmm3, %xmm6 -; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm7[0] -; SSSE3-NEXT: psubusw %xmm6, %xmm0 +; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648] +; SSSE3-NEXT: movdqa %xmm2, %xmm4 +; SSSE3-NEXT: pxor %xmm3, %xmm4 +; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147549183,2147549183,2147549183,2147549183] +; SSSE3-NEXT: movdqa %xmm5, %xmm6 ; SSSE3-NEXT: pcmpgtd %xmm4, %xmm6 +; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [65535,65535,65535,65535] +; SSSE3-NEXT: pand %xmm6, %xmm2 +; SSSE3-NEXT: pandn %xmm4, %xmm6 +; SSSE3-NEXT: por %xmm2, %xmm6 +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; SSSE3-NEXT: pshufb %xmm2, %xmm6 +; SSSE3-NEXT: pxor %xmm1, %xmm3 +; SSSE3-NEXT: pcmpgtd %xmm3, %xmm5 +; SSSE3-NEXT: pand %xmm5, %xmm1 +; SSSE3-NEXT: pandn %xmm4, %xmm5 +; SSSE3-NEXT: por %xmm1, %xmm5 +; SSSE3-NEXT: pshufb %xmm2, %xmm5 +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm6[0] +; SSSE3-NEXT: psubusw %xmm5, %xmm0 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: psubus_i16_i32_min: From d1dbda10cefeaa124e28eb289cdc92c049c3d973 Mon Sep 17 00:00:00 2001 From: Marek Kurdej Date: Mon, 25 May 2020 22:26:50 +0200 Subject: [PATCH 050/770] [libc++] [LWG3201] Update status page: lerp should be marked noexcept. Summary: Update status page and test synopsis. Add synopsis in <cmath>. 
Reviewed By: ldionne, #libc Differential Revision: https://reviews.llvm.org/D80456 --- libcxx/include/cmath | 4 ++++ .../std/numerics/c.math/c.math.lerp/c.math.lerp.pass.cpp | 7 +++---- libcxx/www/cxx2a_status.html | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/libcxx/include/cmath b/libcxx/include/cmath index 0f06486fb34f3..0901a23a2498d 100644 --- a/libcxx/include/cmath +++ b/libcxx/include/cmath @@ -296,6 +296,10 @@ floating_point trunc (arithmetic x); float truncf(float x); long double truncl(long double x); +constexpr float lerp(float a, float b, float t) noexcept; // C++20 +constexpr double lerp(double a, double b, double t) noexcept; // C++20 +constexpr long double lerp(long double a, long double b, long double t) noexcept; // C++20 + } // std */ diff --git a/libcxx/test/std/numerics/c.math/c.math.lerp/c.math.lerp.pass.cpp b/libcxx/test/std/numerics/c.math/c.math.lerp/c.math.lerp.pass.cpp index 7d9ceef8b48e8..6eeeec4898d64 100644 --- a/libcxx/test/std/numerics/c.math/c.math.lerp/c.math.lerp.pass.cpp +++ b/libcxx/test/std/numerics/c.math/c.math.lerp/c.math.lerp.pass.cpp @@ -8,10 +8,9 @@ // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 // -// constexpr float lerp(float a, float b, float t); -// constexpr double lerp(double a, double b, double t); -// constexpr long double lerp(long double a, long double b, long double t); - +// constexpr float lerp(float a, float b, float t) noexcept; +// constexpr double lerp(double a, double b, double t) noexcept; +// constexpr long double lerp(long double a, long double b, long double t) noexcept; #include #include diff --git a/libcxx/www/cxx2a_status.html b/libcxx/www/cxx2a_status.html index 2a93d35feb50f..6214dd2cdd838 100644 --- a/libcxx/www/cxx2a_status.html +++ b/libcxx/www/cxx2a_status.html @@ -477,7 +477,7 @@

Library Working group Issues Status

3175The CommonReference requirement of concept SwappableWith is not satisfied in the examplePrague 3194ConvertibleTo prose does not match codePrague 3200midpoint should not constrain T is completePrague - 3201lerp should be marked as noexceptPrague + 3201lerp should be marked as noexceptPragueComplete 3226zoned_time constructor from string_view should accept zoned_time<Duration2, TimeZonePtr2>Prague 3233Broken requirements for shared_ptr converting constructorsPrague 3237LWG 3038 and 3190 have inconsistent PRsPrague From bc93c2d72e84c38fc86e64c9c26aafcf2c61457a Mon Sep 17 00:00:00 2001 From: Marek Kurdej Date: Mon, 25 May 2020 22:34:08 +0200 Subject: [PATCH 051/770] [Transforms] Fix typos. NFC --- .../llvm/Transforms/Utils/CallGraphUpdater.h | 2 +- .../Transforms/IPO/AttributorAttributes.cpp | 2 +- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 4 +-- .../Instrumentation/PoisonChecking.cpp | 2 +- .../lib/Transforms/Scalar/LoopPredication.cpp | 26 +++++++++---------- .../RewriteStatepointsForGC/preprocess.ll | 2 +- 6 files changed, 19 insertions(+), 19 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h b/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h index 6103859ca959a..22954b469186c 100644 --- a/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h +++ b/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h @@ -28,7 +28,7 @@ namespace llvm { class CallGraphUpdater { /// Containers for functions which we did replace or want to delete when /// `finalize` is called. This can happen explicitly or as part of the - /// destructor. Dead functions in comdat sections are tracked seperatly + /// destructor. Dead functions in comdat sections are tracked separately /// because a function with discardable linakage in a COMDAT should only /// be dropped if the entire COMDAT is dropped, see git ac07703842cf. 
///{ diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 6b3876ed91aae..f641ad4498cd1 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -54,7 +54,7 @@ STATISTIC(NumAAs, "Number of abstract attributes created"); // } // If there is a single "increment" side one can use the macro // STATS_DECLTRACK with a custom message. If there are multiple increment -// sides, STATS_DECL and STATS_TRACK can also be used separatly. +// sides, STATS_DECL and STATS_TRACK can also be used separately. // #define BUILD_STAT_MSG_IR_ATTR(TYPE, NAME) \ ("Number of " #TYPE " marked '" #NAME "'") diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index dd66c6703ba09..63eddbda94e73 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -224,7 +224,7 @@ struct OpenMPOpt { OMPRTL_omp_get_partition_num_places, OMPRTL_omp_get_partition_place_nums}; - // Global-tid is handled separatly. + // Global-tid is handled separately. 
SmallSetVector GTIdArgs; collectGlobalThreadIdArguments(GTIdArgs); LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size() @@ -556,7 +556,7 @@ struct OpenMPOpt { auto &ORE = OREGetter(F); ORE.emit([&]() { - return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, Inst)); + return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, Inst)); }); } diff --git a/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp b/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp index e5c338fed9523..85e096112fca1 100644 --- a/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp +++ b/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp @@ -190,7 +190,7 @@ static void generateCreationChecks(Instruction &I, if (isa(I) && !I.getType()->isVectorTy()) generateCreationChecksForBinOp(I, Checks); - // Handle non-binops seperately + // Handle non-binops separately switch (I.getOpcode()) { default: // Note there are a couple of missing cases here, once implemented, this diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp index 1d73a9f24453a..edde22d6708fe 100644 --- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp @@ -268,7 +268,7 @@ class LoopPredication { /// Return an insertion point suitable for inserting a safe to speculate /// instruction whose only user will be 'User' which has operands 'Ops'. A /// trivial result would be the at the User itself, but we try to return a - /// loop invariant location if possible. + /// loop invariant location if possible. Instruction *findInsertPt(Instruction *User, ArrayRef Ops); /// Same as above, *except* that this uses the SCEV definition of invariant /// which is that an expression *can be made* invariant via SCEVExpander. @@ -278,7 +278,7 @@ class LoopPredication { /// Return true if the value is known to produce a single fixed value across /// all iterations on which it executes. Note that this does not imply - /// speculation safety. 
That must be established seperately. + /// speculation safety. That must be established separately. bool isLoopInvariantValue(const SCEV* S); Value *expandCheck(SCEVExpander &Expander, Instruction *Guard, @@ -398,7 +398,7 @@ LoopPredication::parseLoopICmp(ICmpInst *ICI) { } Value *LoopPredication::expandCheck(SCEVExpander &Expander, - Instruction *Guard, + Instruction *Guard, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { Type *Ty = LHS->getType(); @@ -522,7 +522,7 @@ Instruction *LoopPredication::findInsertPt(Instruction *Use, return Preheader->getTerminator(); } -bool LoopPredication::isLoopInvariantValue(const SCEV* S) { +bool LoopPredication::isLoopInvariantValue(const SCEV* S) { // Handling expressions which produce invariant results, but *haven't* yet // been removed from the loop serves two important purposes. // 1) Most importantly, it resolves a pass ordering cycle which would @@ -535,12 +535,12 @@ bool LoopPredication::isLoopInvariantValue(const SCEV* S) { // much more obviously in the IR. Otherwise, the cost modeling for other // transforms would end up needing to duplicate all of this logic to model a // check which becomes predictable based on a modeled peel or unswitch. - // + // // The cost of doing so in the worst case is an extra fill from the stack in // the loop to materialize the loop invariant test value instead of checking // against the original IV which is presumable in a register inside the loop. // Such cases are presumably rare, and hint at missing oppurtunities for - // other passes. + // other passes. if (SE->isLoopInvariant(S, L)) // Note: This the SCEV variant, so the original Value* may be within the @@ -548,7 +548,7 @@ bool LoopPredication::isLoopInvariantValue(const SCEV* S) { return true; // Handle a particular important case which SCEV doesn't yet know about which - // shows up in range checks on arrays with immutable lengths. + // shows up in range checks on arrays with immutable lengths. 
// TODO: This should be sunk inside SCEV. if (const SCEVUnknown *U = dyn_cast(S)) if (const auto *LI = dyn_cast(U->getValue())) @@ -575,7 +575,7 @@ Optional LoopPredication::widenICmpRangeCheckIncrementingLoop( const SCEV *LatchLimit = LatchCheck.Limit; // Subtlety: We need all the values to be *invariant* across all iterations, // but we only need to check expansion safety for those which *aren't* - // already guaranteed to dominate the guard. + // already guaranteed to dominate the guard. if (!isLoopInvariantValue(GuardStart) || !isLoopInvariantValue(GuardLimit) || !isLoopInvariantValue(LatchStart) || @@ -599,7 +599,7 @@ Optional LoopPredication::widenICmpRangeCheckIncrementingLoop( LLVM_DEBUG(dbgs() << "LHS: " << *LatchLimit << "\n"); LLVM_DEBUG(dbgs() << "RHS: " << *RHS << "\n"); LLVM_DEBUG(dbgs() << "Pred: " << LimitCheckPred << "\n"); - + auto *LimitCheck = expandCheck(Expander, Guard, LimitCheckPred, LatchLimit, RHS); auto *FirstIterationCheck = expandCheck(Expander, Guard, RangeCheck.Pred, @@ -618,7 +618,7 @@ Optional LoopPredication::widenICmpRangeCheckDecrementingLoop( const SCEV *LatchLimit = LatchCheck.Limit; // Subtlety: We need all the values to be *invariant* across all iterations, // but we only need to check expansion safety for those which *aren't* - // already guaranteed to dominate the guard. + // already guaranteed to dominate the guard. if (!isLoopInvariantValue(GuardStart) || !isLoopInvariantValue(GuardLimit) || !isLoopInvariantValue(LatchStart) || @@ -659,7 +659,7 @@ Optional LoopPredication::widenICmpRangeCheckDecrementingLoop( static void normalizePredicate(ScalarEvolution *SE, Loop *L, LoopICmp& RC) { // LFTR canonicalizes checks to the ICMP_NE/EQ form; normalize back to the - // ULT/UGE form for ease of handling by our caller. + // ULT/UGE form for ease of handling by our caller. 
if (ICmpInst::isEquality(RC.Pred) && RC.IV->getStepRecurrence(*SE)->isOne() && SE->isKnownPredicate(ICmpInst::ICMP_ULE, RC.IV->getStart(), RC.Limit)) @@ -1044,7 +1044,7 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { // inserting a branch on the value which can be either poison or undef. In // this case, the branch can legally go either way; we just need to avoid // introducing UB. This is achieved through the use of the freeze - // instruction. + // instruction. SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); @@ -1072,7 +1072,7 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { // analyzeable after dropping widenability. { bool Invalidate = false; - + for (auto *ExitingBB : ExitingBlocks) { if (LI->getLoopFor(ExitingBB) != L) continue; diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/preprocess.ll b/llvm/test/Transforms/RewriteStatepointsForGC/preprocess.ll index 105e0e88ac215..6e03798b1d2a6 100644 --- a/llvm/test/Transforms/RewriteStatepointsForGC/preprocess.ll +++ b/llvm/test/Transforms/RewriteStatepointsForGC/preprocess.ll @@ -29,7 +29,7 @@ next: ; preds = %entry define void @test7() gc "statepoint-example" { ; CHECK-LABEL: test7 ; CHECK-NOT: gc.statepoint -; Need to delete unreachable gc.statepoint invoke - tested seperately given +; Need to delete unreachable gc.statepoint invoke - tested separately given ; a correct implementation could only remove the instructions, not the block ret void From 179c80117c91fc3ba3079740a91de40d98b18916 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 25 May 2020 20:44:38 +0100 Subject: [PATCH 052/770] [LoopUnroll] Remove dead NextBlocks argument (NFC). 
--- llvm/lib/Transforms/Utils/LoopUnroll.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index c0177bdb3f6fa..23b61c40a7567 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -717,7 +717,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, } auto setDest = [LoopExit, ContinueOnTrue](BasicBlock *Src, BasicBlock *Dest, - ArrayRef NextBlocks, BasicBlock *BlockInLoop, bool NeedConditional) { auto *Term = cast(Src->getTerminator()); @@ -779,7 +778,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, NeedConditional = false; } - setDest(Latches[i], Dest, Headers, Headers[i], NeedConditional); + setDest(Latches[i], Dest, Headers[i], NeedConditional); } } else { // Setup headers to branch to their new successors in the unrolled @@ -803,7 +802,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // unconditional branch for some iterations. NeedConditional = false; - setDest(Headers[i], Dest, Headers, HeaderSucc[i], NeedConditional); + setDest(Headers[i], Dest, HeaderSucc[i], NeedConditional); } // Set up latches to branch to the new header in the unrolled iterations or From cec20db588254289dc2953517310b9886f6dc243 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Mon, 25 May 2020 15:43:28 -0700 Subject: [PATCH 053/770] [Inlining] Set inline-deferral-scale to 2. Summary: This patch sets inline-deferral-scale to 2. Both internal and SPEC benchmarking show that 2 is the best number among -1, 2, 3, and 4. inline-deferral-scale SPECint2006 ------------------------------------------------------------ -1 38.0 (the default without this patch) 2 38.5 3 38.1 4 38.1 With the new default number, shouldBeDeferred returns true if: TotalCost < IC.getCost() * 2 where TotalCost is TotalSecondaryCost + IC.getCost() * NumCallerUsers. 
If TotalSecondaryCost >= 0 and NumCallerUsers >= 2, then TotalCost >= IC.getCost() * 2, so shouldBeDeferred returns true only when NumCallerUsers is 1. Now, if TotalSecondaryCost < 0, which can happen if InlineConstants::LastCallToStaticBonus, a huge number, has been subtracted from TotalSecondaryCost, then TotalCost may be negative. In this case, shouldBeDeferred may return true even when NumCallerUsers >= 2. Reviewers: davidxl, nikic Reviewed By: davidxl Subscribers: xbolva00, hiraditya, dexonsmith, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D80229 --- llvm/lib/Analysis/InlineAdvisor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp index dcaf9d0fea749..ac3ba451aa3f6 100644 --- a/llvm/lib/Analysis/InlineAdvisor.cpp +++ b/llvm/lib/Analysis/InlineAdvisor.cpp @@ -45,7 +45,7 @@ static cl::opt static cl::opt InlineDeferralScale("inline-deferral-scale", cl::desc("Scale to limit the cost of inline deferral"), - cl::init(-1), cl::Hidden); + cl::init(2), cl::Hidden); namespace { class DefaultInlineAdvice : public InlineAdvice { From 3a2df3bad07f7e5fc22538ad782e08ee55f29e41 Mon Sep 17 00:00:00 2001 From: Yuanfang Chen Date: Fri, 22 May 2020 12:33:33 -0700 Subject: [PATCH 054/770] [Clang][test] fix tests when using external assembler. Summary: The test assumes the integrated assembler (-fintegrated-as) is used, so make it explicit.
Reviewed by: aganea Differential Revision: https://reviews.llvm.org/D80454 --- clang/test/Driver/cc1-spawnprocess.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/clang/test/Driver/cc1-spawnprocess.c b/clang/test/Driver/cc1-spawnprocess.c index 8af8cc4c05553..36df7067487c9 100644 --- a/clang/test/Driver/cc1-spawnprocess.c +++ b/clang/test/Driver/cc1-spawnprocess.c @@ -1,18 +1,23 @@ -// RUN: %clang -fintegrated-cc1 -c -### %s 2>&1 | FileCheck %s --check-prefix=YES +// If a toolchain uses an external assembler, the test would fail because using +// an external assember would increase job counts. Most toolchains in tree +// use integrated assembler, but we still support external assembler. +// So -fintegrated-as is specified explicitly when applicable. + +// RUN: %clang -fintegrated-cc1 -fintegrated-as -c -### %s 2>&1 | FileCheck %s --check-prefix=YES // RUN: %clang -fno-integrated-cc1 -c -### %s 2>&1 | FileCheck %s --check-prefix=NO // RUN: %clang -fintegrated-cc1 -fno-integrated-cc1 -c -### %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=NO -// RUN: %clang -fno-integrated-cc1 -fintegrated-cc1 -c -### %s 2>&1 \ +// RUN: %clang -fno-integrated-cc1 -fintegrated-cc1 -fintegrated-as -c -### %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=YES -// RUN: %clang_cl -fintegrated-cc1 -c -### -- %s 2>&1 \ +// RUN: %clang_cl -fintegrated-cc1 -fintegrated-as -c -### -- %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=YES // RUN: %clang_cl -fno-integrated-cc1 -c -### -- %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=NO // RUN: env CCC_OVERRIDE_OPTIONS=+-fintegrated-cc1 \ -// RUN: %clang -fintegrated-cc1 -c -### %s 2>&1 \ +// RUN: %clang -fintegrated-cc1 -fintegrated-as -c -### %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=YES // RUN: env CCC_OVERRIDE_OPTIONS=+-fno-integrated-cc1 \ // RUN: %clang -fintegrated-cc1 -c -### %s 2>&1 \ @@ -24,7 +29,7 @@ // The following tests ensure that only one integrated-cc1 is executed.
// Only one TU, one job, thus integrated-cc1 is enabled. -// RUN: %clang -fintegrated-cc1 -c %s -### 2>&1 | FileCheck %s --check-prefix=YES +// RUN: %clang -fintegrated-cc1 -fintegrated-as -c %s -### 2>&1 | FileCheck %s --check-prefix=YES // Only one TU, but we're linking, two jobs, thus integrated-cc1 is disabled. // RUN: %clang -fintegrated-cc1 %s -### 2>&1 | FileCheck %s --check-prefix=NO From 9a8d7bd77040a6497233ea10fd866ad9de8bf98c Mon Sep 17 00:00:00 2001 From: Yuanfang Chen Date: Mon, 25 May 2020 17:36:28 -0700 Subject: [PATCH 055/770] [clang][test] fix tests for external assemblers These three tests depend on using the integrated assembler. Make it explicit by specifying -fintegrated-as. --- clang/test/Driver/debug-prefix-map.S | 4 ++-- clang/test/Driver/flang/flang.f90 | 2 +- clang/test/Driver/flang/flang_ucase.F90 | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/test/Driver/debug-prefix-map.S b/clang/test/Driver/debug-prefix-map.S index 7d12a17479726..6dd1ded9bfdfa 100644 --- a/clang/test/Driver/debug-prefix-map.S +++ b/clang/test/Driver/debug-prefix-map.S @@ -1,5 +1,5 @@ -// RUN: %clang -### -g -fdebug-prefix-map=old=new %s 2>&1 | FileCheck %s -// RUN: %clang -### -g -ffile-prefix-map=old=new %s 2>&1 | FileCheck %s +// RUN: %clang -### -g -fintegrated-as -fdebug-prefix-map=old=new %s 2>&1 | FileCheck %s +// RUN: %clang -### -g -fintegrated-as -ffile-prefix-map=old=new %s 2>&1 | FileCheck %s // CHECK: cc1as // CHECK-SAME: -fdebug-prefix-map=old=new diff --git a/clang/test/Driver/flang/flang.f90 b/clang/test/Driver/flang/flang.f90 index 9d47c7c90225c..a68be31343f9c 100644 --- a/clang/test/Driver/flang/flang.f90 +++ b/clang/test/Driver/flang/flang.f90 @@ -43,7 +43,7 @@ ! CHECK-S-DAG: "-S" ! CHECK-S-DAG: "-o" "{{[^"]*}}.s" -! RUN: %clang --driver-mode=flang -### %s 2>&1 | FileCheck --check-prefixes=ALL,CHECK-EMIT-OBJ %s +! 
RUN: %clang --driver-mode=flang -### -fintegrated-as %s 2>&1 | FileCheck --check-prefixes=ALL,CHECK-EMIT-OBJ %s ! CHECK-EMIT-OBJ-DAG: "-emit-obj" ! CHECK-EMIT-OBJ-DAG: "-o" "{{[^"]*}}.o" diff --git a/clang/test/Driver/flang/flang_ucase.F90 b/clang/test/Driver/flang/flang_ucase.F90 index 323afb21dccf5..dd1e20088191f 100644 --- a/clang/test/Driver/flang/flang_ucase.F90 +++ b/clang/test/Driver/flang/flang_ucase.F90 @@ -43,7 +43,7 @@ ! CHECK-S-DAG: "-S" ! CHECK-S-DAG: "-o" "{{[^"]*}}.s" -! RUN: %clang --driver-mode=flang -### %s 2>&1 | FileCheck --check-prefixes=ALL,CHECK-EMIT-OBJ %s +! RUN: %clang --driver-mode=flang -### -fintegrated-as %s 2>&1 | FileCheck --check-prefixes=ALL,CHECK-EMIT-OBJ %s ! CHECK-EMIT-OBJ-DAG: "-emit-obj" ! CHECK-EMIT-OBJ-DAG: "-o" "{{[^"]*}}.o" From 793cc518b9428a0b7a40c59d4ecd5939a7bc84f7 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Mon, 25 May 2020 20:07:22 -0500 Subject: [PATCH 056/770] [PowerPC] Prevent legalization loop from promoting SELECT_CC from v4i32 to v4i32 As reported in https://bugs.llvm.org/show_bug.cgi?id=45709 we can hit an infinite loop in legalization since we set the legalization action for ISD::SELECT_CC for all fixed length vector types to Promote. Without some different legalization action for the type being promoted to, the legalizer simply loops. Since we don't have patterns to match the node, the right legalization action should be Expand. 
Differential revision: https://reviews.llvm.org/D79854 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 1 + llvm/test/CodeGen/PowerPC/pr45709.ll | 58 +++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/pr45709.ll diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index d42eaa7b77062..2f9ff293c2775 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -718,6 +718,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); } } + setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand); if (!Subtarget.hasP8Vector()) { setOperationAction(ISD::SMAX, MVT::v2i64, Expand); setOperationAction(ISD::SMIN, MVT::v2i64, Expand); diff --git a/llvm/test/CodeGen/PowerPC/pr45709.ll b/llvm/test/CodeGen/PowerPC/pr45709.ll new file mode 100644 index 0000000000000..bc295fafd2105 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pr45709.ll @@ -0,0 +1,58 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \ +; RUN: -mcpu=pwr6 -ppc-asm-full-reg-names -mattr=-vsx \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s + +; There is code in the SDAG to expand FMAX/FMIN with fast flags to SELECT_CC. +; On PPC, we had SELECT_CC legalized using Promote for all vector types +; (including the type that they are all promoted to - which caused an infinite +; loop in legalization). This test just ensures that we terminate on such input. 
+define dso_local void @_ZN1a1bEv(<4 x float> %in) local_unnamed_addr #0 align 2 { +; CHECK-LABEL: _ZN1a1bEv: +; CHECK: # %bb.0: +; CHECK-NEXT: bclr 12, 4*cr5+lt, 0 +; CHECK-NEXT: # %bb.1: # %.preheader +; CHECK-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; CHECK-NEXT: vxor v3, v3, v3 +; CHECK-NEXT: addi r3, r3, .LCPI0_0@toc@l +; CHECK-NEXT: lvx v4, 0, r3 +; CHECK-NEXT: addi r3, r1, -48 +; CHECK-NEXT: stvx v3, 0, r3 +; CHECK-NEXT: addi r3, r1, -32 +; CHECK-NEXT: vperm v2, v2, v2, v4 +; CHECK-NEXT: stvx v2, 0, r3 +; CHECK-NEXT: lwz r3, -48(r1) +; CHECK-NEXT: lwz r4, -32(r1) +; CHECK-NEXT: cmpw r4, r3 +; CHECK-NEXT: bc 12, gt, .LBB0_2 +; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: .LBB0_2: # %.preheader +; CHECK-NEXT: addi r3, r4, 0 +; CHECK-NEXT: .LBB0_3: # %.preheader +; CHECK-NEXT: stw r3, -64(r1) +; CHECK-NEXT: addi r3, r1, -64 +; CHECK-NEXT: lvx v2, 0, r3 +; CHECK-NEXT: addi r3, r1, -16 +; CHECK-NEXT: stvx v2, 0, r3 +; CHECK-NEXT: blr + br i1 undef, label %7, label %1 + +1: ; preds = %1, %0 + br i1 undef, label %2, label %1 + +2: ; preds = %1 + %3 = shufflevector <4 x float> %in, <4 x float> undef, <4 x i32> + %4 = call fast <4 x float> @llvm.maxnum.v4f32(<4 x float> %3, <4 x float> zeroinitializer) + %5 = call fast <4 x float> @llvm.maxnum.v4f32(<4 x float> %4, <4 x float> undef) + %6 = extractelement <4 x float> %5, i32 0 + br label %7 + +7: ; preds = %2, %0 + %8 = phi float [ %6, %2 ], [ undef, %0 ] + %9 = fcmp fast une float %8, 0.000000e+00 + ret void +} + +declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #0 + +attributes #0 = { nounwind } From 9d55e4ee1367b440bb8402ce3a33d5a8b99aee06 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 25 May 2020 15:05:35 -0700 Subject: [PATCH 057/770] Make explicit -fno-semantic-interposition (in -fpic mode) infer dso_local -fno-semantic-interposition is currently the CC1 default. (The opposite disables some interprocedural optimizations.) 
However, it does not infer dso_local: on most targets accesses to ExternalLinkage functions/variables defined in the current module still need PLT/GOT. This patch makes explicit -fno-semantic-interposition infer dso_local, so that PLT/GOT can be eliminated if targets implement local aliases for AsmPrinter::getSymbolPreferLocal (currently only x86). Currently we check whether the module flag "SemanticInterposition" is 0. If yes, infer dso_local. In the future, we can infer dso_local unless "SemanticInterposition" is 1: frontends other than clang will also benefit from the optimization if they don't bother setting the flag. (There will be risks if they do want ELF interposition: they need to set "SemanticInterposition" to 1.) --- clang/include/clang/Basic/LangOptions.def | 1 + clang/include/clang/Driver/Options.td | 2 +- clang/lib/CodeGen/CodeGenModule.cpp | 3 ++ clang/lib/Driver/ToolChains/Clang.cpp | 10 ++-- clang/lib/Frontend/CompilerInvocation.cpp | 3 ++ clang/test/CodeGen/semantic-interposition.c | 4 ++ clang/test/Driver/fsemantic-interposition.c | 6 ++- llvm/include/llvm/IR/GlobalValue.h | 1 + llvm/include/llvm/IR/Module.h | 1 + llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 8 ++-- llvm/lib/IR/Globals.cpp | 6 +++ llvm/lib/IR/Module.cpp | 7 +++ llvm/lib/Target/TargetMachine.cpp | 8 ++++ .../semantic-interposition-infer-dsolocal.ll | 46 +++++++++++++++++++ 14 files changed, 96 insertions(+), 10 deletions(-) create mode 100644 llvm/test/CodeGen/X86/semantic-interposition-infer-dsolocal.ll diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index e94305da46ba5..6e72b47f489b5 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -301,6 +301,7 @@ ENUM_LANGOPT(TypeVisibilityMode, Visibility, 3, DefaultVisibility, LANGOPT(SetVisibilityForExternDecls, 1, 0, "apply global symbol visibility to external declarations without an explicit visibility") 
BENIGN_LANGOPT(SemanticInterposition , 1, 0, "semantic interposition") +BENIGN_LANGOPT(ExplicitNoSemanticInterposition, 1, 0, "explicitly no semantic interposition") ENUM_LANGOPT(StackProtector, StackProtectorMode, 2, SSPOff, "stack protector mode") ENUM_LANGOPT(TrivialAutoVarInit, TrivialAutoVarInitKind, 2, TrivialAutoVarInitKind::Uninitialized, diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 7213af1731c17..e88e6cf8a1301 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3371,7 +3371,7 @@ defm ipa_cp : BooleanFFlag<"ipa-cp">, Group; defm ivopts : BooleanFFlag<"ivopts">, Group; def fsemantic_interposition : Flag<["-"], "fsemantic-interposition">, Group, Flags<[CC1Option]>; -def fno_semantic_interposition: Flag<["-"], "fno-semantic-interposition">, Group; +def fno_semantic_interposition: Flag<["-"], "fno-semantic-interposition">, Group, Flags<[CC1Option]>; defm non_call_exceptions : BooleanFFlag<"non-call-exceptions">, Group; defm peel_loops : BooleanFFlag<"peel-loops">, Group; defm permissive : BooleanFFlag<"permissive">, Group; diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 8ba7fb756ada8..f43bc6434dafd 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -493,6 +493,9 @@ void CodeGenModule::Release() { if (Context.getLangOpts().SemanticInterposition) // Require various optimization to respect semantic interposition. getModule().setSemanticInterposition(1); + else if (Context.getLangOpts().ExplicitNoSemanticInterposition) + // Allow dso_local on applicable targets. + getModule().setSemanticInterposition(0); if (CodeGenOpts.EmitCodeView) { // Indicate that we want CodeView in the metadata. 
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index deb60ed68cfca..f33983db3e1eb 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4471,10 +4471,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(A->getValue()); } - if (Args.hasFlag(options::OPT_fsemantic_interposition, - options::OPT_fno_semantic_interposition, false) && - RelocationModel != llvm::Reloc::Static && !IsPIE) - CmdArgs.push_back("-fsemantic-interposition"); + // The default is -fno-semantic-interposition. We render it just because we + // require explicit -fno-semantic-interposition to infer dso_local. + if (Arg *A = Args.getLastArg(options::OPT_fsemantic_interposition, + options::OPT_fno_semantic_interposition)) + if (RelocationModel != llvm::Reloc::Static && !IsPIE) + A->render(Args, CmdArgs); CmdArgs.push_back("-mthread-model"); if (Arg *A = Args.getLastArg(options::OPT_mthread_model)) { diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index b4bc027e832b1..f98490cd9a114 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3049,6 +3049,9 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, } Opts.SemanticInterposition = Args.hasArg(OPT_fsemantic_interposition); + // An explicit -fno-semantic-interposition infers dso_local. 
+ Opts.ExplicitNoSemanticInterposition = + Args.hasArg(OPT_fno_semantic_interposition); // -mrtd option if (Arg *A = Args.getLastArg(OPT_mrtd)) { diff --git a/clang/test/CodeGen/semantic-interposition.c b/clang/test/CodeGen/semantic-interposition.c index 43656e36021ff..3d6c5f2872b57 100644 --- a/clang/test/CodeGen/semantic-interposition.c +++ b/clang/test/CodeGen/semantic-interposition.c @@ -1,5 +1,9 @@ // RUN: %clang_cc1 -emit-llvm -fsemantic-interposition %s -o - | FileCheck --check-prefix=INTERPOSITION %s // RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck --check-prefix=NO %s +/// With explicit -fno-semantic-interposition, add a module flag to inform the +/// backend that dso_local can be inferred. +// RUN: %clang_cc1 -emit-llvm -fno-semantic-interposition %s -o - | FileCheck --check-prefix=EXPLICIT_NO %s // INTERPOSITION: !{{[0-9]+}} = !{i32 1, !"SemanticInterposition", i32 1} // NO-NOT: "SemanticInterposition" +// EXPLICIT_NO: !{{[0-9]+}} = !{i32 1, !"SemanticInterposition", i32 0} diff --git a/clang/test/Driver/fsemantic-interposition.c b/clang/test/Driver/fsemantic-interposition.c index 20bc2c6f72703..af3e7575a7997 100644 --- a/clang/test/Driver/fsemantic-interposition.c +++ b/clang/test/Driver/fsemantic-interposition.c @@ -2,8 +2,12 @@ // RUN: %clang -target x86_64 %s -Werror -fPIC -fsemantic-interposition -c -### 2>&1 | FileCheck %s // CHECK: "-fsemantic-interposition" -// RUN: %clang -target x86_64 %s -Werror -fPIC -fsemantic-interposition -fno-semantic-interposition -c -### 2>&1 | FileCheck --check-prefix=NO %s +/// Require explicit -fno-semantic-interposition to infer dso_local. 
+// RUN: %clang -target x86_64 %s -Werror -fPIC -fsemantic-interposition -fno-semantic-interposition -c -### 2>&1 | FileCheck --check-prefix=EXPLICIT_NO %s +// EXPLICIT_NO: "-fno-semantic-interposition" + // RUN: %clang -target x86_64 %s -Werror -fsemantic-interposition -c -### 2>&1 | FileCheck --check-prefix=NO %s // RUN: %clang -target x86_64 %s -Werror -fPIC -c -### 2>&1 | FileCheck --check-prefix=NO %s // RUN: %clang -target x86_64 %s -Werror -fPIE -fsemantic-interposition -c -### 2>&1 | FileCheck --check-prefix=NO %s // NO-NOT: "-fsemantic-interposition" +// NO-NOT: "-fno-semantic-interposition" diff --git a/llvm/include/llvm/IR/GlobalValue.h b/llvm/include/llvm/IR/GlobalValue.h index 398eca2d9b2e9..1c19011c9131c 100644 --- a/llvm/include/llvm/IR/GlobalValue.h +++ b/llvm/include/llvm/IR/GlobalValue.h @@ -427,6 +427,7 @@ class GlobalValue : public Constant { /// inlining across interposable call edges, since the callee can be /// replaced with something arbitrary. bool isInterposable() const; + bool canBenefitFromLocalAlias() const; bool hasExternalLinkage() const { return isExternalLinkage(getLinkage()); } bool hasAvailableExternallyLinkage() const { diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h index 3052651a37226..ead003007904c 100644 --- a/llvm/include/llvm/IR/Module.h +++ b/llvm/include/llvm/IR/Module.h @@ -857,6 +857,7 @@ class Module { /// Returns whether semantic interposition is to be respected. bool getSemanticInterposition() const; + bool noSemanticInterposition() const; /// Set whether semantic interposition is to be respected. 
void setSemanticInterposition(bool); diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index db458e2b8a92c..5fba0f01ba524 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -462,10 +462,10 @@ MCSymbol *AsmPrinter::getSymbolPreferLocal(const GlobalValue &GV) const { // assembler would otherwise be conservative and assume a global default // visibility symbol can be interposable, even if the code generator already // assumed it. - if (TM.getTargetTriple().isOSBinFormatELF() && - GlobalObject::isExternalLinkage(GV.getLinkage()) && GV.isDSOLocal() && - !GV.isDeclaration() && !isa<GlobalIFunc>(GV) && !GV.hasComdat()) - return getSymbolWithGlobalValueBase(&GV, "$local"); + if (TM.getTargetTriple().isOSBinFormatELF() && GV.canBenefitFromLocalAlias()) + if (GV.isDSOLocal() || (TM.getTargetTriple().isX86() && + GV.getParent()->noSemanticInterposition())) + return getSymbolWithGlobalValueBase(&GV, "$local"); return TM.getSymbol(&GV); } diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp index 7c1c682d0262e..eefd221ec389d 100644 --- a/llvm/lib/IR/Globals.cpp +++ b/llvm/lib/IR/Globals.cpp @@ -101,6 +101,12 @@ bool GlobalValue::isInterposable() const { !isDSOLocal(); } +bool GlobalValue::canBenefitFromLocalAlias() const { + // See AsmPrinter::getSymbolPreferLocal(). + return GlobalObject::isExternalLinkage(getLinkage()) && !isDeclaration() && + !isa<GlobalIFunc>(this) && !hasComdat(); +} + unsigned GlobalValue::getAlignment() const { if (auto *GA = dyn_cast<GlobalAlias>(this)) { // In general we cannot compute this at the IR level, but we try.
diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp index 9ac1edb2519d3..1416cdce99749 100644 --- a/llvm/lib/IR/Module.cpp +++ b/llvm/lib/IR/Module.cpp @@ -600,6 +600,13 @@ void Module::setSemanticInterposition(bool SI) { addModuleFlag(ModFlagBehavior::Error, "SemanticInterposition", SI); } +bool Module::noSemanticInterposition() const { + // Conservatively require an explicit zero value for now. + Metadata *MF = getModuleFlag("SemanticInterposition"); + auto *Val = cast_or_null<ConstantAsMetadata>(MF); + return Val && cast<ConstantInt>(Val->getValue())->getZExtValue() == 0; +} + void Module::setOwnedMemoryBuffer(std::unique_ptr<MemoryBuffer> MB) { OwnedMemoryBuffer = std::move(MB); } diff --git a/llvm/lib/Target/TargetMachine.cpp b/llvm/lib/Target/TargetMachine.cpp index 1de6e871569cd..074e9fde79e6b 100644 --- a/llvm/lib/Target/TargetMachine.cpp +++ b/llvm/lib/Target/TargetMachine.cpp @@ -193,6 +193,14 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M, // Check if we can use copy relocations. if (!(GV && GV->isThreadLocal()) && RM == Reloc::Static) return true; + } else if (TT.isOSBinFormatELF()) { + // If dso_local allows AsmPrinter::getSymbolPreferLocal to use a local + // alias, set the flag. We cannot set dso_local for other global values, + // because otherwise direct accesses to a probably interposable symbol (even + // if the codegen assumes not) will be rejected by the linker. + if (!GV || !GV->canBenefitFromLocalAlias()) + return false; + return TT.isX86() && M.noSemanticInterposition(); } // ELF & wasm support preemption of other symbols. diff --git a/llvm/test/CodeGen/X86/semantic-interposition-infer-dsolocal.ll b/llvm/test/CodeGen/X86/semantic-interposition-infer-dsolocal.ll new file mode 100644 index 0000000000000..a0391d0364681 --- /dev/null +++ b/llvm/test/CodeGen/X86/semantic-interposition-infer-dsolocal.ll @@ -0,0 +1,46 @@ +; RUN: llc -mtriple=x86_64 -relocation-model=pic < %s | FileCheck %s + +;; With a module flag SemanticInterposition=0, infer dso_local flags even if PIC.
+;; Local aliases will be generated for applicable variables and functions. + +@var = global i32 0, align 4 + +@ifunc = ifunc i32 (), bitcast (i32 ()* ()* @ifunc_resolver to i32 ()*) + +define i32 @ifunc_impl() { +entry: + ret i32 0 +} + +define i32 ()* @ifunc_resolver() { +entry: + ret i32 ()* @ifunc_impl +} + +declare i32 @external() + +define i32 @func() { + ret i32 0 +} + +;; Don't set dso_local on declarations or ifuncs. +define i32 @foo() { +; CHECK: movl .Lvar$local(%rip), %ebp +; CHECK: callq external@PLT +; CHECK: callq ifunc@PLT +; CHECK: callq .Lfunc$local{{$}} +entry: + %0 = load i32, i32* @var, align 4 + %call = tail call i32 @external() + %add = add nsw i32 %call, %0 + %call1 = tail call i32 @ifunc() + %add2 = add nsw i32 %add, %call1 + %call2 = tail call i32 @func() + %add3 = add nsw i32 %add, %call2 + ret i32 %add3 +} + +!llvm.module.flags = !{!0, !1} + +!0 = !{i32 1, !"SemanticInterposition", i32 0} +!1 = !{i32 7, !"PIC Level", i32 2} From d8e0ad9620c6e626d753a3ae0da6c712e4d400d3 Mon Sep 17 00:00:00 2001 From: Yuanfang Chen Date: Mon, 25 May 2020 22:14:05 -0700 Subject: [PATCH 058/770] [clang][test] fix tests for external assemblers The test depends on using the integrated assembler. Make it explicit by specifying -fintegrated-as. --- clang/test/Driver/modules-ts.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/test/Driver/modules-ts.cpp b/clang/test/Driver/modules-ts.cpp index 3847b71f7b742..80eef081371fa 100644 --- a/clang/test/Driver/modules-ts.cpp +++ b/clang/test/Driver/modules-ts.cpp @@ -9,7 +9,7 @@ // Check compiling a .pcm file to a .o file. 
// -// RUN: %clang -fmodules-ts %t.pcm -c -o %t.pcm.o -v 2>&1 | FileCheck %s --check-prefix=CHECK-COMPILE +// RUN: %clang -fmodules-ts -fintegrated-as %t.pcm -c -o %t.pcm.o -v 2>&1 | FileCheck %s --check-prefix=CHECK-COMPILE // // CHECK-COMPILE: -cc1 {{.*}} -emit-obj // CHECK-COMPILE-SAME: -o {{.*}}.pcm.o @@ -18,7 +18,7 @@ // Check use of a .pcm file in another compilation. // -// RUN: %clang -fmodules-ts -fmodule-file=%t.pcm -Dexport= %s -c -o %t.o -v 2>&1 | FileCheck %s --check-prefix=CHECK-USE +// RUN: %clang -fmodules-ts -fmodule-file=%t.pcm -fintegrated-as -Dexport= %s -c -o %t.o -v 2>&1 | FileCheck %s --check-prefix=CHECK-USE // // CHECK-USE: -cc1 // CHECK-USE-SAME: -emit-obj @@ -28,7 +28,7 @@ // Check combining precompile and compile steps works. // -// RUN: %clang -fmodules-ts -x c++-module %s -c -o %t.pcm.o -v 2>&1 | FileCheck %s --check-prefix=CHECK-PRECOMPILE --check-prefix=CHECK-COMPILE +// RUN: %clang -fmodules-ts -fintegrated-as -x c++-module %s -c -o %t.pcm.o -v 2>&1 | FileCheck %s --check-prefix=CHECK-PRECOMPILE --check-prefix=CHECK-COMPILE // Check that .cppm is treated as a module implicitly. // RUN: cp %s %t.cppm From eeedbd033612e105755156023bdeec2fba4eca21 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Mon, 25 May 2020 17:25:55 +0200 Subject: [PATCH 059/770] [clangd] Make use of SourceOrder to find first initializer in DefineOutline Summary: Constructors can have implicit initializers, this was crashing define outline. Make sure we find the first "written" ctor initializer to figure out `:` location. 
Fixes https://github.com/clangd/clangd/issues/400 Reviewers: sammccall Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D80521 --- .../clangd/refactor/tweaks/DefineOutline.cpp | 10 ++-- .../clangd/unittests/TweakTests.cpp | 46 +++++++++++++++++-- 2 files changed, 45 insertions(+), 11 deletions(-) diff --git a/clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp b/clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp index 405ff90a5945c..63a5ba6cb9988 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp @@ -317,18 +317,16 @@ SourceRange getDeletionRange(const FunctionDecl *FD, const syntax::TokenBuffer &TokBuf) { auto DeletionRange = FD->getBody()->getSourceRange(); if (auto *CD = llvm::dyn_cast<CXXConstructorDecl>(FD)) { - const auto &SM = TokBuf.sourceManager(); // AST doesn't contain the location for ":" in ctor initializers. Therefore // we find it by finding the first ":" before the first ctor initializer. SourceLocation InitStart; // Find the first initializer. for (const auto *CInit : CD->inits()) { - // We don't care about in-class initializers. - if (CInit->isInClassMemberInitializer()) + // SourceOrder is -1 for implicit initializers.
+ if (CInit->getSourceOrder() != 0) continue; - if (InitStart.isInvalid() || - SM.isBeforeInTranslationUnit(CInit->getSourceLocation(), InitStart)) - InitStart = CInit->getSourceLocation(); + InitStart = CInit->getSourceLocation(); + break; } if (InitStart.isValid()) { auto Toks = TokBuf.expandedTokens(CD->getSourceRange()); diff --git a/clang-tools-extra/clangd/unittests/TweakTests.cpp b/clang-tools-extra/clangd/unittests/TweakTests.cpp index b0a941dae5d2c..319d9e088c2d8 100644 --- a/clang-tools-extra/clangd/unittests/TweakTests.cpp +++ b/clang-tools-extra/clangd/unittests/TweakTests.cpp @@ -2059,21 +2059,57 @@ TEST_F(DefineOutlineTest, ApplyTest) { "void foo(int x, int y = 5, int = 2, int (*foo)(int) = nullptr) ;", "void foo(int x, int y , int , int (*foo)(int) ) {}", }, - // Ctor initializers. + // Constructors + { + R"cpp( + class Foo {public: Foo(); Foo(int);}; + class Bar { + Ba^r() {} + Bar(int x) : f1(x) {} + Foo f1; + Foo f2 = 2; + };)cpp", + R"cpp( + class Foo {public: Foo(); Foo(int);}; + class Bar { + Bar() ; + Bar(int x) : f1(x) {} + Foo f1; + Foo f2 = 2; + };)cpp", + "Bar::Bar() {}\n", + }, + // Ctor with initializer. + { + R"cpp( + class Foo {public: Foo(); Foo(int);}; + class Bar { + Bar() {} + B^ar(int x) : f1(x), f2(3) {} + Foo f1; + Foo f2 = 2; + };)cpp", + R"cpp( + class Foo {public: Foo(); Foo(int);}; + class Bar { + Bar() {} + Bar(int x) ; + Foo f1; + Foo f2 = 2; + };)cpp", + "Bar::Bar(int x) : f1(x), f2(3) {}\n", + }, + // Ctor initializer with attribute. 
{ R"cpp( class Foo { - int y = 2; F^oo(int z) __attribute__((weak)) : bar(2){} int bar; - int z = 2; };)cpp", R"cpp( class Foo { - int y = 2; Foo(int z) __attribute__((weak)) ; int bar; - int z = 2; };)cpp", "Foo::Foo(int z) __attribute__((weak)) : bar(2){}\n", }, From 34e39eb2adc2b3f16c2c2c0607a904ee55705c01 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Tue, 5 May 2020 17:55:11 +0200 Subject: [PATCH 060/770] [clangd] Change PreambleOnlyAction with content truncation Summary: Lexing until the token location is past preamble bound could be wrong in some cases as preprocessor lexer can lex multiple tokens in a single call. Reviewers: sammccall Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D79426 --- clang-tools-extra/clangd/Preamble.cpp | 29 ++++++------------- .../clangd/unittests/PreambleTests.cpp | 5 ++++ 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/clang-tools-extra/clangd/Preamble.cpp b/clang-tools-extra/clangd/Preamble.cpp index 9d9c5eff8c682..d3eaa92d4c1ac 100644 --- a/clang-tools-extra/clangd/Preamble.cpp +++ b/clang-tools-extra/clangd/Preamble.cpp @@ -104,24 +104,6 @@ class CppFilePreambleCallbacks : public PreambleCallbacks { const SourceManager *SourceMgr = nullptr; }; -// Runs preprocessor over preamble section. 
-class PreambleOnlyAction : public PreprocessorFrontendAction { -protected: - void ExecuteAction() override { - Preprocessor &PP = getCompilerInstance().getPreprocessor(); - auto &SM = PP.getSourceManager(); - PP.EnterMainSourceFile(); - auto Bounds = ComputePreambleBounds(getCompilerInstance().getLangOpts(), - SM.getBuffer(SM.getMainFileID()), 0); - Token Tok; - do { - PP.Lex(Tok); - assert(SM.isInMainFile(Tok.getLocation())); - } while (Tok.isNot(tok::eof) && - SM.getDecomposedLoc(Tok.getLocation()).second < Bounds.Size); - } -}; - /// Gets the includes in the preamble section of the file by running /// preprocessor over \p Contents. Returned includes do not contain resolved /// paths. \p VFS and \p Cmd is used to build the compiler invocation, which @@ -142,8 +124,15 @@ scanPreambleIncludes(llvm::StringRef Contents, "failed to create compiler invocation"); CI->getDiagnosticOpts().IgnoreWarnings = true; auto ContentsBuffer = llvm::MemoryBuffer::getMemBuffer(Contents); + // This means we're scanning (though not preprocessing) the preamble section + // twice. However, it's important to precisely follow the preamble bounds used + // elsewhere. + auto Bounds = + ComputePreambleBounds(*CI->getLangOpts(), ContentsBuffer.get(), 0); + auto PreambleContents = + llvm::MemoryBuffer::getMemBufferCopy(Contents.substr(0, Bounds.Size)); auto Clang = prepareCompilerInstance( - std::move(CI), nullptr, std::move(ContentsBuffer), + std::move(CI), nullptr, std::move(PreambleContents), // Provide an empty FS to prevent preprocessor from performing IO. This // also implies missing resolved paths for includes. new llvm::vfs::InMemoryFileSystem, IgnoreDiags); @@ -152,7 +141,7 @@ scanPreambleIncludes(llvm::StringRef Contents, "compiler instance had no inputs"); // We are only interested in main file includes. 
Clang->getPreprocessorOpts().SingleFileParseMode = true; - PreambleOnlyAction Action; + PreprocessOnlyAction Action; if (!Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0])) return llvm::createStringError(llvm::inconvertibleErrorCode(), "failed BeginSourceFile"); diff --git a/clang-tools-extra/clangd/unittests/PreambleTests.cpp b/clang-tools-extra/clangd/unittests/PreambleTests.cpp index c1801980b1d50..db615e6e66e13 100644 --- a/clang-tools-extra/clangd/unittests/PreambleTests.cpp +++ b/clang-tools-extra/clangd/unittests/PreambleTests.cpp @@ -118,6 +118,11 @@ TEST(PreamblePatchTest, IncludeParsing) { ^#include "a.h" #include )cpp", + // Directive is not part of preamble if it is not the token immediately + // followed by the hash (#). + R"cpp( + ^#include "a.h" + #/**/include )cpp", }; for (const auto Case : Cases) { From e6e89875b04ea521a9dbf3e6a82d81b23f9f77d7 Mon Sep 17 00:00:00 2001 From: Kang Zhang Date: Tue, 26 May 2020 06:14:08 +0000 Subject: [PATCH 061/770] [NFC][PowerPC] Add a new case to test two-address verification --- .../CodeGen/PowerPC/two-address-crash.mir | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/two-address-crash.mir diff --git a/llvm/test/CodeGen/PowerPC/two-address-crash.mir b/llvm/test/CodeGen/PowerPC/two-address-crash.mir new file mode 100644 index 0000000000000..6e98d3d8d398b --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/two-address-crash.mir @@ -0,0 +1,45 @@ +# RUN: not --crash llc -mtriple=ppc32-- %s -run-pass=phi-node-elimination \ +# RUN: -verify-machineinstrs -o /dev/null 2>&1 | FileCheck %s + +--- | + define void @VerifyTwoAddressCrash(i16 %div.0.i.i.i.i, i32 %L_num.0.i.i.i.i, i32 %tmp1.i.i206.i.i, i16* %P) { + %X = shl i16 %div.0.i.i.i.i, 1 + %tmp28.i.i.i.i = shl i32 %L_num.0.i.i.i.i, 1 + %tmp31.i.i.i.i = icmp slt i32 %tmp28.i.i.i.i, %tmp1.i.i206.i.i + %tmp31.i.i.i.i.upgrd.1 = zext i1 %tmp31.i.i.i.i to i16 + %tmp371.i.i.i.i1 = or i16 %tmp31.i.i.i.i.upgrd.1, %X 
+ %div.0.be.i.i.i.i = xor i16 %tmp371.i.i.i.i1, 1 + store i16 %div.0.be.i.i.i.i, i16* %P, align 2 + ret void + } + +... +--- +name: VerifyTwoAddressCrash +body: | + bb.0 (%ir-block.0): + liveins: $r3, $r4, $r5, $r6 + + %3:gprc_and_gprc_nor0 = COPY killed $r6 + %2:gprc = COPY killed $r5 + %1:gprc = COPY killed $r4 + %0:gprc = COPY killed $r3 + %4:gprc = RLWINM killed %1, 1, 0, 30 + %5:crrc = CMPW killed %4, killed %2 + %6:crbitrc = COPY killed %5.sub_lt + %7:gprc_and_gprc_nor0 = LI 0 + %8:gprc_and_gprc_nor0 = LI 1 + %9:gprc = ISEL killed %8, killed %7, killed %6 + %10:gprc = RLWIMI killed %9, killed %0, 1, 0, 30 + %11:gprc = XORI killed %10, 1 + STH killed %11, 0, killed %3 :: (store 2 into %ir.P) + BLR implicit $lr, implicit $rm + +... + +# CHECK-LABEL: Bad machine code: Two-address instruction operands must be identical +# CHECK-NEXT: - function: VerifyTwoAddressCrash +# CHECK-NEXT: - basic block: %bb.0 +# CHECK-NEXT: - instruction: %10:gprc = RLWIMI killed %9:gprc(tied-def 0), killed %3:gprc, 1, 0, 30 +# CHECK-NEXT: - operand 1: killed %9:gprc(tied-def 0) +# CHECK-NEXT: LLVM ERROR: Found 1 machine code errors. From 61f72dd8ace7c4bea1ae74d9734d2b02946b4898 Mon Sep 17 00:00:00 2001 From: Serge Pavlov Date: Tue, 26 May 2020 13:19:01 +0700 Subject: [PATCH 062/770] [FPEnv] Small fixes to implementation of flt.rounds This change makes minor correction to the implementation of intrinsic `llvm.flt.rounds`: - Added documentation entry in LangRef, - Attributes of the intrinsic changed to be in line with other functions dependent of floating-point environment. 
Differential Revision: https://reviews.llvm.org/D79322 --- clang/include/clang/Basic/Builtins.def | 4 ++- llvm/docs/LangRef.rst | 40 ++++++++++++++++++++++++++ llvm/include/llvm/IR/Intrinsics.td | 9 ++++-- 3 files changed, 50 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def index 4f1a7f24c4329..4c43d63ffec40 100644 --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -323,6 +323,9 @@ BUILTIN(__builtin_truncf, "ff", "Fnc") BUILTIN(__builtin_truncl, "LdLd", "Fnc") BUILTIN(__builtin_truncf16, "hh", "Fnc") +// Access to floating point environment +BUILTIN(__builtin_flt_rounds, "i", "n") + // C99 complex builtins BUILTIN(__builtin_cabs, "dXd", "Fne") BUILTIN(__builtin_cabsf, "fXf", "Fne") @@ -517,7 +520,6 @@ BUILTIN(__builtin_return_address, "v*IUi", "n") BUILTIN(__builtin_extract_return_addr, "v*v*", "n") BUILTIN(__builtin_frame_address, "v*IUi", "n") BUILTIN(__builtin___clear_cache, "vc*c*", "n") -BUILTIN(__builtin_flt_rounds, "i", "nc") BUILTIN(__builtin_setjmp, "iv**", "j") BUILTIN(__builtin_longjmp, "vv**i", "r") BUILTIN(__builtin_unwind_init, "v", "") diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index bf0627e441960..8bcad09964e20 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -18284,6 +18284,46 @@ This function returns the same values as the libm ``trunc`` functions would and handles error conditions in the same way. +Floating Point Environment Manipulation intrinsics +-------------------------------------------------- + +These functions read or write floating point environment, such as rounding +mode or state of floating point exceptions. Altering the floating point +environment requires special care. See :ref:`Floating Point Environment `. 
+ +'``llvm.flt.rounds``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare i32 @llvm.flt.rounds() + +Overview: +""""""""" + +The '``llvm.flt.rounds``' intrinsic reads the current rounding mode. + +Semantics: +"""""""""" + +The '``llvm.flt.rounds``' intrinsic returns the current rounding mode. +Encoding of the returned values is same as the result of ``FLT_ROUNDS``, +specified by C standard: + +:: + + 0 - toward zero + 1 - to nearest, ties to even + 2 - toward positive infinity + 3 - toward negative infinity + 4 - to nearest, ties away from zero + +Other values may be used to represent additional rounding modes, supported by a +target. These values are target-specific. + General Intrinsics ------------------ diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index dafa17959e826..51df06cee3587 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -612,6 +612,13 @@ def int_objectsize : Intrinsic<[llvm_anyint_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<1>, ImmArg<2>, ImmArg<3>]>, GCCBuiltin<"__builtin_object_size">; +//===--------------- Access to Floating Point Environment -----------------===// +// + +let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in { + def int_flt_rounds : Intrinsic<[llvm_i32_ty], []>; +} + //===--------------- Constrained Floating Point Intrinsics ----------------===// // @@ -1115,8 +1122,6 @@ def int_coro_subfn_addr : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i8_ty], ///===-------------------------- Other Intrinsics --------------------------===// // -def int_flt_rounds : Intrinsic<[llvm_i32_ty]>, - GCCBuiltin<"__builtin_flt_rounds">; def int_trap : Intrinsic<[], [], [IntrNoReturn, IntrCold]>, GCCBuiltin<"__builtin_trap">; def int_debugtrap : Intrinsic<[]>, From 872c5fb1432493c0a09b6f210765c0d94ce9b5d0 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 25 May 2020 23:00:50 -0700 Subject: [PATCH 063/770] 
[AsmPrinter] Don't generate .Lfoo$local for -fno-PIC and -fPIE -fno-PIC and -fPIE code generally cannot be linked in -shared mode and there is no benefit accessing via local aliases. Actually, a .Lfoo$local reference will be converted to a STT_SECTION (if no section relaxation) reference which will cause the section symbol (sizeof(Elf64_Sym)=24) to be generated. --- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 12 +- .../CodeGen/AArch64/fp16_intrinsic_lane.ll | 34 ---- .../machine-outliner-retaddr-sign-sp-mod.ll | 2 - llvm/test/CodeGen/X86/code-model-elf.ll | 36 ++-- llvm/test/CodeGen/X86/emutls.ll | 8 +- .../X86/indirect-branch-tracking-eh2.ll | 1 - llvm/test/CodeGen/X86/lifetime-alias.ll | 4 +- llvm/test/CodeGen/X86/linux-preemption.ll | 15 +- llvm/test/CodeGen/X86/oddsubvector.ll | 176 +++++++++--------- llvm/test/CodeGen/X86/pr38795.ll | 2 +- .../X86/semantic-interposition-comdat.ll | 7 +- llvm/test/CodeGen/X86/tls.ll | 8 +- 12 files changed, 138 insertions(+), 167 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 5fba0f01ba524..1a2b3761b3a79 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -462,10 +462,14 @@ MCSymbol *AsmPrinter::getSymbolPreferLocal(const GlobalValue &GV) const { // assembler would otherwise be conservative and assume a global default // visibility symbol can be interposable, even if the code generator already // assumed it. 
- if (TM.getTargetTriple().isOSBinFormatELF() && GV.canBenefitFromLocalAlias()) - if (GV.isDSOLocal() || (TM.getTargetTriple().isX86() && - GV.getParent()->noSemanticInterposition())) - return getSymbolWithGlobalValueBase(&GV, "$local"); + if (TM.getTargetTriple().isOSBinFormatELF() && GV.canBenefitFromLocalAlias()) { + const Module &M = *GV.getParent(); + if (TM.getRelocationModel() != Reloc::Static && + M.getPIELevel() == PIELevel::Default) + if (GV.isDSOLocal() || (TM.getTargetTriple().isX86() && + GV.getParent()->noSemanticInterposition())) + return getSymbolWithGlobalValueBase(&GV, "$local"); + } return TM.getSymbol(&GV); } diff --git a/llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll b/llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll index 1b0c7c3468870..90a5e2453a776 100644 --- a/llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll +++ b/llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll @@ -10,7 +10,6 @@ declare half @llvm.fma.f16(half, half, half) #1 define dso_local <4 x half> @t_vfma_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane) { ; CHECK-LABEL: t_vfma_lane_f16: -; CHECK: .Lt_vfma_lane_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 @@ -24,7 +23,6 @@ entry: define dso_local <8 x half> @t_vfmaq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c, i32 %lane) { ; CHECK-LABEL: t_vfmaq_lane_f16: -; CHECK: .Lt_vfmaq_lane_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 @@ -38,7 +36,6 @@ entry: define dso_local <4 x half> @t_vfma_laneq_f16(<4 x half> %a, <4 x half> %b, <8 x half> %c, i32 %lane) { ; CHECK-LABEL: t_vfma_laneq_f16: -; CHECK: .Lt_vfma_laneq_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: fmla v0.4h, v1.4h, v2.h[0] @@ -51,7 +48,6 @@ entry: define dso_local <8 x half> @t_vfmaq_laneq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i32 %lane) { ; 
CHECK-LABEL: t_vfmaq_laneq_f16: -; CHECK: .Lt_vfmaq_laneq_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: fmla v0.8h, v1.8h, v2.h[0] @@ -64,7 +60,6 @@ entry: define dso_local <4 x half> @t_vfma_n_f16(<4 x half> %a, <4 x half> %b, half %c) { ; CHECK-LABEL: t_vfma_n_f16: -; CHECK: .Lt_vfma_n_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $h2 killed $h2 def $q2 @@ -79,7 +74,6 @@ entry: define dso_local <8 x half> @t_vfmaq_n_f16(<8 x half> %a, <8 x half> %b, half %c) { ; CHECK-LABEL: t_vfmaq_n_f16: -; CHECK: .Lt_vfmaq_n_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $h2 killed $h2 def $q2 @@ -94,7 +88,6 @@ entry: define dso_local half @t_vfmah_lane_f16(half %a, half %b, <4 x half> %c, i32 %lane) { ; CHECK-LABEL: t_vfmah_lane_f16: -; CHECK: .Lt_vfmah_lane_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 @@ -108,7 +101,6 @@ entry: define dso_local half @t_vfmah_laneq_f16(half %a, half %b, <8 x half> %c, i32 %lane) { ; CHECK-LABEL: t_vfmah_laneq_f16: -; CHECK: .Lt_vfmah_laneq_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: fmla h0, h1, v2.h[0] @@ -121,7 +113,6 @@ entry: define dso_local <4 x half> @t_vfms_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane) { ; CHECK-LABEL: t_vfms_lane_f16: -; CHECK: .Lt_vfms_lane_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 @@ -136,7 +127,6 @@ entry: define dso_local <8 x half> @t_vfmsq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c, i32 %lane) { ; CHECK-LABEL: t_vfmsq_lane_f16: -; CHECK: .Lt_vfmsq_lane_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 @@ -151,7 +141,6 @@ entry: define dso_local <4 x half> 
@t_vfms_laneq_f16(<4 x half> %a, <4 x half> %b, <8 x half> %c, i32 %lane) { ; CHECK-LABEL: t_vfms_laneq_f16: -; CHECK: .Lt_vfms_laneq_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: fmls v0.4h, v1.4h, v2.h[0] @@ -165,7 +154,6 @@ entry: define dso_local <8 x half> @t_vfmsq_laneq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i32 %lane) { ; CHECK-LABEL: t_vfmsq_laneq_f16: -; CHECK: .Lt_vfmsq_laneq_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: fmls v0.8h, v1.8h, v2.h[0] @@ -179,7 +167,6 @@ entry: define dso_local <4 x half> @t_vfms_n_f16(<4 x half> %a, <4 x half> %b, half %c) { ; CHECK-LABEL: t_vfms_n_f16: -; CHECK: .Lt_vfms_n_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $h2 killed $h2 def $q2 @@ -195,7 +182,6 @@ entry: define dso_local <8 x half> @t_vfmsq_n_f16(<8 x half> %a, <8 x half> %b, half %c) { ; CHECK-LABEL: t_vfmsq_n_f16: -; CHECK: .Lt_vfmsq_n_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $h2 killed $h2 def $q2 @@ -211,7 +197,6 @@ entry: define dso_local half @t_vfmsh_lane_f16(half %a, half %b, <4 x half> %c, i32 %lane) { ; CHECK-LABEL: t_vfmsh_lane_f16: -; CHECK: .Lt_vfmsh_lane_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 @@ -226,7 +211,6 @@ entry: define dso_local half @t_vfmsh_laneq_f16(half %a, half %b, <8 x half> %c, i32 %lane) { ; CHECK-LABEL: t_vfmsh_laneq_f16: -; CHECK: .Lt_vfmsh_laneq_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: fmls h0, h1, v2.h[0] @@ -240,7 +224,6 @@ entry: define dso_local <4 x half> @t_vmul_laneq_f16(<4 x half> %a, <8 x half> %b, i32 %lane) { ; CHECK-LABEL: t_vmul_laneq_f16: -; CHECK: .Lt_vmul_laneq_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: fmul v0.4h, v0.4h, v1.h[0] @@ 
-253,7 +236,6 @@ entry: define dso_local <8 x half> @t_vmulq_laneq_f16(<8 x half> %a, <8 x half> %b, i32 %lane) { ; CHECK-LABEL: t_vmulq_laneq_f16: -; CHECK: .Lt_vmulq_laneq_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: fmul v0.8h, v0.8h, v1.h[0] @@ -266,7 +248,6 @@ entry: define dso_local half @t_vmulh_lane_f16(half %a, <4 x half> %c, i32 %lane) { ; CHECK-LABEL: t_vmulh_lane_f16: -; CHECK: .Lt_vmulh_lane_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 @@ -280,7 +261,6 @@ entry: define dso_local half @t_vmulh_laneq_f16(half %a, <8 x half> %c, i32 %lane) { ; CHECK-LABEL: t_vmulh_laneq_f16: -; CHECK: .Lt_vmulh_laneq_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: fmul h0, h0, v1.h[0] @@ -293,7 +273,6 @@ entry: define dso_local half @t_vmulx_f16(half %a, half %b) { ; CHECK-LABEL: t_vmulx_f16: -; CHECK: .Lt_vmulx_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: fmulx h0, h0, h1 @@ -305,7 +284,6 @@ entry: define dso_local half @t_vmulxh_lane_f16(half %a, <4 x half> %b, i32 %lane) { ; CHECK-LABEL: t_vmulxh_lane_f16: -; CHECK: .Lt_vmulxh_lane_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 @@ -319,7 +297,6 @@ entry: define dso_local <4 x half> @t_vmulx_lane_f16(<4 x half> %a, <4 x half> %b, i32 %lane) { ; CHECK-LABEL: t_vmulx_lane_f16: -; CHECK: .Lt_vmulx_lane_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 @@ -333,7 +310,6 @@ entry: define dso_local <8 x half> @t_vmulxq_lane_f16(<8 x half> %a, <4 x half> %b, i32 %lane) { ; CHECK-LABEL: t_vmulxq_lane_f16: -; CHECK: .Lt_vmulxq_lane_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 @@ -347,7 +323,6 @@ entry: 
define dso_local <4 x half> @t_vmulx_laneq_f16(<4 x half> %a, <8 x half> %b, i32 %lane) { ; CHECK-LABEL: t_vmulx_laneq_f16: -; CHECK: .Lt_vmulx_laneq_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: fmulx v0.4h, v0.4h, v1.h[0] @@ -360,7 +335,6 @@ entry: define dso_local <8 x half> @t_vmulxq_laneq_f16(<8 x half> %a, <8 x half> %b, i32 %lane) { ; CHECK-LABEL: t_vmulxq_laneq_f16: -; CHECK: .Lt_vmulxq_laneq_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: fmulx v0.8h, v0.8h, v1.h[0] @@ -373,7 +347,6 @@ entry: define dso_local half @t_vmulxh_laneq_f16(half %a, <8 x half> %b, i32 %lane) { ; CHECK-LABEL: t_vmulxh_laneq_f16: -; CHECK: .Lt_vmulxh_laneq_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: fmulx h0, h0, v1.h[7] @@ -386,7 +359,6 @@ entry: define dso_local <4 x half> @t_vmulx_n_f16(<4 x half> %a, half %c) { ; CHECK-LABEL: t_vmulx_n_f16: -; CHECK: .Lt_vmulx_n_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $h1 killed $h1 def $q1 @@ -402,7 +374,6 @@ entry: define dso_local <8 x half> @t_vmulxq_n_f16(<8 x half> %a, half %c) { ; CHECK-LABEL: t_vmulxq_n_f16: -; CHECK: .Lt_vmulxq_n_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $h1 killed $h1 def $q1 @@ -418,7 +389,6 @@ entry: define dso_local half @t_vfmah_lane3_f16(half %a, half %b, <4 x half> %c) { ; CHECK-LABEL: t_vfmah_lane3_f16: -; CHECK: .Lt_vfmah_lane3_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 @@ -432,7 +402,6 @@ entry: define dso_local half @t_vfmah_laneq7_f16(half %a, half %b, <8 x half> %c) { ; CHECK-LABEL: t_vfmah_laneq7_f16: -; CHECK: .Lt_vfmah_laneq7_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: fmla h0, h1, v2.h[7] @@ -445,7 +414,6 @@ entry: define dso_local half 
@t_vfmsh_lane3_f16(half %a, half %b, <4 x half> %c) { ; CHECK-LABEL: t_vfmsh_lane3_f16: -; CHECK: .Lt_vfmsh_lane3_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 @@ -460,7 +428,6 @@ entry: define dso_local half @t_vfmsh_laneq7_f16(half %a, half %b, <8 x half> %c) { ; CHECK-LABEL: t_vfmsh_laneq7_f16: -; CHECK: .Lt_vfmsh_laneq7_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: fmls h0, h1, v2.h[7] @@ -474,7 +441,6 @@ entry: define dso_local half @t_fadd_vfmah_f16(half %a, half %b, <4 x half> %c, <4 x half> %d) { ; CHECK-LABEL: t_fadd_vfmah_f16: -; CHECK: .Lt_fadd_vfmah_f16$local: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: fadd v2.4h, v2.4h, v3.4h diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.ll index 46355b35d0dec..8fd152869b23c 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.ll +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.ll @@ -4,7 +4,6 @@ @v = common dso_local global i32* null, align 8 ; CHECK-LABEL: foo: // @foo -; CHECK-NEXT: .Lfoo$local: ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: paciasp ; CHECK-NOT: OUTLINED_FUNCTION_ @@ -23,7 +22,6 @@ entry: } ; CHECK-LABEL: bar: // @bar -; CHECK-NEXT: .Lbar$local: ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: paciasp ; CHECK-NOT: OUTLINED_FUNCTION_ diff --git a/llvm/test/CodeGen/X86/code-model-elf.ll b/llvm/test/CodeGen/X86/code-model-elf.ll index 748e2b0267d8d..f7ffd6ea1eb7c 100644 --- a/llvm/test/CodeGen/X86/code-model-elf.ll +++ b/llvm/test/CodeGen/X86/code-model-elf.ll @@ -83,28 +83,28 @@ define dso_local i32* @lea_static_data() #0 { define dso_local i32* @lea_global_data() #0 { ; SMALL-STATIC-LABEL: lea_global_data: ; SMALL-STATIC: # %bb.0: -; SMALL-STATIC-NEXT: movl $.Lglobal_data$local, %eax +; SMALL-STATIC-NEXT: movl 
$global_data, %eax ; SMALL-STATIC-NEXT: retq ; ; MEDIUM-STATIC-LABEL: lea_global_data: ; MEDIUM-STATIC: # %bb.0: -; MEDIUM-STATIC-NEXT: movabsq $.Lglobal_data$local, %rax +; MEDIUM-STATIC-NEXT: movabsq $global_data, %rax ; MEDIUM-STATIC-NEXT: retq ; ; LARGE-STATIC-LABEL: lea_global_data: ; LARGE-STATIC: # %bb.0: -; LARGE-STATIC-NEXT: movabsq $.Lglobal_data$local, %rax +; LARGE-STATIC-NEXT: movabsq $global_data, %rax ; LARGE-STATIC-NEXT: retq ; ; SMALL-PIC-LABEL: lea_global_data: ; SMALL-PIC: # %bb.0: -; SMALL-PIC-NEXT: leaq .Lglobal_data$local(%rip), %rax +; SMALL-PIC-NEXT: leaq global_data(%rip), %rax ; SMALL-PIC-NEXT: retq ; ; MEDIUM-PIC-LABEL: lea_global_data: ; MEDIUM-PIC: # %bb.0: ; MEDIUM-PIC-NEXT: leaq _GLOBAL_OFFSET_TABLE_(%rip), %rcx -; MEDIUM-PIC-NEXT: movabsq $.Lglobal_data$local@GOTOFF, %rax +; MEDIUM-PIC-NEXT: movabsq $global_data@GOTOFF, %rax ; MEDIUM-PIC-NEXT: addq %rcx, %rax ; MEDIUM-PIC-NEXT: retq ; @@ -114,7 +114,7 @@ define dso_local i32* @lea_global_data() #0 { ; LARGE-PIC-NEXT: leaq .L1$pb(%rip), %rax ; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.L1$pb, %rcx ; LARGE-PIC-NEXT: addq %rax, %rcx -; LARGE-PIC-NEXT: movabsq $.Lglobal_data$local@GOTOFF, %rax +; LARGE-PIC-NEXT: movabsq $global_data@GOTOFF, %rax ; LARGE-PIC-NEXT: addq %rcx, %rax ; LARGE-PIC-NEXT: retq ret i32* getelementptr inbounds ([10 x i32], [10 x i32]* @global_data, i64 0, i64 0) @@ -161,30 +161,30 @@ define dso_local i32* @lea_extern_data() #0 { define dso_local i32 @load_global_data() #0 { ; SMALL-STATIC-LABEL: load_global_data: ; SMALL-STATIC: # %bb.0: -; SMALL-STATIC-NEXT: movl .Lglobal_data$local+8(%rip), %eax +; SMALL-STATIC-NEXT: movl global_data+8(%rip), %eax ; SMALL-STATIC-NEXT: retq ; ; MEDIUM-STATIC-LABEL: load_global_data: ; MEDIUM-STATIC: # %bb.0: -; MEDIUM-STATIC-NEXT: movabsq $.Lglobal_data$local, %rax +; MEDIUM-STATIC-NEXT: movabsq $global_data, %rax ; MEDIUM-STATIC-NEXT: movl 8(%rax), %eax ; MEDIUM-STATIC-NEXT: retq ; ; LARGE-STATIC-LABEL: load_global_data: ; 
LARGE-STATIC: # %bb.0: -; LARGE-STATIC-NEXT: movabsq $.Lglobal_data$local, %rax +; LARGE-STATIC-NEXT: movabsq $global_data, %rax ; LARGE-STATIC-NEXT: movl 8(%rax), %eax ; LARGE-STATIC-NEXT: retq ; ; SMALL-PIC-LABEL: load_global_data: ; SMALL-PIC: # %bb.0: -; SMALL-PIC-NEXT: movl .Lglobal_data$local+8(%rip), %eax +; SMALL-PIC-NEXT: movl global_data+8(%rip), %eax ; SMALL-PIC-NEXT: retq ; ; MEDIUM-PIC-LABEL: load_global_data: ; MEDIUM-PIC: # %bb.0: ; MEDIUM-PIC-NEXT: leaq _GLOBAL_OFFSET_TABLE_(%rip), %rax -; MEDIUM-PIC-NEXT: movabsq $.Lglobal_data$local@GOTOFF, %rcx +; MEDIUM-PIC-NEXT: movabsq $global_data@GOTOFF, %rcx ; MEDIUM-PIC-NEXT: movl 8(%rax,%rcx), %eax ; MEDIUM-PIC-NEXT: retq ; @@ -194,7 +194,7 @@ define dso_local i32 @load_global_data() #0 { ; LARGE-PIC-NEXT: leaq .L3$pb(%rip), %rax ; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.L3$pb, %rcx ; LARGE-PIC-NEXT: addq %rax, %rcx -; LARGE-PIC-NEXT: movabsq $.Lglobal_data$local@GOTOFF, %rax +; LARGE-PIC-NEXT: movabsq $global_data@GOTOFF, %rax ; LARGE-PIC-NEXT: movl 8(%rcx,%rax), %eax ; LARGE-PIC-NEXT: retq %rv = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @global_data, i64 0, i64 2) @@ -302,27 +302,27 @@ define dso_local void ()* @lea_static_fn() #0 { define dso_local void ()* @lea_global_fn() #0 { ; SMALL-STATIC-LABEL: lea_global_fn: ; SMALL-STATIC: # %bb.0: -; SMALL-STATIC-NEXT: movl $.Lglobal_fn$local, %eax +; SMALL-STATIC-NEXT: movl $global_fn, %eax ; SMALL-STATIC-NEXT: retq ; ; MEDIUM-STATIC-LABEL: lea_global_fn: ; MEDIUM-STATIC: # %bb.0: -; MEDIUM-STATIC-NEXT: movabsq $.Lglobal_fn$local, %rax +; MEDIUM-STATIC-NEXT: movabsq $global_fn, %rax ; MEDIUM-STATIC-NEXT: retq ; ; LARGE-STATIC-LABEL: lea_global_fn: ; LARGE-STATIC: # %bb.0: -; LARGE-STATIC-NEXT: movabsq $.Lglobal_fn$local, %rax +; LARGE-STATIC-NEXT: movabsq $global_fn, %rax ; LARGE-STATIC-NEXT: retq ; ; SMALL-PIC-LABEL: lea_global_fn: ; SMALL-PIC: # %bb.0: -; SMALL-PIC-NEXT: leaq .Lglobal_fn$local(%rip), %rax +; SMALL-PIC-NEXT: leaq 
global_fn(%rip), %rax ; SMALL-PIC-NEXT: retq ; ; MEDIUM-PIC-LABEL: lea_global_fn: ; MEDIUM-PIC: # %bb.0: -; MEDIUM-PIC-NEXT: movabsq $.Lglobal_fn$local, %rax +; MEDIUM-PIC-NEXT: movabsq $global_fn, %rax ; MEDIUM-PIC-NEXT: retq ; ; LARGE-PIC-LABEL: lea_global_fn: @@ -331,7 +331,7 @@ define dso_local void ()* @lea_global_fn() #0 { ; LARGE-PIC-NEXT: leaq .L8$pb(%rip), %rax ; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.L8$pb, %rcx ; LARGE-PIC-NEXT: addq %rax, %rcx -; LARGE-PIC-NEXT: movabsq $.Lglobal_fn$local@GOTOFF, %rax +; LARGE-PIC-NEXT: movabsq $global_fn@GOTOFF, %rax ; LARGE-PIC-NEXT: addq %rcx, %rax ; LARGE-PIC-NEXT: retq ret void ()* @global_fn diff --git a/llvm/test/CodeGen/X86/emutls.ll b/llvm/test/CodeGen/X86/emutls.ll index 8d836ef733b5e..1e706c1267d1b 100644 --- a/llvm/test/CodeGen/X86/emutls.ll +++ b/llvm/test/CodeGen/X86/emutls.ll @@ -135,7 +135,7 @@ entry: define i32 @f7() { ; X32-LABEL: f7: -; X32: movl $.L__emutls_v.i4$local, (%esp) +; X32: movl $__emutls_v.i4, (%esp) ; X32-NEXT: calll __emutls_get_address ; X32-NEXT: movl (%eax), %eax ; X32-NEXT: addl $12, %esp @@ -148,7 +148,7 @@ entry: define i32* @f8() { ; X32-LABEL: f8: -; X32: movl $.L__emutls_v.i4$local, (%esp) +; X32: movl $__emutls_v.i4, (%esp) ; X32-NEXT: calll __emutls_get_address ; X32-NEXT: addl $12, %esp ; X32-NEXT: .cfi_def_cfa_offset 4 @@ -258,14 +258,12 @@ entry: ; X32-NEXT: .long 15 ; X32-LABEL: __emutls_v.i4: -; X32-NEXT: .L__emutls_v.i4$local: ; X32-NEXT: .long 4 ; X32-NEXT: .long 4 ; X32-NEXT: .long 0 ; X32-NEXT: .long __emutls_t.i4 ; X32-LABEL: __emutls_t.i4: -; X32-NEXT: .L__emutls_t.i4$local: ; X32-NEXT: .long 15 ; X32-NOT: __emutls_v.i5: @@ -312,14 +310,12 @@ entry: ; X64-NEXT: .long 15 ; X64-LABEL: __emutls_v.i4: -; X64-NEXT: .L__emutls_v.i4$local: ; X64-NEXT: .quad 4 ; X64-NEXT: .quad 4 ; X64-NEXT: .quad 0 ; X64-NEXT: .quad __emutls_t.i4 ; X64-LABEL: __emutls_t.i4: -; X64-NEXT: .L__emutls_t.i4$local: ; X64-NEXT: .long 15 ; X64-NOT: __emutls_v.i5: diff --git 
a/llvm/test/CodeGen/X86/indirect-branch-tracking-eh2.ll b/llvm/test/CodeGen/X86/indirect-branch-tracking-eh2.ll index 312707a029cd9..6e41c94e979a1 100644 --- a/llvm/test/CodeGen/X86/indirect-branch-tracking-eh2.ll +++ b/llvm/test/CodeGen/X86/indirect-branch-tracking-eh2.ll @@ -4,7 +4,6 @@ ; NUM-COUNT-3: endbr64 ;SJLJ: main: # @main -;SJLJ-NEXT: .Lmain$local: ;SJLJ-NEXT: .Lfunc_begin0: ;SJLJ-NEXT: # %bb.0: # %entry ;SJLJ-NEXT: endbr64 diff --git a/llvm/test/CodeGen/X86/lifetime-alias.ll b/llvm/test/CodeGen/X86/lifetime-alias.ll index e57f1726a4ee2..010dc33b5051c 100644 --- a/llvm/test/CodeGen/X86/lifetime-alias.ll +++ b/llvm/test/CodeGen/X86/lifetime-alias.ll @@ -70,9 +70,9 @@ define i8 @main() local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__g ; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movq $0, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %rax -; CHECK-NEXT: movq %rax, .Ldo_not_optimize${{.*}}(%rip) +; CHECK-NEXT: movq %rax, do_not_optimize{{.*}}(%rip) ; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %rax -; CHECK-NEXT: movq %rax, .Ldo_not_optimize${{.*}}(%rip) +; CHECK-NEXT: movq %rax, do_not_optimize{{.*}}(%rip) ; CHECK-NEXT: cmpb $0, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: jns .LBB0_1 ; CHECK-NEXT: # %bb.2: # %_ZNSt3__312basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEED2Ev.exit50 diff --git a/llvm/test/CodeGen/X86/linux-preemption.ll b/llvm/test/CodeGen/X86/linux-preemption.ll index 7d22b75132186..49a7becf13432 100644 --- a/llvm/test/CodeGen/X86/linux-preemption.ll +++ b/llvm/test/CodeGen/X86/linux-preemption.ll @@ -41,7 +41,7 @@ define i32* @get_strong_local_global() { ret i32* @strong_local_global } ; CHECK: leaq .Lstrong_local_global$local(%rip), %rax -; STATIC: movl $.Lstrong_local_global$local, %eax +; STATIC: movl $strong_local_global, %eax ; CHECK32: leal .Lstrong_local_global$local@GOTOFF(%eax), %eax @weak_local_global = weak dso_local global i32 42 @@ -109,7 +109,7 @@ define i32* @get_strong_local_alias() { ret i32* 
@strong_local_alias } ; CHECK: leaq .Lstrong_local_alias$local(%rip), %rax -; STATIC: movl $.Lstrong_local_alias$local, %eax +; STATIC: movl $strong_local_alias, %eax ; CHECK32: leal .Lstrong_local_alias$local@GOTOFF(%eax), %eax @weak_local_alias = weak dso_local alias i32, i32* @aliasee @@ -174,9 +174,9 @@ define void()* @get_strong_local_function() { ret void()* @strong_local_function } ; COMMON: {{^}}strong_local_function: -; COMMON-NEXT: .Lstrong_local_function$local: +; CHECK-NEXT: .Lstrong_local_function$local: ; CHECK: leaq .Lstrong_local_function$local(%rip), %rax -; STATIC: movl $.Lstrong_local_function$local, %eax +; STATIC: movl $strong_local_function, %eax ; CHECK32: leal .Lstrong_local_function$local@GOTOFF(%eax), %eax define weak dso_local void @weak_local_function() { @@ -226,8 +226,11 @@ define void()* @get_external_preemptable_function() { ; STATIC: movl $external_preemptable_function, %eax ; CHECK32: movl external_preemptable_function@GOT(%eax), %eax +!llvm.module.flags = !{!0} +!0 = !{i32 7, !"PIC Level", i32 2} + ; COMMON: {{^}}strong_local_global: -; COMMON-NEXT: .Lstrong_local_global$local: +; CHECK-NEXT: .Lstrong_local_global$local: ; COMMON: .globl strong_default_alias ; COMMON-NEXT: .set strong_default_alias, aliasee @@ -235,7 +238,7 @@ define void()* @get_external_preemptable_function() { ; COMMON-NEXT: .set weak_default_alias, aliasee ; COMMON-NEXT: .globl strong_local_alias ; COMMON-NEXT: .set strong_local_alias, aliasee -; COMMON-NEXT: .set .Lstrong_local_alias$local, aliasee +; CHECK-NEXT: .set .Lstrong_local_alias$local, aliasee ; COMMON-NEXT: .weak weak_local_alias ; COMMON-NEXT: .set weak_local_alias, aliasee ; COMMON-NEXT: .globl strong_preemptable_alias diff --git a/llvm/test/CodeGen/X86/oddsubvector.ll b/llvm/test/CodeGen/X86/oddsubvector.ll index 8d3e01f86def6..46ff47b2a1001 100644 --- a/llvm/test/CodeGen/X86/oddsubvector.ll +++ b/llvm/test/CodeGen/X86/oddsubvector.ll @@ -187,189 +187,189 @@ define <16 x i32> @PR42819(<8 x i32>* 
%a0) { define void @PR42833() { ; SSE2-LABEL: PR42833: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm1 -; SSE2-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm0 +; SSE2-NEXT: movdqa c+{{.*}}(%rip), %xmm1 +; SSE2-NEXT: movdqa c+{{.*}}(%rip), %xmm0 ; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: addl .Lb${{.*}}(%rip), %eax +; SSE2-NEXT: addl b(%rip), %eax ; SSE2-NEXT: movd %eax, %xmm2 ; SSE2-NEXT: movd %eax, %xmm3 ; SSE2-NEXT: paddd %xmm0, %xmm3 -; SSE2-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm4 +; SSE2-NEXT: movdqa d+{{.*}}(%rip), %xmm4 ; SSE2-NEXT: psubd %xmm1, %xmm4 ; SSE2-NEXT: paddd %xmm1, %xmm1 ; SSE2-NEXT: movdqa %xmm0, %xmm5 ; SSE2-NEXT: paddd %xmm0, %xmm5 ; SSE2-NEXT: movss {{.*#+}} xmm5 = xmm3[0],xmm5[1,2,3] -; SSE2-NEXT: movdqa %xmm1, .Lc$local+{{.*}}(%rip) -; SSE2-NEXT: movaps %xmm5, .Lc$local+{{.*}}(%rip) -; SSE2-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm1 -; SSE2-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm3 -; SSE2-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm5 -; SSE2-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm6 -; SSE2-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm7 +; SSE2-NEXT: movdqa %xmm1, c+{{.*}}(%rip) +; SSE2-NEXT: movaps %xmm5, c+{{.*}}(%rip) +; SSE2-NEXT: movdqa c+{{.*}}(%rip), %xmm1 +; SSE2-NEXT: movdqa c+{{.*}}(%rip), %xmm3 +; SSE2-NEXT: movdqa d+{{.*}}(%rip), %xmm5 +; SSE2-NEXT: movdqa d+{{.*}}(%rip), %xmm6 +; SSE2-NEXT: movdqa d+{{.*}}(%rip), %xmm7 ; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3] ; SSE2-NEXT: psubd %xmm0, %xmm7 ; SSE2-NEXT: psubd %xmm3, %xmm6 ; SSE2-NEXT: psubd %xmm1, %xmm5 -; SSE2-NEXT: movdqa %xmm5, .Ld$local+{{.*}}(%rip) -; SSE2-NEXT: movdqa %xmm6, .Ld$local+{{.*}}(%rip) -; SSE2-NEXT: movdqa %xmm4, .Ld$local+{{.*}}(%rip) -; SSE2-NEXT: movdqa %xmm7, .Ld$local+{{.*}}(%rip) +; SSE2-NEXT: movdqa %xmm5, d+{{.*}}(%rip) +; SSE2-NEXT: movdqa %xmm6, d+{{.*}}(%rip) +; SSE2-NEXT: movdqa %xmm4, d+{{.*}}(%rip) +; SSE2-NEXT: movdqa %xmm7, d+{{.*}}(%rip) ; SSE2-NEXT: paddd %xmm3, %xmm3 ; SSE2-NEXT: paddd %xmm1, %xmm1 -; SSE2-NEXT: 
movdqa %xmm1, .Lc$local+{{.*}}(%rip) -; SSE2-NEXT: movdqa %xmm3, .Lc$local+{{.*}}(%rip) +; SSE2-NEXT: movdqa %xmm1, c+{{.*}}(%rip) +; SSE2-NEXT: movdqa %xmm3, c+{{.*}}(%rip) ; SSE2-NEXT: retq ; ; SSE42-LABEL: PR42833: ; SSE42: # %bb.0: -; SSE42-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm0 -; SSE42-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm1 +; SSE42-NEXT: movdqa c+{{.*}}(%rip), %xmm0 +; SSE42-NEXT: movdqa c+{{.*}}(%rip), %xmm1 ; SSE42-NEXT: movd %xmm1, %eax -; SSE42-NEXT: addl .Lb${{.*}}(%rip), %eax +; SSE42-NEXT: addl b(%rip), %eax ; SSE42-NEXT: movd %eax, %xmm2 ; SSE42-NEXT: paddd %xmm1, %xmm2 -; SSE42-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm3 +; SSE42-NEXT: movdqa d+{{.*}}(%rip), %xmm3 ; SSE42-NEXT: psubd %xmm0, %xmm3 ; SSE42-NEXT: paddd %xmm0, %xmm0 ; SSE42-NEXT: movdqa %xmm1, %xmm4 ; SSE42-NEXT: paddd %xmm1, %xmm4 ; SSE42-NEXT: pblendw {{.*#+}} xmm4 = xmm2[0,1],xmm4[2,3,4,5,6,7] -; SSE42-NEXT: movdqa %xmm0, .Lc$local+{{.*}}(%rip) -; SSE42-NEXT: movdqa %xmm4, .Lc$local+{{.*}}(%rip) -; SSE42-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm0 -; SSE42-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm2 -; SSE42-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm4 -; SSE42-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm5 -; SSE42-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm6 +; SSE42-NEXT: movdqa %xmm0, c+{{.*}}(%rip) +; SSE42-NEXT: movdqa %xmm4, c+{{.*}}(%rip) +; SSE42-NEXT: movdqa c+{{.*}}(%rip), %xmm0 +; SSE42-NEXT: movdqa c+{{.*}}(%rip), %xmm2 +; SSE42-NEXT: movdqa d+{{.*}}(%rip), %xmm4 +; SSE42-NEXT: movdqa d+{{.*}}(%rip), %xmm5 +; SSE42-NEXT: movdqa d+{{.*}}(%rip), %xmm6 ; SSE42-NEXT: pinsrd $0, %eax, %xmm1 ; SSE42-NEXT: psubd %xmm1, %xmm6 ; SSE42-NEXT: psubd %xmm2, %xmm5 ; SSE42-NEXT: psubd %xmm0, %xmm4 -; SSE42-NEXT: movdqa %xmm4, .Ld$local+{{.*}}(%rip) -; SSE42-NEXT: movdqa %xmm5, .Ld$local+{{.*}}(%rip) -; SSE42-NEXT: movdqa %xmm3, .Ld$local+{{.*}}(%rip) -; SSE42-NEXT: movdqa %xmm6, .Ld$local+{{.*}}(%rip) +; SSE42-NEXT: movdqa %xmm4, d+{{.*}}(%rip) +; SSE42-NEXT: movdqa %xmm5, d+{{.*}}(%rip) 
+; SSE42-NEXT: movdqa %xmm3, d+{{.*}}(%rip) +; SSE42-NEXT: movdqa %xmm6, d+{{.*}}(%rip) ; SSE42-NEXT: paddd %xmm2, %xmm2 ; SSE42-NEXT: paddd %xmm0, %xmm0 -; SSE42-NEXT: movdqa %xmm0, .Lc$local+{{.*}}(%rip) -; SSE42-NEXT: movdqa %xmm2, .Lc$local+{{.*}}(%rip) +; SSE42-NEXT: movdqa %xmm0, c+{{.*}}(%rip) +; SSE42-NEXT: movdqa %xmm2, c+{{.*}}(%rip) ; SSE42-NEXT: retq ; ; AVX1-LABEL: PR42833: ; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa .Lc$local+{{.*}}(%rip), %xmm0 +; AVX1-NEXT: vmovdqa c+{{.*}}(%rip), %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax -; AVX1-NEXT: addl .Lb${{.*}}(%rip), %eax +; AVX1-NEXT: addl b(%rip), %eax ; AVX1-NEXT: vmovd %eax, %xmm1 ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1 ; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa .Lc$local+{{.*}}(%rip), %xmm3 +; AVX1-NEXT: vmovdqa c+{{.*}}(%rip), %xmm3 ; AVX1-NEXT: vpaddd %xmm3, %xmm3, %xmm3 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 ; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7] -; AVX1-NEXT: vmovdqa .Ld$local+{{.*}}(%rip), %xmm2 -; AVX1-NEXT: vpsubd .Lc$local+{{.*}}(%rip), %xmm2, %xmm2 -; AVX1-NEXT: vmovups %ymm1, .Lc$local+{{.*}}(%rip) +; AVX1-NEXT: vmovdqa d+{{.*}}(%rip), %xmm2 +; AVX1-NEXT: vpsubd c+{{.*}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vmovups %ymm1, c+{{.*}}(%rip) ; AVX1-NEXT: vpinsrd $0, %eax, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa .Ld$local+{{.*}}(%rip), %xmm1 +; AVX1-NEXT: vmovdqa d+{{.*}}(%rip), %xmm1 ; AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vmovdqa .Ld$local+{{.*}}(%rip), %xmm1 -; AVX1-NEXT: vmovdqa .Lc$local+{{.*}}(%rip), %xmm3 +; AVX1-NEXT: vmovdqa d+{{.*}}(%rip), %xmm1 +; AVX1-NEXT: vmovdqa c+{{.*}}(%rip), %xmm3 ; AVX1-NEXT: vpsubd %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vmovdqa .Ld$local+{{.*}}(%rip), %xmm4 -; AVX1-NEXT: vmovdqa .Lc$local+{{.*}}(%rip), %xmm5 +; AVX1-NEXT: vmovdqa d+{{.*}}(%rip), %xmm4 +; AVX1-NEXT: vmovdqa c+{{.*}}(%rip), %xmm5 ; AVX1-NEXT: vpsubd %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vmovdqa %xmm2, .Ld$local+{{.*}}(%rip) -; AVX1-NEXT: vmovdqa %xmm4, 
.Ld$local+{{.*}}(%rip) -; AVX1-NEXT: vmovdqa %xmm1, .Ld$local+{{.*}}(%rip) -; AVX1-NEXT: vmovdqa %xmm0, .Ld$local+{{.*}}(%rip) +; AVX1-NEXT: vmovdqa %xmm2, d+{{.*}}(%rip) +; AVX1-NEXT: vmovdqa %xmm4, d+{{.*}}(%rip) +; AVX1-NEXT: vmovdqa %xmm1, d+{{.*}}(%rip) +; AVX1-NEXT: vmovdqa %xmm0, d+{{.*}}(%rip) ; AVX1-NEXT: vpaddd %xmm3, %xmm3, %xmm0 ; AVX1-NEXT: vpaddd %xmm5, %xmm5, %xmm1 -; AVX1-NEXT: vmovdqa %xmm1, .Lc$local+{{.*}}(%rip) -; AVX1-NEXT: vmovdqa %xmm0, .Lc$local+{{.*}}(%rip) +; AVX1-NEXT: vmovdqa %xmm1, c+{{.*}}(%rip) +; AVX1-NEXT: vmovdqa %xmm0, c+{{.*}}(%rip) ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: PR42833: ; AVX2: # %bb.0: -; AVX2-NEXT: movl .Lb${{.*}}(%rip), %eax -; AVX2-NEXT: vmovdqu .Lc$local+{{.*}}(%rip), %ymm0 -; AVX2-NEXT: addl .Lc$local+{{.*}}(%rip), %eax +; AVX2-NEXT: movl b(%rip), %eax +; AVX2-NEXT: vmovdqu c+{{.*}}(%rip), %ymm0 +; AVX2-NEXT: addl c+{{.*}}(%rip), %eax ; AVX2-NEXT: vmovd %eax, %xmm1 ; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm2 ; AVX2-NEXT: vpaddd %ymm0, %ymm0, %ymm3 ; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0],ymm3[1,2,3,4,5,6,7] -; AVX2-NEXT: vmovdqu %ymm2, .Lc$local+{{.*}}(%rip) -; AVX2-NEXT: vmovdqu .Lc$local+{{.*}}(%rip), %ymm2 -; AVX2-NEXT: vmovdqu .Ld$local+{{.*}}(%rip), %ymm3 -; AVX2-NEXT: vmovdqu .Ld$local+{{.*}}(%rip), %ymm4 +; AVX2-NEXT: vmovdqu %ymm2, c+{{.*}}(%rip) +; AVX2-NEXT: vmovdqu c+{{.*}}(%rip), %ymm2 +; AVX2-NEXT: vmovdqu d+{{.*}}(%rip), %ymm3 +; AVX2-NEXT: vmovdqu d+{{.*}}(%rip), %ymm4 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7] ; AVX2-NEXT: vpsubd %ymm0, %ymm4, %ymm0 ; AVX2-NEXT: vpsubd %ymm2, %ymm3, %ymm1 -; AVX2-NEXT: vmovdqu %ymm1, .Ld$local+{{.*}}(%rip) -; AVX2-NEXT: vmovdqu %ymm0, .Ld$local+{{.*}}(%rip) +; AVX2-NEXT: vmovdqu %ymm1, d+{{.*}}(%rip) +; AVX2-NEXT: vmovdqu %ymm0, d+{{.*}}(%rip) ; AVX2-NEXT: vpaddd %ymm2, %ymm2, %ymm0 -; AVX2-NEXT: vmovdqu %ymm0, .Lc$local+{{.*}}(%rip) +; AVX2-NEXT: vmovdqu %ymm0, c+{{.*}}(%rip) ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; 
AVX512-LABEL: PR42833: ; AVX512: # %bb.0: -; AVX512-NEXT: movl .Lb${{.*}}(%rip), %eax -; AVX512-NEXT: vmovdqu .Lc$local+{{.*}}(%rip), %ymm0 -; AVX512-NEXT: vmovdqu64 .Lc$local+{{.*}}(%rip), %zmm1 -; AVX512-NEXT: addl .Lc$local+{{.*}}(%rip), %eax +; AVX512-NEXT: movl b(%rip), %eax +; AVX512-NEXT: vmovdqu c+{{.*}}(%rip), %ymm0 +; AVX512-NEXT: vmovdqu64 c+{{.*}}(%rip), %zmm1 +; AVX512-NEXT: addl c+{{.*}}(%rip), %eax ; AVX512-NEXT: vmovd %eax, %xmm2 ; AVX512-NEXT: vpaddd %ymm2, %ymm0, %ymm2 ; AVX512-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0],ymm0[1,2,3,4,5,6,7] -; AVX512-NEXT: vmovdqa .Lc$local+{{.*}}(%rip), %xmm2 -; AVX512-NEXT: vmovdqu %ymm0, .Lc$local+{{.*}}(%rip) -; AVX512-NEXT: vmovdqu .Lc$local+{{.*}}(%rip), %ymm0 -; AVX512-NEXT: vmovdqu64 .Ld$local+{{.*}}(%rip), %zmm3 +; AVX512-NEXT: vmovdqa c+{{.*}}(%rip), %xmm2 +; AVX512-NEXT: vmovdqu %ymm0, c+{{.*}}(%rip) +; AVX512-NEXT: vmovdqu c+{{.*}}(%rip), %ymm0 +; AVX512-NEXT: vmovdqu64 d+{{.*}}(%rip), %zmm3 ; AVX512-NEXT: vpinsrd $0, %eax, %xmm2, %xmm2 ; AVX512-NEXT: vinserti32x4 $0, %xmm2, %zmm1, %zmm1 ; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm1 ; AVX512-NEXT: vpsubd %zmm1, %zmm3, %zmm1 -; AVX512-NEXT: vmovdqu64 %zmm1, .Ld$local+{{.*}}(%rip) +; AVX512-NEXT: vmovdqu64 %zmm1, d+{{.*}}(%rip) ; AVX512-NEXT: vpaddd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqu %ymm0, .Lc$local+{{.*}}(%rip) +; AVX512-NEXT: vmovdqu %ymm0, c+{{.*}}(%rip) ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq ; ; XOP-LABEL: PR42833: ; XOP: # %bb.0: -; XOP-NEXT: vmovdqa .Lc$local+{{.*}}(%rip), %xmm0 +; XOP-NEXT: vmovdqa c+{{.*}}(%rip), %xmm0 ; XOP-NEXT: vmovd %xmm0, %eax -; XOP-NEXT: addl .Lb${{.*}}(%rip), %eax +; XOP-NEXT: addl b(%rip), %eax ; XOP-NEXT: vmovd %eax, %xmm1 ; XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm1 ; XOP-NEXT: vpaddd %xmm0, %xmm0, %xmm2 -; XOP-NEXT: vmovdqa .Lc$local+{{.*}}(%rip), %xmm3 +; XOP-NEXT: vmovdqa c+{{.*}}(%rip), %xmm3 ; XOP-NEXT: vpaddd %xmm3, %xmm3, %xmm3 ; XOP-NEXT: vinsertf128 $1, 
%xmm3, %ymm2, %ymm2 ; XOP-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7] -; XOP-NEXT: vmovdqa .Ld$local+{{.*}}(%rip), %xmm2 -; XOP-NEXT: vpsubd .Lc$local+{{.*}}(%rip), %xmm2, %xmm2 -; XOP-NEXT: vmovups %ymm1, .Lc$local+{{.*}}(%rip) +; XOP-NEXT: vmovdqa d+{{.*}}(%rip), %xmm2 +; XOP-NEXT: vpsubd c+{{.*}}(%rip), %xmm2, %xmm2 +; XOP-NEXT: vmovups %ymm1, c+{{.*}}(%rip) ; XOP-NEXT: vpinsrd $0, %eax, %xmm0, %xmm0 -; XOP-NEXT: vmovdqa .Ld$local+{{.*}}(%rip), %xmm1 +; XOP-NEXT: vmovdqa d+{{.*}}(%rip), %xmm1 ; XOP-NEXT: vpsubd %xmm0, %xmm1, %xmm0 -; XOP-NEXT: vmovdqa .Ld$local+{{.*}}(%rip), %xmm1 -; XOP-NEXT: vmovdqa .Lc$local+{{.*}}(%rip), %xmm3 +; XOP-NEXT: vmovdqa d+{{.*}}(%rip), %xmm1 +; XOP-NEXT: vmovdqa c+{{.*}}(%rip), %xmm3 ; XOP-NEXT: vpsubd %xmm3, %xmm1, %xmm1 -; XOP-NEXT: vmovdqa .Ld$local+{{.*}}(%rip), %xmm4 -; XOP-NEXT: vmovdqa .Lc$local+{{.*}}(%rip), %xmm5 +; XOP-NEXT: vmovdqa d+{{.*}}(%rip), %xmm4 +; XOP-NEXT: vmovdqa c+{{.*}}(%rip), %xmm5 ; XOP-NEXT: vpsubd %xmm5, %xmm4, %xmm4 -; XOP-NEXT: vmovdqa %xmm2, .Ld$local+{{.*}}(%rip) -; XOP-NEXT: vmovdqa %xmm4, .Ld$local+{{.*}}(%rip) -; XOP-NEXT: vmovdqa %xmm1, .Ld$local+{{.*}}(%rip) -; XOP-NEXT: vmovdqa %xmm0, .Ld$local+{{.*}}(%rip) +; XOP-NEXT: vmovdqa %xmm2, d+{{.*}}(%rip) +; XOP-NEXT: vmovdqa %xmm4, d+{{.*}}(%rip) +; XOP-NEXT: vmovdqa %xmm1, d+{{.*}}(%rip) +; XOP-NEXT: vmovdqa %xmm0, d+{{.*}}(%rip) ; XOP-NEXT: vpaddd %xmm3, %xmm3, %xmm0 ; XOP-NEXT: vpaddd %xmm5, %xmm5, %xmm1 -; XOP-NEXT: vmovdqa %xmm1, .Lc$local+{{.*}}(%rip) -; XOP-NEXT: vmovdqa %xmm0, .Lc$local+{{.*}}(%rip) +; XOP-NEXT: vmovdqa %xmm1, c+{{.*}}(%rip) +; XOP-NEXT: vmovdqa %xmm0, c+{{.*}}(%rip) ; XOP-NEXT: vzeroupper ; XOP-NEXT: retq %1 = load i32, i32* @b, align 4 diff --git a/llvm/test/CodeGen/X86/pr38795.ll b/llvm/test/CodeGen/X86/pr38795.ll index 3c44798a805f0..d805dcad8b6e6 100644 --- a/llvm/test/CodeGen/X86/pr38795.ll +++ b/llvm/test/CodeGen/X86/pr38795.ll @@ -93,7 +93,7 @@ define dso_local void @fn() { ; CHECK-NEXT: # %bb.18: # 
%if.then41 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) -; CHECK-NEXT: movl $.Lfn$local, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $fn, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $.str, (%esp) ; CHECK-NEXT: calll printf ; CHECK-NEXT: .LBB0_19: # %for.end46 diff --git a/llvm/test/CodeGen/X86/semantic-interposition-comdat.ll b/llvm/test/CodeGen/X86/semantic-interposition-comdat.ll index 06574056298dd..d0efd4d11c958 100644 --- a/llvm/test/CodeGen/X86/semantic-interposition-comdat.ll +++ b/llvm/test/CodeGen/X86/semantic-interposition-comdat.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple x86_64-unknown-linux-gnu %s -o - | FileCheck %s +; RUN: llc -mtriple=x86_64 -relocation-model=pic < %s | FileCheck %s $comdat_func = comdat any @@ -21,3 +21,8 @@ entry: call void @func() ret void } + +!llvm.module.flags = !{!0, !1} + +!0 = !{i32 1, !"SemanticInterposition", i32 0} +!1 = !{i32 7, !"PIC Level", i32 2} diff --git a/llvm/test/CodeGen/X86/tls.ll b/llvm/test/CodeGen/X86/tls.ll index b1d29b34a9584..759f3d7c85500 100644 --- a/llvm/test/CodeGen/X86/tls.ll +++ b/llvm/test/CodeGen/X86/tls.ll @@ -210,10 +210,10 @@ entry: define i32 @f7() { ; X86_LINUX-LABEL: f7: -; X86_LINUX: movl %gs:.Li4$local@NTPOFF, %eax +; X86_LINUX: movl %gs:i4@NTPOFF, %eax ; X86_LINUX-NEXT: ret ; X64_LINUX-LABEL: f7: -; X64_LINUX: movl %fs:.Li4$local@TPOFF, %eax +; X64_LINUX: movl %fs:i4@TPOFF, %eax ; X64_LINUX-NEXT: ret ; MINGW32-LABEL: _f7: ; MINGW32: movl __tls_index, %eax @@ -230,11 +230,11 @@ entry: define i32* @f8() { ; X86_LINUX-LABEL: f8: ; X86_LINUX: movl %gs:0, %eax -; X86_LINUX-NEXT: leal .Li4$local@NTPOFF(%eax), %eax +; X86_LINUX-NEXT: leal i4@NTPOFF(%eax), %eax ; X86_LINUX-NEXT: ret ; X64_LINUX-LABEL: f8: ; X64_LINUX: movq %fs:0, %rax -; X64_LINUX-NEXT: leaq .Li4$local@TPOFF(%rax), %rax +; X64_LINUX-NEXT: leaq i4@TPOFF(%rax), %rax ; X64_LINUX-NEXT: ret ; MINGW32-LABEL: _f8: ; MINGW32: movl __tls_index, %eax From c34936dae734085c4bc01703da0f5b7456e1bf51 Mon Sep 17 00:00:00 
2001 From: Pavel Labath Date: Tue, 26 May 2020 09:21:54 +0200 Subject: [PATCH 064/770] [lldb] s/dyn_cast/isa The cast result is unused and produces a warning with gcc. --- lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index c687251ed5dcb..9ff8bdb7537fa 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -344,7 +344,7 @@ static void SetMemberOwningModule(clang::Decl *member, member->setFromASTFile(); member->setOwningModuleID(id.GetValue()); member->setModuleOwnershipKind(clang::Decl::ModuleOwnershipKind::Visible); - if (auto *nd = llvm::dyn_cast(member)) + if (llvm::isa(member)) if (auto *dc = llvm::dyn_cast(parent)) { dc->setHasExternalVisibleStorage(true); // This triggers ExternalASTSource::FindExternalVisibleDeclsByName() to be From 1f72d5880e332dfbd36c22388d2b72bd2bd70411 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Tue, 26 May 2020 08:31:38 +0100 Subject: [PATCH 065/770] [CostModel] Check for free intrinsics in BasicTTI Recommitting part of "[CostModel] Unify Intrinsic Costs." de71def3f59dc9f12f67141b5040d8e15c84d08a Now that the 'free' intrinsic information has been sunk to the lowest level, query the base implementation in BasicTTI before doing anything else. I suspect this is the change that was causing the main changes, particularly the large effects on debug builds. 
Differential Revision: https://reviews.llvm.org/D80012 --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 3 + .../Analysis/CostModel/X86/free-intrinsics.ll | 78 ++++++++++++++++++ .../CostModel/free-intrinsics-datalayout.ll | 80 +++++++++++++++++++ .../CostModel/free-intrinsics-no_info.ll | 78 ++++++++++++++++++ 4 files changed, 239 insertions(+) create mode 100644 llvm/test/Analysis/CostModel/X86/free-intrinsics.ll create mode 100644 llvm/test/Analysis/CostModel/free-intrinsics-datalayout.ll create mode 100644 llvm/test/Analysis/CostModel/free-intrinsics-no_info.ll diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 5a891779e1857..dbbcc795ea00f 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1091,6 +1091,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) { + if (BaseT::getIntrinsicInstrCost(ICA, CostKind) == 0) + return 0; + // TODO: Combine these two logic paths. 
if (ICA.isTypeBasedOnly()) return getTypeBasedIntrinsicInstrCost(ICA, CostKind); diff --git a/llvm/test/Analysis/CostModel/X86/free-intrinsics.ll b/llvm/test/Analysis/CostModel/X86/free-intrinsics.ll new file mode 100644 index 0000000000000..f85e267637141 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/free-intrinsics.ll @@ -0,0 +1,78 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -mtriple=x86_64-- -analyze -cost-model -cost-kind=code-size %s -S -o - | FileCheck %s --check-prefix=CHECK-SIZE +; RUN: opt -mtriple=x86_64-- -analyze -cost-model -cost-kind=throughput %s -S -o - | FileCheck %s --check-prefix=CHECK-THROUGHPUT + +define i32 @trivially_free() { +; CHECK-SIZE-LABEL: 'trivially_free' +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = call i32 @llvm.annotation.i32(i32 undef, i8* undef, i8* undef, i32 undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.assume(i1 undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.sideeffect() +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = call {}* @llvm.invariant.start.p0i8(i64 1, i8* undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.invariant.end.p0i8({}* undef, i64 1, i8* undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = call i8* @llvm.launder.invariant.group.p0i8(i8* undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = call i8* @llvm.strip.invariant.group.p0i8(i8* undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4 = call i1 @llvm.is.constant.i32(i32 undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0i8(i64 1, i8* undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost 
of 0 for instruction: call void @llvm.lifetime.end.p0i8(i64 1, i8* undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = call i64 @llvm.objectsize.i64.p0i8(i8* undef, i1 true, i1 true, i1 true) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a6 = call i8* @llvm.ptr.annotation.p0i8(i8* undef, i8* undef, i8* undef, i32 undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.var.annotation(i8* undef, i8* undef, i8* undef, i32 undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; CHECK-THROUGHPUT-LABEL: 'trivially_free' +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = call i32 @llvm.annotation.i32(i32 undef, i8* undef, i8* undef, i32 undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.assume(i1 undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.sideeffect() +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = call {}* @llvm.invariant.start.p0i8(i64 1, i8* undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.invariant.end.p0i8({}* undef, i64 1, i8* undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = call i8* @llvm.launder.invariant.group.p0i8(i8* undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = call i8* @llvm.strip.invariant.group.p0i8(i8* undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4 = call i1 @llvm.is.constant.i32(i32 undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0i8(i64 1, i8* undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: call void @llvm.lifetime.end.p0i8(i64 1, i8* undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = call i64 @llvm.objectsize.i64.p0i8(i8* undef, i1 true, i1 true, i1 true) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a6 = call i8* @llvm.ptr.annotation.p0i8(i8* undef, i8* undef, i8* undef, i32 undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.var.annotation(i8* undef, i8* undef, i8* undef, i32 undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; + %a0 = call i32 @llvm.annotation.i32(i32 undef, i8* undef, i8* undef, i32 undef) + call void @llvm.assume(i1 undef) + call void @llvm.sideeffect() + call void @llvm.dbg.declare(metadata i8** undef, metadata !0, metadata !DIExpression()) + call void @llvm.dbg.value(metadata i64 undef, i64 undef, metadata !DIExpression(), metadata !DIExpression()) + call void @llvm.dbg.label(metadata !2) + %a1 = call {}* @llvm.invariant.start.p0i8(i64 1, i8* undef) + call void @llvm.invariant.end.p0i8({}* undef, i64 1, i8* undef) + %a2 = call i8* @llvm.launder.invariant.group.p0i8(i8* undef) + %a3 = call i8* @llvm.strip.invariant.group.p0i8(i8* undef) + %a4 = call i1 @llvm.is.constant.i32(i32 undef) + call void @llvm.lifetime.start.p0i8(i64 1, i8* undef) + call void @llvm.lifetime.end.p0i8(i64 1, i8* undef) + %a5 = call i64 @llvm.objectsize.i64.p0i8(i8* undef, i1 1, i1 1, i1 1) + %a6 = call i8* @llvm.ptr.annotation.p0i8(i8* undef, i8* undef, i8* undef, i32 undef) + call void @llvm.var.annotation(i8* undef, i8* undef, i8* undef, i32 undef) + ret i32 undef +} + +declare i32 @llvm.annotation.i32(i32, i8*, i8*, i32) +declare void @llvm.assume(i1) +declare void @llvm.sideeffect() +declare void @llvm.dbg.declare(metadata, metadata, metadata) +declare void @llvm.dbg.value(metadata, i64, metadata, metadata) +declare void @llvm.dbg.label(metadata) 
+declare {}* @llvm.invariant.start.p0i8(i64, i8*) +declare void @llvm.invariant.end.p0i8({}*, i64, i8*) +declare i8* @llvm.launder.invariant.group.p0i8(i8*) +declare i8* @llvm.strip.invariant.group.p0i8(i8*) +declare i1 @llvm.is.constant.i32(i32) +declare void @llvm.lifetime.start.p0i8(i64, i8*) +declare void @llvm.lifetime.end.p0i8(i64, i8*) +declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1) +declare i8* @llvm.ptr.annotation.p0i8(i8*, i8*, i8*, i32) +declare void @llvm.var.annotation(i8*, i8*, i8*, i32) + + +!0 = !DILocalVariable(scope: !1) +!1 = distinct !DISubprogram(name: "dummy", line: 79, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true) +!2 = !DILabel(scope: !1, name: "label", file: !3, line: 7) +!3 = !DIFile(filename: "debug-label.c", directory: "./") diff --git a/llvm/test/Analysis/CostModel/free-intrinsics-datalayout.ll b/llvm/test/Analysis/CostModel/free-intrinsics-datalayout.ll new file mode 100644 index 0000000000000..232265a5cdfdb --- /dev/null +++ b/llvm/test/Analysis/CostModel/free-intrinsics-datalayout.ll @@ -0,0 +1,80 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -analyze -cost-model -cost-kind=code-size %s -S -o - | FileCheck %s --check-prefix=CHECK-SIZE +; RUN: opt -analyze -cost-model -cost-kind=throughput %s -S -o - | FileCheck %s --check-prefix=CHECK-THROUGHPUT + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + +define i32 @trivially_free() { +; CHECK-SIZE-LABEL: 'trivially_free' +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = call i32 @llvm.annotation.i32(i32 undef, i8* undef, i8* undef, i32 undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.assume(i1 undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.sideeffect() +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %a1 = call {}* @llvm.invariant.start.p0i8(i64 1, i8* undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.invariant.end.p0i8({}* undef, i64 1, i8* undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = call i8* @llvm.launder.invariant.group.p0i8(i8* undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = call i8* @llvm.strip.invariant.group.p0i8(i8* undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4 = call i1 @llvm.is.constant.i32(i32 undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0i8(i64 1, i8* undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0i8(i64 1, i8* undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = call i64 @llvm.objectsize.i64.p0i8(i8* undef, i1 true, i1 true, i1 true) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a6 = call i8* @llvm.ptr.annotation.p0i8(i8* undef, i8* undef, i8* undef, i32 undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.var.annotation(i8* undef, i8* undef, i8* undef, i32 undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; CHECK-THROUGHPUT-LABEL: 'trivially_free' +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = call i32 @llvm.annotation.i32(i32 undef, i8* undef, i8* undef, i32 undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.assume(i1 undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.sideeffect() +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = call {}* 
@llvm.invariant.start.p0i8(i64 1, i8* undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.invariant.end.p0i8({}* undef, i64 1, i8* undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = call i8* @llvm.launder.invariant.group.p0i8(i8* undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = call i8* @llvm.strip.invariant.group.p0i8(i8* undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4 = call i1 @llvm.is.constant.i32(i32 undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0i8(i64 1, i8* undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0i8(i64 1, i8* undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = call i64 @llvm.objectsize.i64.p0i8(i8* undef, i1 true, i1 true, i1 true) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a6 = call i8* @llvm.ptr.annotation.p0i8(i8* undef, i8* undef, i8* undef, i32 undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.var.annotation(i8* undef, i8* undef, i8* undef, i32 undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %a0 = call i32 @llvm.annotation.i32(i32 undef, i8* undef, i8* undef, i32 undef) + call void @llvm.assume(i1 undef) + call void @llvm.sideeffect() + call void @llvm.dbg.declare(metadata i8** undef, metadata !0, metadata !DIExpression()) + call void @llvm.dbg.value(metadata i64 undef, i64 undef, metadata !DIExpression(), metadata !DIExpression()) + call void @llvm.dbg.label(metadata !2) + %a1 = call {}* @llvm.invariant.start.p0i8(i64 1, i8* undef) + call void @llvm.invariant.end.p0i8({}* undef, i64 1, i8* undef) + 
%a2 = call i8* @llvm.launder.invariant.group.p0i8(i8* undef) + %a3 = call i8* @llvm.strip.invariant.group.p0i8(i8* undef) + %a4 = call i1 @llvm.is.constant.i32(i32 undef) + call void @llvm.lifetime.start.p0i8(i64 1, i8* undef) + call void @llvm.lifetime.end.p0i8(i64 1, i8* undef) + %a5 = call i64 @llvm.objectsize.i64.p0i8(i8* undef, i1 1, i1 1, i1 1) + %a6 = call i8* @llvm.ptr.annotation.p0i8(i8* undef, i8* undef, i8* undef, i32 undef) + call void @llvm.var.annotation(i8* undef, i8* undef, i8* undef, i32 undef) + ret i32 undef +} + +declare i32 @llvm.annotation.i32(i32, i8*, i8*, i32) +declare void @llvm.assume(i1) +declare void @llvm.sideeffect() +declare void @llvm.dbg.declare(metadata, metadata, metadata) +declare void @llvm.dbg.value(metadata, i64, metadata, metadata) +declare void @llvm.dbg.label(metadata) +declare {}* @llvm.invariant.start.p0i8(i64, i8*) +declare void @llvm.invariant.end.p0i8({}*, i64, i8*) +declare i8* @llvm.launder.invariant.group.p0i8(i8*) +declare i8* @llvm.strip.invariant.group.p0i8(i8*) +declare i1 @llvm.is.constant.i32(i32) +declare void @llvm.lifetime.start.p0i8(i64, i8*) +declare void @llvm.lifetime.end.p0i8(i64, i8*) +declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1) +declare i8* @llvm.ptr.annotation.p0i8(i8*, i8*, i8*, i32) +declare void @llvm.var.annotation(i8*, i8*, i8*, i32) + + +!0 = !DILocalVariable(scope: !1) +!1 = distinct !DISubprogram(name: "dummy", line: 79, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true) +!2 = !DILabel(scope: !1, name: "label", file: !3, line: 7) +!3 = !DIFile(filename: "debug-label.c", directory: "./") diff --git a/llvm/test/Analysis/CostModel/free-intrinsics-no_info.ll b/llvm/test/Analysis/CostModel/free-intrinsics-no_info.ll new file mode 100644 index 0000000000000..9622a4f0dd1db --- /dev/null +++ b/llvm/test/Analysis/CostModel/free-intrinsics-no_info.ll @@ -0,0 +1,78 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_analyze_test_checks.py +; RUN: opt -analyze -cost-model -cost-kind=code-size %s -S -o - | FileCheck %s --check-prefix=CHECK-SIZE +; RUN: opt -analyze -cost-model -cost-kind=throughput %s -S -o - | FileCheck %s --check-prefix=CHECK-THROUGHPUT + +define i32 @trivially_free() { +; CHECK-SIZE-LABEL: 'trivially_free' +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = call i32 @llvm.annotation.i32(i32 undef, i8* undef, i8* undef, i32 undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.assume(i1 undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.sideeffect() +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = call {}* @llvm.invariant.start.p0i8(i64 1, i8* undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.invariant.end.p0i8({}* undef, i64 1, i8* undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = call i8* @llvm.launder.invariant.group.p0i8(i8* undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = call i8* @llvm.strip.invariant.group.p0i8(i8* undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4 = call i1 @llvm.is.constant.i32(i32 undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0i8(i64 1, i8* undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0i8(i64 1, i8* undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = call i64 @llvm.objectsize.i64.p0i8(i8* undef, i1 true, i1 true, i1 true) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a6 = call i8* @llvm.ptr.annotation.p0i8(i8* undef, i8* undef, i8* undef, i32 undef) +; CHECK-SIZE-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: call void @llvm.var.annotation(i8* undef, i8* undef, i8* undef, i32 undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; CHECK-THROUGHPUT-LABEL: 'trivially_free' +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = call i32 @llvm.annotation.i32(i32 undef, i8* undef, i8* undef, i32 undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.assume(i1 undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.sideeffect() +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = call {}* @llvm.invariant.start.p0i8(i64 1, i8* undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.invariant.end.p0i8({}* undef, i64 1, i8* undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = call i8* @llvm.launder.invariant.group.p0i8(i8* undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = call i8* @llvm.strip.invariant.group.p0i8(i8* undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4 = call i1 @llvm.is.constant.i32(i32 undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0i8(i64 1, i8* undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0i8(i64 1, i8* undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = call i64 @llvm.objectsize.i64.p0i8(i8* undef, i1 true, i1 true, i1 true) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a6 = call i8* @llvm.ptr.annotation.p0i8(i8* undef, i8* undef, i8* undef, i32 undef) +; CHECK-THROUGHPUT-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.var.annotation(i8* undef, i8* undef, i8* undef, i32 undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %a0 = call i32 @llvm.annotation.i32(i32 undef, i8* undef, i8* undef, i32 undef) + call void @llvm.assume(i1 undef) + call void @llvm.sideeffect() + call void @llvm.dbg.declare(metadata i8** undef, metadata !0, metadata !DIExpression()) + call void @llvm.dbg.value(metadata i64 undef, i64 undef, metadata !DIExpression(), metadata !DIExpression()) + call void @llvm.dbg.label(metadata !2) + %a1 = call {}* @llvm.invariant.start.p0i8(i64 1, i8* undef) + call void @llvm.invariant.end.p0i8({}* undef, i64 1, i8* undef) + %a2 = call i8* @llvm.launder.invariant.group.p0i8(i8* undef) + %a3 = call i8* @llvm.strip.invariant.group.p0i8(i8* undef) + %a4 = call i1 @llvm.is.constant.i32(i32 undef) + call void @llvm.lifetime.start.p0i8(i64 1, i8* undef) + call void @llvm.lifetime.end.p0i8(i64 1, i8* undef) + %a5 = call i64 @llvm.objectsize.i64.p0i8(i8* undef, i1 1, i1 1, i1 1) + %a6 = call i8* @llvm.ptr.annotation.p0i8(i8* undef, i8* undef, i8* undef, i32 undef) + call void @llvm.var.annotation(i8* undef, i8* undef, i8* undef, i32 undef) + ret i32 undef +} + +declare i32 @llvm.annotation.i32(i32, i8*, i8*, i32) +declare void @llvm.assume(i1) +declare void @llvm.sideeffect() +declare void @llvm.dbg.declare(metadata, metadata, metadata) +declare void @llvm.dbg.value(metadata, i64, metadata, metadata) +declare void @llvm.dbg.label(metadata) +declare {}* @llvm.invariant.start.p0i8(i64, i8*) +declare void @llvm.invariant.end.p0i8({}*, i64, i8*) +declare i8* @llvm.launder.invariant.group.p0i8(i8*) +declare i8* @llvm.strip.invariant.group.p0i8(i8*) +declare i1 @llvm.is.constant.i32(i32) +declare void @llvm.lifetime.start.p0i8(i64, i8*) +declare void @llvm.lifetime.end.p0i8(i64, i8*) +declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1) +declare i8* 
@llvm.ptr.annotation.p0i8(i8*, i8*, i8*, i32) +declare void @llvm.var.annotation(i8*, i8*, i8*, i32) + + +!0 = !DILocalVariable(scope: !1) +!1 = distinct !DISubprogram(name: "dummy", line: 79, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true) +!2 = !DILabel(scope: !1, name: "label", file: !3, line: 7) +!3 = !DIFile(filename: "debug-label.c", directory: "./") From 80cc43b420a8ab8648f44fbb554b483a2998712d Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 25 May 2020 23:59:00 -0700 Subject: [PATCH 066/770] [AArch64] Set i32 ISD::MULHU/S to Expand instead of Legal. Looks like there are no isel patterns for these. A DAG combine turns it into i64 multiply and a shift which hides this. Extracted from D80485 --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 856a2e4d9d67d..5eb9b7463411f 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -354,6 +354,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::ROTR, VT, Expand); } + // AArch64 doesn't have i32 MULH{S|U}. + setOperationAction(ISD::MULHU, MVT::i32, Expand); + setOperationAction(ISD::MULHS, MVT::i32, Expand); + // AArch64 doesn't have {U|S}MUL_LOHI. setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); From 1abb883a048153c83a4e11070219d23f362e7377 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Wed, 20 May 2020 16:03:42 +0200 Subject: [PATCH 067/770] [clangd] Don't traverse the AST within uninteresting files during indexing Summary: We already skip function bodies from these files while parsing, and drop symbols found in them. However, traversing their ASTs still takes a substantial amount of time. 
Non-scientific benchmark on my machine: background-indexing llvm-project (llvm+clang+clang-tools-extra), wall time before: 7:46 after: 5:13 change: -33% Indexer.cpp libclang should be updated too, I'm less familiar with that code, and it's doing tricky things with the ShouldSkipFunctionBody callback, so it needs to be done separately. Reviewers: kadircet Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D80296 --- .../clangd/index/IndexAction.cpp | 25 +++++++------ .../clangd/unittests/IndexActionTests.cpp | 37 +++++++++++++++++-- clang/include/clang/Index/IndexingAction.h | 17 ++++----- clang/include/clang/Index/IndexingOptions.h | 6 +++ clang/lib/Index/IndexDecl.cpp | 3 ++ clang/lib/Index/IndexingAction.cpp | 15 ++++++++ 6 files changed, 79 insertions(+), 24 deletions(-) diff --git a/clang-tools-extra/clangd/index/IndexAction.cpp b/clang-tools-extra/clangd/index/IndexAction.cpp index 9f294d4ab9252..aa65008b51c00 100644 --- a/clang-tools-extra/clangd/index/IndexAction.cpp +++ b/clang-tools-extra/clangd/index/IndexAction.cpp @@ -132,11 +132,19 @@ class IndexAction : public ASTFrontendAction { std::function RefsCallback, std::function RelationsCallback, std::function IncludeGraphCallback) - : SymbolsCallback(SymbolsCallback), - RefsCallback(RefsCallback), RelationsCallback(RelationsCallback), + : SymbolsCallback(SymbolsCallback), RefsCallback(RefsCallback), + RelationsCallback(RelationsCallback), IncludeGraphCallback(IncludeGraphCallback), Collector(C), Includes(std::move(Includes)), Opts(Opts), - PragmaHandler(collectIWYUHeaderMaps(this->Includes.get())) {} + PragmaHandler(collectIWYUHeaderMaps(this->Includes.get())) { + this->Opts.ShouldTraverseDecl = [this](const Decl *D) { + auto &SM = D->getASTContext().getSourceManager(); + auto FID = SM.getFileID(SM.getExpansionLoc(D->getLocation())); + if (!FID.isValid()) + return true; + return Collector->shouldIndexFile(FID); + }; + } 
std::unique_ptr CreateASTConsumer(CompilerInstance &CI, llvm::StringRef InFile) override { @@ -146,15 +154,8 @@ class IndexAction : public ASTFrontendAction { CI.getPreprocessor().addPPCallbacks( std::make_unique(CI.getSourceManager(), IG)); - return index::createIndexingASTConsumer( - Collector, Opts, CI.getPreprocessorPtr(), - /*ShouldSkipFunctionBody=*/[this](const Decl *D) { - auto &SM = D->getASTContext().getSourceManager(); - auto FID = SM.getFileID(SM.getExpansionLoc(D->getLocation())); - if (!FID.isValid()) - return false; - return !Collector->shouldIndexFile(FID); - }); + return index::createIndexingASTConsumer(Collector, Opts, + CI.getPreprocessorPtr()); } bool BeginInvocation(CompilerInstance &CI) override { diff --git a/clang-tools-extra/clangd/unittests/IndexActionTests.cpp b/clang-tools-extra/clangd/unittests/IndexActionTests.cpp index 6441d019c7e18..31e1bc573290f 100644 --- a/clang-tools-extra/clangd/unittests/IndexActionTests.cpp +++ b/clang-tools-extra/clangd/unittests/IndexActionTests.cpp @@ -19,6 +19,7 @@ namespace { using ::testing::AllOf; using ::testing::ElementsAre; +using ::testing::EndsWith; using ::testing::Not; using ::testing::Pair; using ::testing::UnorderedElementsAre; @@ -75,8 +76,7 @@ class IndexActionTest : public ::testing::Test { new FileManager(FileSystemOptions(), InMemoryFileSystem)); auto Action = createStaticIndexingAction( - SymbolCollector::Options(), - [&](SymbolSlab S) { IndexFile.Symbols = std::move(S); }, + Opts, [&](SymbolSlab S) { IndexFile.Symbols = std::move(S); }, [&](RefSlab R) { IndexFile.Refs = std::move(R); }, [&](RelationSlab R) { IndexFile.Relations = std::move(R); }, [&](IncludeGraph IG) { IndexFile.Sources = std::move(IG); }); @@ -99,11 +99,12 @@ class IndexActionTest : public ::testing::Test { void addFile(llvm::StringRef Path, llvm::StringRef Content) { InMemoryFileSystem->addFile(Path, 0, - llvm::MemoryBuffer::getMemBuffer(Content)); + llvm::MemoryBuffer::getMemBufferCopy(Content)); 
FilePaths.push_back(std::string(Path)); } protected: + SymbolCollector::Options Opts; std::vector FilePaths; llvm::IntrusiveRefCntPtr InMemoryFileSystem; }; @@ -250,6 +251,36 @@ TEST_F(IndexActionTest, NoWarnings) { EXPECT_THAT(*IndexFile.Symbols, ElementsAre(HasName("foo"), HasName("bar"))); } +TEST_F(IndexActionTest, SkipFiles) { + std::string MainFilePath = testPath("main.cpp"); + addFile(MainFilePath, R"cpp( + // clang-format off + #include "good.h" + #include "bad.h" + // clang-format on + )cpp"); + addFile(testPath("good.h"), R"cpp( + struct S { int s; }; + void f1() { S f; } + auto unskippable1() { return S(); } + )cpp"); + addFile(testPath("bad.h"), R"cpp( + struct T { S t; }; + void f2() { S f; } + auto unskippable2() { return S(); } + )cpp"); + Opts.FileFilter = [](const SourceManager &SM, FileID F) { + return !SM.getFileEntryForID(F)->getName().endswith("bad.h"); + }; + IndexFileIn IndexFile = runIndexingAction(MainFilePath, {"-std=c++14"}); + EXPECT_THAT(*IndexFile.Symbols, + UnorderedElementsAre(HasName("S"), HasName("s"), HasName("f1"), + HasName("unskippable1"))); + for (const auto &Pair : *IndexFile.Refs) + for (const auto &Ref : Pair.second) + EXPECT_THAT(Ref.Location.FileURI, EndsWith("good.h")); +} + } // namespace } // namespace clangd } // namespace clang diff --git a/clang/include/clang/Index/IndexingAction.h b/clang/include/clang/Index/IndexingAction.h index 9ed2a018f1617..4baa2d5e72603 100644 --- a/clang/include/clang/Index/IndexingAction.h +++ b/clang/include/clang/Index/IndexingAction.h @@ -30,22 +30,21 @@ namespace serialization { } namespace index { - class IndexDataConsumer; +class IndexDataConsumer; /// Creates an ASTConsumer that indexes all symbols (macros and AST decls). 
+std::unique_ptr +createIndexingASTConsumer(std::shared_ptr DataConsumer, + const IndexingOptions &Opts, + std::shared_ptr PP); + std::unique_ptr createIndexingASTConsumer( std::shared_ptr DataConsumer, const IndexingOptions &Opts, std::shared_ptr PP, + // Prefer to set Opts.ShouldTraverseDecl and use the above overload. + // This version is only needed if used to *track* function body parsing. std::function ShouldSkipFunctionBody); -inline std::unique_ptr createIndexingASTConsumer( - std::shared_ptr DataConsumer, - const IndexingOptions &Opts, std::shared_ptr PP) { - return createIndexingASTConsumer( - std::move(DataConsumer), Opts, std::move(PP), - /*ShouldSkipFunctionBody=*/[](const Decl *) { return false; }); -} - /// Creates a frontend action that indexes all symbols (macros and AST decls). std::unique_ptr createIndexingAction(std::shared_ptr DataConsumer, diff --git a/clang/include/clang/Index/IndexingOptions.h b/clang/include/clang/Index/IndexingOptions.h index bbfd6e4a72c62..2dd276998abf7 100644 --- a/clang/include/clang/Index/IndexingOptions.h +++ b/clang/include/clang/Index/IndexingOptions.h @@ -34,6 +34,12 @@ struct IndexingOptions { // Has no effect if IndexFunctionLocals are false. bool IndexParametersInDeclarations = false; bool IndexTemplateParameters = false; + + // If set, skip indexing inside some declarations for performance. + // This prevents traversal, so skipping a struct means its declaration and + // members won't be indexed, but references elsewhere to that struct will be. + // Currently this is only checked for top-level declarations. + std::function ShouldTraverseDecl; }; } // namespace index diff --git a/clang/lib/Index/IndexDecl.cpp b/clang/lib/Index/IndexDecl.cpp index 68160bc59eb6a..2ba323e635753 100644 --- a/clang/lib/Index/IndexDecl.cpp +++ b/clang/lib/Index/IndexDecl.cpp @@ -765,6 +765,9 @@ bool IndexingContext::indexTopLevelDecl(const Decl *D) { if (isa(D)) return true; // Wait for the objc container.
+ if (IndexOpts.ShouldTraverseDecl && !IndexOpts.ShouldTraverseDecl(D)) + return true; // skip + return indexDecl(D); } diff --git a/clang/lib/Index/IndexingAction.cpp b/clang/lib/Index/IndexingAction.cpp index 4f402135672c3..e698c07133a9c 100644 --- a/clang/lib/Index/IndexingAction.cpp +++ b/clang/lib/Index/IndexingAction.cpp @@ -131,6 +131,21 @@ std::unique_ptr index::createIndexingASTConsumer( ShouldSkipFunctionBody); } +std::unique_ptr clang::index::createIndexingASTConsumer( + std::shared_ptr DataConsumer, + const IndexingOptions &Opts, std::shared_ptr PP) { + std::function ShouldSkipFunctionBody = [](const Decl *) { + return false; + }; + if (Opts.ShouldTraverseDecl) + ShouldSkipFunctionBody = + [ShouldTraverseDecl(Opts.ShouldTraverseDecl)](const Decl *D) { + return !ShouldTraverseDecl(D); + }; + return createIndexingASTConsumer(std::move(DataConsumer), Opts, std::move(PP), + std::move(ShouldSkipFunctionBody)); +} + std::unique_ptr index::createIndexingAction(std::shared_ptr DataConsumer, const IndexingOptions &Opts) { From 64cfb8a864cf98dcd762a26d03cba95145b9aa41 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Tue, 26 May 2020 09:41:04 +0100 Subject: [PATCH 068/770] [NFC][ARM] Add intrinsic code size runs Add code size analysis of arithmetic intrinsics. 
--- .../Analysis/CostModel/ARM/arith-overflow.ll | 1023 +++++++++++------ .../test/Analysis/CostModel/ARM/arith-ssat.ll | 339 ++++-- .../test/Analysis/CostModel/ARM/arith-usat.ll | 339 ++++-- .../test/Analysis/CostModel/ARM/reduce-add.ll | 223 ++-- 4 files changed, 1245 insertions(+), 679 deletions(-) diff --git a/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll b/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll index 66a03888c0d45..b50aa97643e22 100644 --- a/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll +++ b/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll @@ -1,7 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -cost-model -analyze -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M -; RUN: opt -cost-model -analyze -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON -; RUN: opt -cost-model -analyze -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE +; RUN: opt -cost-model -analyze -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M-RECIP +; RUN: opt -cost-model -analyze -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-RECIP +; RUN: opt -cost-model -analyze -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE-RECIP +; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M-SIZE +; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-SIZE +; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE-SIZE declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) declare {<2 x i64>, <2 x i1>} @llvm.sadd.with.overflow.v2i64(<2 x i64>, <2 x i64>) @@ -24,62 +27,119 @@ declare {<32 x i8>, <32 x i1>} @llvm.sadd.with.overflow.v32i8(<32 x i8>, <32 x declare {<64 x i8>, <64 x i1>} @llvm.sadd.with.overflow.v64i8(<64 x 
i8>, <64 x i8>) define i32 @sadd(i32 %arg) { -; V8M-LABEL: 'sadd' -; V8M-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I16 
= call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; NEON-LABEL: 'sadd' -; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } 
@llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; MVE-LABEL: 'sadd' -; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } 
@llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 166 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 582 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = 
call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; V8M-RECIP-LABEL: 'sadd' +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an 
estimated cost of 7 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; NEON-RECIP-LABEL: 'sadd' +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> 
undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for 
instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; MVE-RECIP-LABEL: 'sadd' +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 166 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 582 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef) +; MVE-RECIP-NEXT: Cost 
Model: Found an estimated cost of 14 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; V8M-SIZE-LABEL: 'sadd' +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } 
@llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; NEON-SIZE-LABEL: 'sadd' +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; 
NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; MVE-SIZE-LABEL: 'sadd' +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 
undef, i32 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: ret i32 undef ; %I64 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) @@ -125,62 +185,119 @@ declare {<32 x i8>, <32 x i1>} @llvm.uadd.with.overflow.v32i8(<32 x i8>, <32 x declare {<64 x i8>, <64 x i1>} @llvm.uadd.with.overflow.v64i8(<64 x i8>, <64 x i8>) define i32 @uadd(i32 %arg) { -; V8M-LABEL: 'uadd' -; V8M-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 
24 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; NEON-LABEL: 'uadd' -; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; NEON-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: ret i32 undef -; -; MVE-LABEL: 'uadd' -; MVE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 16 for 
instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; V8M-RECIP-LABEL: 'uadd' +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for 
instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; NEON-RECIP-LABEL: 'uadd' +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef) +; NEON-RECIP-NEXT: 
Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; MVE-RECIP-LABEL: 'uadd' +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an 
estimated cost of 16 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; V8M-SIZE-LABEL: 'uadd' +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; 
V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call { <16 x i8>, 
<16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; NEON-SIZE-LABEL: 'uadd' +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; MVE-SIZE-LABEL: 'uadd' +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } 
@llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 undef, i64 undef) %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) @@ -226,62 +343,119 @@ declare {<32 x i8>, <32 x i1>} @llvm.ssub.with.overflow.v32i8(<32 x i8>, <32 x declare {<64 x i8>, <64 x i1>} @llvm.ssub.with.overflow.v64i8(<64 x i8>, <64 x i8>) define i32 @ssub(i32 %arg) { -; V8M-LABEL: 'ssub' -; V8M-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = 
call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; NEON-LABEL: 'ssub' -; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } 
@llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 14 for instruction: 
%V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; MVE-LABEL: 'ssub' -; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 166 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 582 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } 
@llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; V8M-RECIP-LABEL: 'ssub' +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, 
i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x 
i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; NEON-RECIP-LABEL: 'ssub' +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } 
@llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; MVE-RECIP-LABEL: 'ssub' +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 166 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 582 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for 
instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; V8M-SIZE-LABEL: 
'ssub' +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call { 
<32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; NEON-SIZE-LABEL: 'ssub' +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; MVE-SIZE-LABEL: 'ssub' +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef) +; MVE-SIZE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } 
@llvm.ssub.with.overflow.i8(i8 undef, i8 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 undef, i64 undef) %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) @@ -327,62 +501,119 @@ declare {<32 x i8>, <32 x i1>} @llvm.usub.with.overflow.v32i8(<32 x i8>, <32 x declare {<64 x i8>, <64 x i1>} @llvm.usub.with.overflow.v64i8(<64 x i8>, <64 x i8>) define i32 @usub(i32 %arg) { -; V8M-LABEL: 'usub' -; V8M-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call { <4 x 
i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; NEON-LABEL: 'usub' -; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } 
@llvm.usub.with.overflow.i64(i64 undef, i64 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; MVE-LABEL: 'usub' -; MVE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } 
@llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; V8M-RECIP-LABEL: 'usub' +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } 
@llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; 
V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; NEON-RECIP-LABEL: 'usub' +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } 
@llvm.usub.with.overflow.i16(i16 undef, i16 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; MVE-RECIP-LABEL: 'usub' +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 288 
for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } 
@llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; V8M-SIZE-LABEL: 'usub' +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call { <8 x 
i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; NEON-SIZE-LABEL: 'usub' +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } 
@llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; MVE-SIZE-LABEL: 'usub' +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call { <16 x 
i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 undef, i64 undef) %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) @@ -428,62 +659,119 @@ declare {<32 x i8>, <32 x i1>} @llvm.smul.with.overflow.v32i8(<32 x i8>, <32 x declare {<64 x i8>, <64 x i1>} @llvm.smul.with.overflow.v64i8(<64 x i8>, <64 x i8>) define i32 @smul(i32 %arg) { -; V8M-LABEL: 'smul' -; V8M-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; V8M-NEXT: Cost Model: Found an 
estimated cost of 312 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> 
undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; NEON-LABEL: 'smul' -; NEON-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 154 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 300 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; NEON-NEXT: Cost 
Model: Found an estimated cost of 42 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; MVE-LABEL: 'smul' -; MVE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) -; MVE-NEXT: Cost Model: Found an estimated cost 
of 86 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 316 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1208 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 424 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 424 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; V8M-RECIP-LABEL: 'smul' +; V8M-RECIP-NEXT: Cost Model: Found an 
estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> 
} @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; NEON-RECIP-LABEL: 'smul' +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 154 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 300 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 30 for 
instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; MVE-RECIP-LABEL: 'smul' +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) +; 
MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 316 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1208 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 424 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an 
estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 424 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; V8M-SIZE-LABEL: 'smul' +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-SIZE-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; NEON-SIZE-LABEL: 'smul' +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 
+; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call 
{ <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; MVE-SIZE-LABEL: 'smul' +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call {i64, i1} @llvm.smul.with.overflow.i64(i64 undef, i64 undef) %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) @@ -529,62 +817,119 @@ declare {<32 x i8>, <32 x i1>} @llvm.umul.with.overflow.v32i8(<32 x i8>, <32 x declare {<64 x i8>, <64 x i1>} @llvm.umul.with.overflow.v64i8(<64 x i8>, <64 x i8>) define i32 @umul(i32 %arg) { -; V8M-LABEL: 'umul' -; V8M-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) -; 
V8M-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 304 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } 
@llvm.umul.with.overflow.i8(i8 undef, i8 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; NEON-LABEL: 'umul' -; NEON-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } 
@llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; MVE-LABEL: 'umul' -; MVE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } 
@llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 624 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1200 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 112 for instruction: 
%V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; V8M-RECIP-LABEL: 'umul' +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 304 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated 
cost of 48 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; NEON-RECIP-LABEL: 'umul' +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x 
i64> undef, <8 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an 
estimated cost of 76 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; MVE-RECIP-LABEL: 'umul' +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 624 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1200 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 
+; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; V8M-SIZE-LABEL: 'umul' +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } 
@llvm.umul.with.overflow.i32(i32 undef, i32 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-SIZE-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; NEON-SIZE-LABEL: 'umul' +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x 
i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; MVE-SIZE-LABEL: 'umul' +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x 
i32> undef, <4 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 undef, i64 undef) %V2I64 = call {<2 x i64>, <2 x i1>} 
@llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll b/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll index 59687ce72249c..bc2ac44215e01 100644 --- a/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll +++ b/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll @@ -1,7 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -cost-model -analyze -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M -; RUN: opt -cost-model -analyze -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON -; RUN: opt -cost-model -analyze -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE +; RUN: opt -cost-model -analyze -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M-RECIP +; RUN: opt -cost-model -analyze -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-RECIP +; RUN: opt -cost-model -analyze -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE-RECIP +; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M-SIZE +; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-SIZE +; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE-SIZE declare i64 @llvm.sadd.sat.i64(i64, i64) declare <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64>, <2 x i64>) @@ -24,62 +27,119 @@ declare <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8>, <32 x i8>) declare <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @add(i32 %arg) { -; V8M-LABEL: 'add' -; V8M-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x 
i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 1152 for 
instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; V8M-RECIP-LABEL: 'add' +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %V32I16 = call <32 x i16> 
@llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1152 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; NEON-LABEL: 'add' -; NEON-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated 
cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; NEON-RECIP-LABEL: 'add' +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; 
NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; MVE-LABEL: 'add' -; MVE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 118 
for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 302 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1046 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> 
undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; MVE-RECIP-LABEL: 'add' +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 302 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1046 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost 
of 68 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; V8M-SIZE-LABEL: 'add' +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x 
i32> undef, <16 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; NEON-SIZE-LABEL: 'add' +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; MVE-SIZE-LABEL: 'add' +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 
@llvm.sadd.sat.i64(i64 undef, i64 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) 
+; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) @@ -125,62 +185,119 @@ declare <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8>, <32 x i8>) declare <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @sub(i32 %arg) { -; V8M-LABEL: 'sub' -; V8M-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 
@llvm.ssub.sat.i16(i16 undef, i16 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 1152 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; V8M-RECIP-LABEL: 'sub' +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 
72 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1152 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; NEON-RECIP-LABEL: 'sub' +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x 
i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x 
i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; MVE-RECIP-LABEL: 'sub' +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 302 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1046 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 
x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; V8M-SIZE-LABEL: 'sub' +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; -; NEON-LABEL: 'sub' -; NEON-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) 
-; NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; NEON-SIZE-LABEL: 'sub' +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 
undef, i64 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; 
NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; -; MVE-LABEL: 'sub' -; MVE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 302 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1046 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 
x i16> undef, <16 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; MVE-SIZE-LABEL: 'sub' +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 
= call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/arith-usat.ll b/llvm/test/Analysis/CostModel/ARM/arith-usat.ll index 92c313f4ab1c5..c4f09937b47e5 100644 --- a/llvm/test/Analysis/CostModel/ARM/arith-usat.ll +++ b/llvm/test/Analysis/CostModel/ARM/arith-usat.ll @@ -1,7 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -cost-model -analyze -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M -; RUN: opt -cost-model -analyze 
-mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON -; RUN: opt -cost-model -analyze -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE +; RUN: opt -cost-model -analyze -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M-RECIP +; RUN: opt -cost-model -analyze -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-RECIP +; RUN: opt -cost-model -analyze -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE-RECIP +; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M-SIZE +; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-SIZE +; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE-SIZE declare i64 @llvm.uadd.sat.i64(i64, i64) declare <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64>, <2 x i64>) @@ -24,62 +27,119 @@ declare <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8>, <32 x i8>) declare <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @add(i32 %arg) { -; V8M-LABEL: 'add' -; V8M-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <4 x i32> 
@llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; V8M-RECIP-LABEL: 'add' +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for 
instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 320 for instruction: 
%V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; NEON-LABEL: 'add' -; NEON-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; NEON-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; NEON-RECIP-LABEL: 'add' +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, 
i16 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; MVE-LABEL: 'add' -; MVE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = 
call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; MVE-RECIP-LABEL: 'add' +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost 
of 128 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for 
instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; V8M-SIZE-LABEL: 'add' +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> 
undef, <32 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; NEON-SIZE-LABEL: 'add' +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; MVE-SIZE-LABEL: 'add' +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, 
i32 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) @@ -125,62 +185,119 @@ declare <32 x i8> @llvm.usub.sat.v32i8(<32 x i8>, <32 x i8>) 
declare <64 x i8> @llvm.usub.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @sub(i32 %arg) { -; V8M-LABEL: 'sub' -; V8M-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; 
V8M-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; V8M-RECIP-LABEL: 'sub' +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = 
call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; NEON-RECIP-LABEL: 'sub' +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> 
undef, <4 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; MVE-RECIP-LABEL: 'sub' +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost 
Model: Found an estimated cost of 128 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an 
estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; V8M-SIZE-LABEL: 'sub' +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> 
@llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; -; NEON-LABEL: 'sub' -; NEON-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; NEON-SIZE-LABEL: 'sub' +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; NEON-SIZE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; -; MVE-LABEL: 'sub' -; MVE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I64 = call <2 x i64> 
@llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; MVE-NEXT: Cost Model: Found an 
estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; MVE-SIZE-LABEL: 'sub' +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> 
@llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-add.ll b/llvm/test/Analysis/CostModel/ARM/reduce-add.ll index 73614ab5ece83..089061253d822 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-add.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-add.ll @@ -1,32 +1,59 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -cost-model -analyze -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M -; RUN: opt -cost-model -analyze -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON -; RUN: opt -cost-model -analyze -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE +; RUN: opt -cost-model -analyze -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M-RECIP +; RUN: opt -cost-model -analyze -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-RECIP +; RUN: opt -cost-model -analyze -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE-RECIP +; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M-SIZE +; RUN: 
opt -cost-model -analyze -cost-kind=code-size -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-SIZE +; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE-SIZE define i32 @reduce_i64(i32 %arg) { -; V8M-LABEL: 'reduce_i64' -; V8M-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; V8M-RECIP-LABEL: 'reduce_i64' +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 0 
for instruction: ret i32 undef ; -; NEON-LABEL: 'reduce_i64' -; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; NEON-RECIP-LABEL: 'reduce_i64' +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; MVE-LABEL: 'reduce_i64' -; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) -; MVE-NEXT: Cost Model: Found an 
estimated cost of 30 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; MVE-RECIP-LABEL: 'reduce_i64' +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; V8M-SIZE-LABEL: 'reduce_i64' +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 
@llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; NEON-SIZE-LABEL: 'reduce_i64' +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; MVE-SIZE-LABEL: 'reduce_i64' +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) +; 
MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) @@ -37,105 +64,65 @@ define i32 @reduce_i64(i32 %arg) { } define i32 @reduce_i32(i32 %arg) { -; V8M-LABEL: 'reduce_i32' -; V8M-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; NEON-LABEL: 'reduce_i32' -; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef) -; NEON-NEXT: Cost Model: Found an 
estimated cost of 228 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; MVE-LABEL: 'reduce_i32' -; MVE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 622 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 2166 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; - %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef) - %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef) - %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef) - %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef) - %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef) - ret i32 undef -} - -define i32 @reduce_i16(i32 %arg) { -; V8M-LABEL: 'reduce_i16' -; V8M-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) -; V8M-NEXT: Cost 
Model: Found an estimated cost of 46 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; V8M-RECIP-LABEL: 'reduce_i32' +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; NEON-LABEL: 'reduce_i16' -; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i16 
@llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 297 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 493 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; NEON-RECIP-LABEL: 'reduce_i32' +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 391 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 488 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 682 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1070 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; MVE-LABEL: 'reduce_i16' -; MVE-NEXT: 
Cost Model: Found an estimated cost of 28 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 782 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1168 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 2708 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 8860 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; MVE-RECIP-LABEL: 'reduce_i32' +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 782 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4120 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5658 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 11806 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36390 for 
instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) - %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) - %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) - %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) - %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) - %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) - ret i32 undef -} - -define i32 @reduce_i8(i32 %arg) { -; V8M-LABEL: 'reduce_i8' -; V8M-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; V8M-SIZE-LABEL: 'reduce_i32' +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 
@llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; -; NEON-LABEL: 'reduce_i8' -; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 391 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 488 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 682 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) -; NEON-NEXT: Cost Model: Found an 
estimated cost of 1070 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; NEON-SIZE-LABEL: 'reduce_i32' +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; -; MVE-LABEL: 'reduce_i8' -; MVE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 782 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 4120 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) -; 
MVE-NEXT: Cost Model: Found an estimated cost of 5658 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 11806 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 36390 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; MVE-SIZE-LABEL: 'reduce_i32' +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) From 871556a494552c0f503eec17055f075bcd859937 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Tue, 26 May 2020 09:23:18 +0100 Subject: [PATCH 
069/770] [CostModel] Unify Intrinsic Costs. Recommitting most of the remaining changes from 259eb619ff6dcd5b6111d1686e18559b9ca004d4, but excluding the call to getUserCost from getInstructionThroughput. Though there's still no test changes, I doubt that this is an NFC... With the two getIntrinsicInstrCosts folded into one, now fold in the scalar/code-size orientated getIntrinsicCost. The remaining scalar intrinsics were memcpy, cttz and ctlz which now have special handling in the BasicTTI implementation. This had required a change in the AMDGPU backend for fabs as it should always be 'free'. I've also changed the X86 backend to return the BaseT implementation when the CostKind isn't RecipThroughput. Differential Revision: https://reviews.llvm.org/D80012 --- .../llvm/Analysis/TargetTransformInfo.h | 40 ++--------- .../llvm/Analysis/TargetTransformInfoImpl.h | 37 ++-------- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 70 +++++++++---------- llvm/lib/Analysis/TargetTransformInfo.cpp | 27 ++++--- .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 3 + .../lib/Target/X86/X86TargetTransformInfo.cpp | 6 ++ .../Transforms/Scalar/LoopIdiomRecognize.cpp | 8 ++- 7 files changed, 73 insertions(+), 118 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index c2ba9a488dca2..c50c696741b17 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -38,6 +38,7 @@ class AssumptionCache; class BlockFrequencyInfo; class DominatorTree; class BranchInst; +class CallBase; class Function; class GlobalValue; class IntrinsicInst; @@ -120,10 +121,12 @@ class IntrinsicCostAttributes { public: IntrinsicCostAttributes(const IntrinsicInst &I); - IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI, + IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI); + + IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, unsigned Factor); - 
IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI, + IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, unsigned Factor, unsigned ScalarCost); IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, @@ -141,7 +144,7 @@ class IntrinsicCostAttributes { IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, ArrayRef Tys); - IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty, + IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, ArrayRef Args); Intrinsic::ID getID() const { return IID; } @@ -288,18 +291,6 @@ class TargetTransformInfo { /// scientific. A target may has no bonus on vector instructions. int getInlinerVectorBonusPercent() const; - /// Estimate the cost of an intrinsic when lowered. - int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef ParamTys, - const User *U = nullptr, - TTI::TargetCostKind CostKind = TCK_SizeAndLatency) const; - - /// Estimate the cost of an intrinsic when lowered. - int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Arguments, - const User *U = nullptr, - TTI::TargetCostKind CostKind = TCK_SizeAndLatency) const; - /// \return the expected cost of a memcpy, which could e.g. depend on the /// source/destination type and alignment and the number of bytes copied. 
int getMemcpyCost(const Instruction *I) const; @@ -1231,13 +1222,6 @@ class TargetTransformInfo::Concept { TTI::TargetCostKind CostKind) = 0; virtual unsigned getInliningThresholdMultiplier() = 0; virtual int getInlinerVectorBonusPercent() = 0; - virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef ParamTys, const User *U, - enum TargetCostKind CostKind) = 0; - virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Arguments, - const User *U, - enum TargetCostKind CostKind) = 0; virtual int getMemcpyCost(const Instruction *I) = 0; virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, @@ -1495,18 +1479,6 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { int getInlinerVectorBonusPercent() override { return Impl.getInlinerVectorBonusPercent(); } - int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef ParamTys, - const User *U = nullptr, - TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) override { - return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U, CostKind); - } - int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Arguments, - const User *U = nullptr, - TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) override { - return Impl.getIntrinsicCost(IID, RetTy, Arguments, U, CostKind); - } int getMemcpyCost(const Instruction *I) override { return Impl.getMemcpyCost(I); } diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index d353bf056df9d..60de70dcb16a0 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -772,36 +772,8 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { return TTI::TCC_Basic; } - unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef ParamTys, const User *U, - TTI::TargetCostKind CostKind) { - switch (IID) { - default: - break; 
- // TODO: other libc intrinsics. - case Intrinsic::memcpy: - return static_cast(this)->getMemcpyCost(dyn_cast(U)); - } - IntrinsicCostAttributes Attrs(IID, RetTy, ParamTys); - return getIntrinsicInstrCost(Attrs, CostKind); - } - - unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Arguments, const User *U, - TTI::TargetCostKind CostKind) { - // Delegate to the generic intrinsic handling code. This mostly provides an - // opportunity for targets to (for example) special case the cost of - // certain intrinsics based on constants used as arguments. - SmallVector ParamTys; - ParamTys.reserve(Arguments.size()); - for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx) - ParamTys.push_back(Arguments[Idx]->getType()); - return static_cast(this)->getIntrinsicCost(IID, RetTy, ParamTys, U, - CostKind); - } - - unsigned getUserCost(const User *U, ArrayRef Operands, - TTI::TargetCostKind CostKind) { + int getUserCost(const User *U, ArrayRef Operands, + TTI::TargetCostKind CostKind) { auto *TargetTTI = static_cast(this); // FIXME: Unlikely to be true for anything but CodeSize. 
@@ -810,9 +782,8 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { if (F) { FunctionType *FTy = F->getFunctionType(); if (Intrinsic::ID IID = F->getIntrinsicID()) { - SmallVector ParamTys(FTy->param_begin(), FTy->param_end()); - return TargetTTI->getIntrinsicCost(IID, FTy->getReturnType(), - ParamTys, U, CostKind); + IntrinsicCostAttributes Attrs(IID, *CB); + return TargetTTI->getIntrinsicInstrCost(Attrs, CostKind); } if (!TargetTTI->isLoweredToCall(F)) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index dbbcc795ea00f..7866e71853cf3 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -296,30 +296,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return BaseT::getGEPCost(PointeeType, Ptr, Operands); } - unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Arguments, const User *U, - TTI::TargetCostKind CostKind) { - return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U, CostKind); - } - - unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef ParamTys, const User *U, - TTI::TargetCostKind CostKind) { - if (IID == Intrinsic::cttz) { - if (getTLI()->isCheapToSpeculateCttz()) - return TargetTransformInfo::TCC_Basic; - return TargetTransformInfo::TCC_Expensive; - } - - if (IID == Intrinsic::ctlz) { - if (getTLI()->isCheapToSpeculateCtlz()) - return TargetTransformInfo::TCC_Basic; - return TargetTransformInfo::TCC_Expensive; - } - - return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U, CostKind); - } - unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JumpTableSize, ProfileSummaryInfo *PSI, @@ -1090,6 +1066,28 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { /// Get intrinsic cost based on arguments. 
unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) { + Intrinsic::ID IID = ICA.getID(); + auto *ConcreteTTI = static_cast(this); + + // Special case some scalar intrinsics. + if (CostKind != TTI::TCK_RecipThroughput) { + switch (IID) { + default: + break; + case Intrinsic::cttz: + if (getTLI()->isCheapToSpeculateCttz()) + return TargetTransformInfo::TCC_Basic; + break; + case Intrinsic::ctlz: + if (getTLI()->isCheapToSpeculateCtlz()) + return TargetTransformInfo::TCC_Basic; + break; + case Intrinsic::memcpy: + return ConcreteTTI->getMemcpyCost(ICA.getInst()); + // TODO: other libc intrinsics. + } + return BaseT::getIntrinsicInstrCost(ICA, CostKind); + } if (BaseT::getIntrinsicInstrCost(ICA, CostKind) == 0) return 0; @@ -1098,17 +1096,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { if (ICA.isTypeBasedOnly()) return getTypeBasedIntrinsicInstrCost(ICA, CostKind); - Intrinsic::ID IID = ICA.getID(); - const IntrinsicInst *I = ICA.getInst(); Type *RetTy = ICA.getReturnType(); - const SmallVectorImpl &Args = ICA.getArgs(); unsigned VF = ICA.getVectorFactor(); - FastMathFlags FMF = ICA.getFlags(); - unsigned RetVF = (RetTy->isVectorTy() ? 
cast(RetTy)->getNumElements() : 1); assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type"); - auto *ConcreteTTI = static_cast(this); + const IntrinsicInst *I = ICA.getInst(); + const SmallVectorImpl &Args = ICA.getArgs(); + FastMathFlags FMF = ICA.getFlags(); switch (IID) { default: { @@ -1595,13 +1590,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { CostKind) + ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy, CostKind); - if (IID == Intrinsic::experimental_constrained_fmuladd) - return ConcreteTTI->getIntrinsicCost( - Intrinsic::experimental_constrained_fmul, RetTy, Tys, nullptr, - CostKind) + - ConcreteTTI->getIntrinsicCost( - Intrinsic::experimental_constrained_fadd, RetTy, Tys, nullptr, - CostKind); + if (IID == Intrinsic::experimental_constrained_fmuladd) { + IntrinsicCostAttributes FMulAttrs( + Intrinsic::experimental_constrained_fmul, RetTy, Tys); + IntrinsicCostAttributes FAddAttrs( + Intrinsic::experimental_constrained_fadd, RetTy, Tys); + return ConcreteTTI->getIntrinsicInstrCost(FMulAttrs, CostKind) + + ConcreteTTI->getIntrinsicInstrCost(FAddAttrs, CostKind); + } // Else, assume that we need to scalarize this intrinsic. For math builtins // this will emit a costly libcall, adding call overhead and spills. 
Make it diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 7e05fcca1170e..86952a5ad6592 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -63,7 +63,20 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(const IntrinsicInst &I) : FMF = FPMO->getFastMathFlags(); } -IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI, +IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, + const CallBase &CI) : + II(dyn_cast(&CI)), RetTy(CI.getType()), IID(Id) { + + if (auto *FPMO = dyn_cast(&CI)) + FMF = FPMO->getFastMathFlags(); + + FunctionType *FTy = + CI.getCalledFunction()->getFunctionType(); + ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end()); +} + +IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, + const CallBase &CI, unsigned Factor) : RetTy(CI.getType()), IID(Id), VF(Factor) { @@ -76,7 +89,8 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI, ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end()); } -IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI, +IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, + const CallBase &CI, unsigned Factor, unsigned ScalarCost) : RetTy(CI.getType()), IID(Id), VF(Factor), ScalarizationCost(ScalarCost) { @@ -236,15 +250,6 @@ int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr, return TTIImpl->getGEPCost(PointeeType, Ptr, Operands, CostKind); } -int TargetTransformInfo::getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Arguments, - const User *U, - TTI::TargetCostKind CostKind) const { - int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U, CostKind); - assert(Cost >= 0 && "TTI should not produce negative costs!"); - return Cost; -} - unsigned TargetTransformInfo::getEstimatedNumberOfCaseClusters( const SwitchInst &SI, 
unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index ca75531be4a46..2405a24dd14f8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -560,6 +560,9 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) { int GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) { + if (ICA.getID() == Intrinsic::fabs) + return 0; + if (!intrinsicHasPackedVectorBenefit(ICA.getID())) return BaseT::getIntrinsicInstrCost(ICA, CostKind); diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 09f99af8c8e82..4170b102f2b31 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2699,6 +2699,9 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost( int X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) { + if (CostKind != TTI::TCK_RecipThroughput) + return BaseT::getIntrinsicInstrCost(ICA, CostKind); + if (ICA.isTypeBasedOnly()) return getTypeBasedIntrinsicInstrCost(ICA, CostKind); @@ -3932,6 +3935,9 @@ int X86TTIImpl::getGatherScatterOpCost( unsigned Alignment, TTI::TargetCostKind CostKind, const Instruction *I = nullptr) { + if (CostKind != TTI::TCK_RecipThroughput) + return 1; + assert(SrcVTy->isVectorTy() && "Unexpected data type for Gather/Scatter"); unsigned VF = cast(SrcVTy)->getNumElements(); PointerType *PtrTy = dyn_cast(Ptr->getType()); diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 8bd1aa8514ca5..5b93aad11e143 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1535,7 +1535,7 @@ bool 
LoopIdiomRecognize::recognizeAndInsertFFS() { // %inc = add nsw %i.0, 1 // br i1 %tobool - const Value *Args[] = + Value *Args[] = {InitX, ZeroCheck ? ConstantInt::getTrue(InitX->getContext()) : ConstantInt::getFalse(InitX->getContext())}; @@ -1544,9 +1544,11 @@ bool LoopIdiomRecognize::recognizeAndInsertFFS() { uint32_t HeaderSize = std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end()); + IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args); + int Cost = + TTI->getIntrinsicInstrCost(Attrs, TargetTransformInfo::TCK_SizeAndLatency); if (HeaderSize != IdiomCanonicalSize && - TTI->getIntrinsicCost(IntrinID, InitX->getType(), Args) > - TargetTransformInfo::TCC_Basic) + Cost > TargetTransformInfo::TCC_Basic) return false; transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX, From 98cad555e29187a03e2bc3db5780762981913902 Mon Sep 17 00:00:00 2001 From: Lucas Prates Date: Tue, 5 May 2020 11:52:09 +0100 Subject: [PATCH 070/770] [Clang][AArch64] Capturing proper pointer alignment for Neon vld1 intrinsics Summary: During CodeGen for AArch64 Neon intrinsics, Clang was incorrectly assuming all the pointers from which loads were being generated for vld1 intrinsics were aligned according to the intrinsics result type, causing alignment faults on the code generated by the backend. This patch updates vld1 intrinsics' CodeGen to properly capture the correct load alignment based on the type of the pointer provided as input for the intrinsic.
Reviewers: t.p.northover, ostannard, pcc Reviewed By: ostannard Subscribers: kristof.beyls, danielkiss, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D79721 --- clang/lib/CodeGen/CGBuiltin.cpp | 12 ++--- clang/test/CodeGen/aarch64-neon-intrinsics.c | 52 ++++++++++---------- 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 1adae1a7ea42a..ddd9a68a8edb7 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -10327,9 +10327,9 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vld1_v: case NEON::BI__builtin_neon_vld1q_v: { + auto Alignment = CGM.getNaturalPointeeTypeAlignment( + E->getArg(0)->IgnoreParenCasts()->getType()); Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); - auto Alignment = CharUnits::fromQuantity( - BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16); return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment); } case NEON::BI__builtin_neon_vst1_v: @@ -10342,8 +10342,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ty = llvm::PointerType::getUnqual(VTy->getElementType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - auto Alignment = CharUnits::fromQuantity( - BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 
8 : 16); + auto Alignment = CGM.getNaturalPointeeTypeAlignment( + E->getArg(0)->IgnoreParenCasts()->getType()); Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment); return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane"); @@ -10353,8 +10353,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *V = UndefValue::get(Ty); Ty = llvm::PointerType::getUnqual(VTy->getElementType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - auto Alignment = CharUnits::fromQuantity( - BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 8 : 16); + auto Alignment = CGM.getNaturalPointeeTypeAlignment( + E->getArg(0)->IgnoreParenCasts()->getType()); Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment); llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); diff --git a/clang/test/CodeGen/aarch64-neon-intrinsics.c b/clang/test/CodeGen/aarch64-neon-intrinsics.c index 7744b4f4a159d..1fb245f3d3429 100644 --- a/clang/test/CodeGen/aarch64-neon-intrinsics.c +++ b/clang/test/CodeGen/aarch64-neon-intrinsics.c @@ -8956,7 +8956,7 @@ float64_t test_vrsqrted_f64(float64_t a) { // CHECK-LABEL: @test_vld1q_u8( // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>* -// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]] +// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]], align 1 // CHECK: ret <16 x i8> [[TMP1]] uint8x16_t test_vld1q_u8(uint8_t const *a) { return vld1q_u8(a); @@ -8965,7 +8965,7 @@ uint8x16_t test_vld1q_u8(uint8_t const *a) { // CHECK-LABEL: @test_vld1q_u16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* -// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]] +// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2 // CHECK: ret <8 x i16> [[TMP2]] uint16x8_t test_vld1q_u16(uint16_t const *a) { return vld1q_u16(a); @@ -8974,7 +8974,7 @@ uint16x8_t test_vld1q_u16(uint16_t const *a) { // CHECK-LABEL: @test_vld1q_u32( 
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* -// CHECK: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]] +// CHECK: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 // CHECK: ret <4 x i32> [[TMP2]] uint32x4_t test_vld1q_u32(uint32_t const *a) { return vld1q_u32(a); @@ -8983,7 +8983,7 @@ uint32x4_t test_vld1q_u32(uint32_t const *a) { // CHECK-LABEL: @test_vld1q_u64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>* -// CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]] +// CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8 // CHECK: ret <2 x i64> [[TMP2]] uint64x2_t test_vld1q_u64(uint64_t const *a) { return vld1q_u64(a); @@ -8991,7 +8991,7 @@ uint64x2_t test_vld1q_u64(uint64_t const *a) { // CHECK-LABEL: @test_vld1q_s8( // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>* -// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]] +// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]], align 1 // CHECK: ret <16 x i8> [[TMP1]] int8x16_t test_vld1q_s8(int8_t const *a) { return vld1q_s8(a); @@ -9000,7 +9000,7 @@ int8x16_t test_vld1q_s8(int8_t const *a) { // CHECK-LABEL: @test_vld1q_s16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* -// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]] +// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2 // CHECK: ret <8 x i16> [[TMP2]] int16x8_t test_vld1q_s16(int16_t const *a) { return vld1q_s16(a); @@ -9009,7 +9009,7 @@ int16x8_t test_vld1q_s16(int16_t const *a) { // CHECK-LABEL: @test_vld1q_s32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* -// CHECK: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]] +// CHECK: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 // CHECK: ret <4 x i32> [[TMP2]] int32x4_t 
test_vld1q_s32(int32_t const *a) { return vld1q_s32(a); @@ -9018,7 +9018,7 @@ int32x4_t test_vld1q_s32(int32_t const *a) { // CHECK-LABEL: @test_vld1q_s64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>* -// CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]] +// CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8 // CHECK: ret <2 x i64> [[TMP2]] int64x2_t test_vld1q_s64(int64_t const *a) { return vld1q_s64(a); @@ -9027,7 +9027,7 @@ int64x2_t test_vld1q_s64(int64_t const *a) { // CHECK-LABEL: @test_vld1q_f16( // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x half>* -// CHECK: [[TMP2:%.*]] = load <8 x half>, <8 x half>* [[TMP1]] +// CHECK: [[TMP2:%.*]] = load <8 x half>, <8 x half>* [[TMP1]], align 2 // CHECK: ret <8 x half> [[TMP2]] float16x8_t test_vld1q_f16(float16_t const *a) { return vld1q_f16(a); @@ -9036,7 +9036,7 @@ float16x8_t test_vld1q_f16(float16_t const *a) { // CHECK-LABEL: @test_vld1q_f32( // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x float>* -// CHECK: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]] +// CHECK: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 // CHECK: ret <4 x float> [[TMP2]] float32x4_t test_vld1q_f32(float32_t const *a) { return vld1q_f32(a); @@ -9045,7 +9045,7 @@ float32x4_t test_vld1q_f32(float32_t const *a) { // CHECK-LABEL: @test_vld1q_f64( // CHECK: [[TMP0:%.*]] = bitcast double* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x double>* -// CHECK: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]] +// CHECK: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8 // CHECK: ret <2 x double> [[TMP2]] float64x2_t test_vld1q_f64(float64_t const *a) { return vld1q_f64(a); @@ -9053,7 +9053,7 @@ float64x2_t test_vld1q_f64(float64_t const *a) { // CHECK-LABEL: @test_vld1q_p8( // CHECK: 
[[TMP0:%.*]] = bitcast i8* %a to <16 x i8>* -// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]] +// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]], align 1 // CHECK: ret <16 x i8> [[TMP1]] poly8x16_t test_vld1q_p8(poly8_t const *a) { return vld1q_p8(a); @@ -9062,7 +9062,7 @@ poly8x16_t test_vld1q_p8(poly8_t const *a) { // CHECK-LABEL: @test_vld1q_p16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* -// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]] +// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2 // CHECK: ret <8 x i16> [[TMP2]] poly16x8_t test_vld1q_p16(poly16_t const *a) { return vld1q_p16(a); @@ -9070,7 +9070,7 @@ poly16x8_t test_vld1q_p16(poly16_t const *a) { // CHECK-LABEL: @test_vld1_u8( // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>* -// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]] +// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1 // CHECK: ret <8 x i8> [[TMP1]] uint8x8_t test_vld1_u8(uint8_t const *a) { return vld1_u8(a); @@ -9079,7 +9079,7 @@ uint8x8_t test_vld1_u8(uint8_t const *a) { // CHECK-LABEL: @test_vld1_u16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* -// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]] +// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 2 // CHECK: ret <4 x i16> [[TMP2]] uint16x4_t test_vld1_u16(uint16_t const *a) { return vld1_u16(a); @@ -9088,7 +9088,7 @@ uint16x4_t test_vld1_u16(uint16_t const *a) { // CHECK-LABEL: @test_vld1_u32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* -// CHECK: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]] +// CHECK: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4 // CHECK: ret <2 x i32> [[TMP2]] uint32x2_t test_vld1_u32(uint32_t const *a) { return vld1_u32(a); @@ -9097,7 +9097,7 @@ 
uint32x2_t test_vld1_u32(uint32_t const *a) { // CHECK-LABEL: @test_vld1_u64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>* -// CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]] +// CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]], align 8 // CHECK: ret <1 x i64> [[TMP2]] uint64x1_t test_vld1_u64(uint64_t const *a) { return vld1_u64(a); @@ -9105,7 +9105,7 @@ uint64x1_t test_vld1_u64(uint64_t const *a) { // CHECK-LABEL: @test_vld1_s8( // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>* -// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]] +// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1 // CHECK: ret <8 x i8> [[TMP1]] int8x8_t test_vld1_s8(int8_t const *a) { return vld1_s8(a); @@ -9114,7 +9114,7 @@ int8x8_t test_vld1_s8(int8_t const *a) { // CHECK-LABEL: @test_vld1_s16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* -// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]] +// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 2 // CHECK: ret <4 x i16> [[TMP2]] int16x4_t test_vld1_s16(int16_t const *a) { return vld1_s16(a); @@ -9123,7 +9123,7 @@ int16x4_t test_vld1_s16(int16_t const *a) { // CHECK-LABEL: @test_vld1_s32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* -// CHECK: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]] +// CHECK: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4 // CHECK: ret <2 x i32> [[TMP2]] int32x2_t test_vld1_s32(int32_t const *a) { return vld1_s32(a); @@ -9132,7 +9132,7 @@ int32x2_t test_vld1_s32(int32_t const *a) { // CHECK-LABEL: @test_vld1_s64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>* -// CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]] +// CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]], align 8 
// CHECK: ret <1 x i64> [[TMP2]] int64x1_t test_vld1_s64(int64_t const *a) { return vld1_s64(a); @@ -9141,7 +9141,7 @@ int64x1_t test_vld1_s64(int64_t const *a) { // CHECK-LABEL: @test_vld1_f16( // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x half>* -// CHECK: [[TMP2:%.*]] = load <4 x half>, <4 x half>* [[TMP1]] +// CHECK: [[TMP2:%.*]] = load <4 x half>, <4 x half>* [[TMP1]], align 2 // CHECK: ret <4 x half> [[TMP2]] float16x4_t test_vld1_f16(float16_t const *a) { return vld1_f16(a); @@ -9150,7 +9150,7 @@ float16x4_t test_vld1_f16(float16_t const *a) { // CHECK-LABEL: @test_vld1_f32( // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x float>* -// CHECK: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]] +// CHECK: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4 // CHECK: ret <2 x float> [[TMP2]] float32x2_t test_vld1_f32(float32_t const *a) { return vld1_f32(a); @@ -9159,7 +9159,7 @@ float32x2_t test_vld1_f32(float32_t const *a) { // CHECK-LABEL: @test_vld1_f64( // CHECK: [[TMP0:%.*]] = bitcast double* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x double>* -// CHECK: [[TMP2:%.*]] = load <1 x double>, <1 x double>* [[TMP1]] +// CHECK: [[TMP2:%.*]] = load <1 x double>, <1 x double>* [[TMP1]], align 8 // CHECK: ret <1 x double> [[TMP2]] float64x1_t test_vld1_f64(float64_t const *a) { return vld1_f64(a); @@ -9167,7 +9167,7 @@ float64x1_t test_vld1_f64(float64_t const *a) { // CHECK-LABEL: @test_vld1_p8( // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>* -// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]] +// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1 // CHECK: ret <8 x i8> [[TMP1]] poly8x8_t test_vld1_p8(poly8_t const *a) { return vld1_p8(a); @@ -9176,7 +9176,7 @@ poly8x8_t test_vld1_p8(poly8_t const *a) { // CHECK-LABEL: @test_vld1_p16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: 
[[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* -// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]] +// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 2 // CHECK: ret <4 x i16> [[TMP2]] poly16x4_t test_vld1_p16(poly16_t const *a) { return vld1_p16(a); From 2569787e44595d31942da2bb5558931351929e57 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Wed, 20 May 2020 17:01:35 +0300 Subject: [PATCH 071/770] [DebugInfo] - Fix multiple issues in DWARFDebugFrame::parse(). I've noticed an issue with the "Data.getRelocatedValue(...)" call: it might silently ignore an error when the content is truncated. That leads to an infinite loop in the code (e.g. llvm-readobj hangs). After fixing that issue, I found that we actually always tried to read past the end of a section, even when the content was valid. It happened because the terminator CIE (a CIE with the length == 0) was never handled. At first I tried simply not adding the terminator entry (and returning), but that does not seem to be correct, because tools like llvm-objdump might want to print something for such entries (see comments in the code and test cases). This patch fixes the issues mentioned, provides new test cases for both llvm-readobj and lib/DebugInfo, and adds FIXMEs to the related existing test cases. 
Differential revision: https://reviews.llvm.org/D80299 --- llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp | 22 ++++++++++++++++--- llvm/test/DebugInfo/X86/eh-frame-truncated.s | 10 +++++++++ .../tools/llvm-objdump/eh_frame-mipsel.test | 1 + .../tools/llvm-objdump/eh_frame_zero_cie.test | 1 + llvm/test/tools/llvm-readobj/ELF/unwind.test | 19 ++++++++++++++++ 5 files changed, 50 insertions(+), 3 deletions(-) create mode 100644 llvm/test/DebugInfo/X86/eh-frame-truncated.s diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp index 269a45e57a8d0..0e8d521f94330 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -375,7 +375,19 @@ Error DWARFDebugFrame::parse(DWARFDataExtractor Data) { uint64_t Length; DwarfFormat Format; std::tie(Length, Format) = Data.getInitialLength(&Offset); - uint64_t Id; + bool IsDWARF64 = Format == DWARF64; + + // If the Length is 0, then this CIE is a terminator. We add it because some + // dumper tools might need it to print something special for such entries + // (e.g. llvm-objdump --dwarf=frames prints "ZERO terminator"). + if (Length == 0) { + auto Cie = std::make_unique( + IsDWARF64, StartOffset, 0, 0, SmallString<8>(), 0, 0, 0, 0, 0, + SmallString<8>(), 0, 0, None, None, Arch); + CIEs[StartOffset] = Cie.get(); + Entries.push_back(std::move(Cie)); + break; + } // At this point, Offset points to the next field after Length. // Length is the structure size excluding itself. Compute an offset one @@ -385,8 +397,12 @@ Error DWARFDebugFrame::parse(DWARFDataExtractor Data) { uint64_t EndStructureOffset = Offset + Length; // The Id field's size depends on the DWARF format - bool IsDWARF64 = Format == DWARF64; - Id = Data.getRelocatedValue((IsDWARF64 && !IsEH) ? 8 : 4, &Offset); + Error Err = Error::success(); + uint64_t Id = Data.getRelocatedValue((IsDWARF64 && !IsEH) ? 
8 : 4, &Offset, + /*SectionIndex=*/nullptr, &Err); + if (Err) + return Err; + if (Id == getCIEId(IsDWARF64, IsEH)) { uint8_t Version = Data.getU8(&Offset); const char *Augmentation = Data.getCStr(&Offset); diff --git a/llvm/test/DebugInfo/X86/eh-frame-truncated.s b/llvm/test/DebugInfo/X86/eh-frame-truncated.s new file mode 100644 index 0000000000000..28107e13530a0 --- /dev/null +++ b/llvm/test/DebugInfo/X86/eh-frame-truncated.s @@ -0,0 +1,10 @@ +## Check we report a proper error when the content +## of the .eh_frame section is truncated. + +# RUN: llvm-mc -triple x86_64 %s -filetype=obj -o %t +# RUN: not llvm-dwarfdump -debug-frame %t 2>&1 | FileCheck %s + +# CHECK: error: unexpected end of data at offset 0x4 + +.section .eh_frame,"a",@unwind +.long 0xFF ## Length diff --git a/llvm/test/tools/llvm-objdump/eh_frame-mipsel.test b/llvm/test/tools/llvm-objdump/eh_frame-mipsel.test index 67d2408269146..e89d9aeb53cb2 100644 --- a/llvm/test/tools/llvm-objdump/eh_frame-mipsel.test +++ b/llvm/test/tools/llvm-objdump/eh_frame-mipsel.test @@ -19,6 +19,7 @@ # CHECK: DW_CFA_offset: reg31 -4 # CHECK: DW_CFA_nop: +## FIXME: GNU objdump prints "00000038 ZERO terminator" instead. # CHECK: 00000038 00000000 00000000 CIE # CHECK: Version: 0 # CHECK: Augmentation: "" diff --git a/llvm/test/tools/llvm-objdump/eh_frame_zero_cie.test b/llvm/test/tools/llvm-objdump/eh_frame_zero_cie.test index 4702162a749bd..510c944028cdf 100644 --- a/llvm/test/tools/llvm-objdump/eh_frame_zero_cie.test +++ b/llvm/test/tools/llvm-objdump/eh_frame_zero_cie.test @@ -2,6 +2,7 @@ # CHECK: .eh_frame contents: +## FIXME: GNU objdump prints "00000000 ZERO terminator" instead. 
# CHECK: 00000000 00000000 00000000 CIE # CHECK: Version: 0 # CHECK: Augmentation: "" diff --git a/llvm/test/tools/llvm-readobj/ELF/unwind.test b/llvm/test/tools/llvm-readobj/ELF/unwind.test index dbdc9617aae3d..466c6a6a75178 100644 --- a/llvm/test/tools/llvm-readobj/ELF/unwind.test +++ b/llvm/test/tools/llvm-readobj/ELF/unwind.test @@ -243,3 +243,22 @@ Sections: ## .quad 0x00010000 # Address range ## .Lend: Content: 14000000FFFFFFFFCDAB1111000000000000010000000000 + +## Check we report a error when the .eh_frame section contains truncated data. +# RUN: yaml2obj --docnum=3 %s -o %t3.exe +# RUN: not llvm-readobj --unwind %t3.exe 2>&1 | FileCheck %s -DFILE=%t3.exe --check-prefix=TRUNCATED-ERR + +# TRUNCATED-ERR: .eh_frame section at offset 0x34 address 0x0: +# TRUNCATED-ERR-NEXT: error: '[[FILE]]': unexpected end of data at offset 0x4 + +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_386 +Sections: + - Name: .eh_frame + Type: SHT_PROGBITS +## Length is set to 0xFF, though the actual section length is 4. 
+ Content: "FF000000" From 92f3828dc5675f9917d909eb75c29ba1e14920ad Mon Sep 17 00:00:00 2001 From: vpykhtin Date: Tue, 26 May 2020 12:09:46 +0300 Subject: [PATCH 072/770] [AMDGPU] Fix wait counts in the presence of 16bit subregisters Differential Revision: https://reviews.llvm.org/D80033 --- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 2 +- llvm/test/CodeGen/AMDGPU/waitcnt.mir | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index c115d26fa6a34..67c7ff1fcda43 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -505,7 +505,7 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI, const TargetRegisterClass *RC = TII->getOpRegClass(*MI, OpNo); unsigned Size = TRI->getRegSizeInBits(*RC); - Result.second = Result.first + (Size / 32); + Result.second = Result.first + ((Size + 16) / 32); return Result; } diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt.mir b/llvm/test/CodeGen/AMDGPU/waitcnt.mir index fd81ca83a1a1c..c568b8d32a237 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt.mir @@ -41,6 +41,9 @@ ret void } + define amdgpu_kernel void @subregs16bit() { + ret void + } ... --- @@ -284,3 +287,19 @@ body: | FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr } ... + +--- +# CHECK-LABEL: name: subregs16bit +# CHECK: S_WAITCNT 112 +# CHECK-NEXT: V_NOP_e32 + +name: subregs16bit +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4 + $vgpr0 = FLAT_LOAD_USHORT killed $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr1 = FLAT_LOAD_USHORT killed $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + V_NOP_e32 implicit $exec, implicit $vgpr0_lo16, implicit $vgpr1_lo16 +... 
From 48cdbd081c9111e2ffe41ac3022bdfc65df46655 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Tue, 26 May 2020 10:29:42 +0100 Subject: [PATCH 073/770] [NFC][ARM] Add code size analysis tests Add code size runs for the cast costs. --- llvm/test/Analysis/CostModel/ARM/cast.ll | 4383 ++++++++++++++-------- 1 file changed, 2821 insertions(+), 1562 deletions(-) diff --git a/llvm/test/Analysis/CostModel/ARM/cast.ll b/llvm/test/Analysis/CostModel/ARM/cast.ll index a7fd0a141a56b..0e509c1f57b4f 100644 --- a/llvm/test/Analysis/CostModel/ARM/cast.ll +++ b/llvm/test/Analysis/CostModel/ARM/cast.ll @@ -1,1347 +1,2419 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=cortex-a9 | FileCheck %s --check-prefix=CHECK-NEON -; RUN: opt -cost-model -analyze -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVE -; RUN: opt -cost-model -analyze -mtriple=thumbv8m.main-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-MAIN -; RUN: opt -cost-model -analyze -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE -; RUN: opt -cost-model -analyze -mtriple=armv8r-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8R +; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=cortex-a9 | FileCheck %s --check-prefix=CHECK-NEON-RECIP +; RUN: opt -cost-model -analyze -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVE-RECIP +; RUN: opt -cost-model -analyze -mtriple=thumbv8m.main-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-MAIN-RECIP +; RUN: opt -cost-model -analyze -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE-RECIP +; RUN: opt -cost-model -analyze -mtriple=armv8r-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8R-RECIP +; RUN: opt -cost-model -analyze -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | 
FileCheck %s --check-prefix=CHECK-MVE-SIZE +; RUN: opt -cost-model -analyze -mtriple=thumbv8m.main-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-MAIN-SIZE +; RUN: opt -cost-model -analyze -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE-SIZE +; RUN: opt -cost-model -analyze -mtriple=armv8r-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8R-SIZE define i32 @casts() { -; CHECK-NEON-LABEL: 'casts' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r1 = zext i1 undef to i8 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = sext i1 undef to i16 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = zext i1 undef to i16 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i1 undef to i32 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i1 undef to i32 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r6 = sext i1 undef to i64 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r7 = zext i1 undef to i64 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = trunc i8 undef to i1 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = sext i8 undef to i16 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = zext i8 undef to i16 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = sext i8 undef to i32 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = zext i8 undef to i32 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = sext i8 undef to i64 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r14 
= zext i8 undef to i64 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r15 = trunc i16 undef to i1 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r16 = trunc i16 undef to i8 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r17 = sext i16 undef to i32 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r18 = zext i16 undef to i32 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r19 = sext i16 undef to i64 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r20 = zext i16 undef to i64 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r21 = trunc i32 undef to i1 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r22 = trunc i32 undef to i8 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r23 = trunc i32 undef to i16 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r24 = sext i32 undef to i64 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r25 = zext i32 undef to i64 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r26 = trunc i64 undef to i1 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r27 = trunc i64 undef to i8 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r28 = trunc i64 undef to i16 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r29 = trunc i64 undef to i32 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r30 = fptoui float undef to i1 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r31 = fptosi float undef to i1 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r32 = fptoui float undef to i8 -; CHECK-NEON-NEXT: Cost Model: Found 
an estimated cost of 2 for instruction: %r33 = fptosi float undef to i8 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r34 = fptoui float undef to i16 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r35 = fptosi float undef to i16 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r36 = fptoui float undef to i32 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r37 = fptosi float undef to i32 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r38 = fptoui float undef to i64 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r39 = fptosi float undef to i64 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r40 = fptoui double undef to i1 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r41 = fptosi double undef to i1 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r42 = fptoui double undef to i8 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r43 = fptosi double undef to i8 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r44 = fptoui double undef to i16 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r45 = fptosi double undef to i16 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r46 = fptoui double undef to i32 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r47 = fptosi double undef to i32 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r48 = fptoui double undef to i64 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r49 = fptosi double undef to i64 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r50 = sitofp i1 undef to float -; CHECK-NEON-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %r51 = uitofp i1 undef to float -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r52 = sitofp i1 undef to double -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r53 = uitofp i1 undef to double -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r54 = sitofp i8 undef to float -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r55 = uitofp i8 undef to float -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r56 = sitofp i8 undef to double -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r57 = uitofp i8 undef to double -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r58 = sitofp i16 undef to float -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r59 = uitofp i16 undef to float -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r60 = sitofp i16 undef to double -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r61 = uitofp i16 undef to double -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r62 = sitofp i32 undef to float -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r63 = uitofp i32 undef to float -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r64 = sitofp i32 undef to double -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r65 = uitofp i32 undef to double -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r66 = sitofp i64 undef to float -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r67 = uitofp i64 undef to float -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r68 = sitofp i64 undef to double -; CHECK-NEON-NEXT: Cost 
Model: Found an estimated cost of 10 for instruction: %r69 = uitofp i64 undef to double -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_5 = zext <4 x i16> undef to <4 x 
i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r86 = fpext <2 x float> undef to <2 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r87 = fpext <4 x float> undef to <4 x double> -; CHECK-NEON-NEXT: Cost Model: 
Found an estimated cost of 8 for instruction: %r88 = fpext <8 x float> undef to <8 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r89 = fpext <16 x float> undef to <16 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r93 = fptosi <2 x float> undef to <2 x i8> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r94 = fptoui <2 x float> undef to <2 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> -; CHECK-NEON-NEXT: Cost Model: Found an estimated 
cost of 16 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r111 = fptosi <4 x float> undef to <4 x i1> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r112 = fptoui <4 x float> undef to <4 x i8> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r113 = fptosi <4 x float> undef to <4 x i8> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r114 = fptoui <4 x float> undef to <4 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost 
of 33 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 
2 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> -; CHECK-NEON-NEXT: Cost Model: Found an 
estimated cost of 15 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> -; 
CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r173 = sitofp <2 x i8> undef to <2 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r174 = uitofp <2 x i16> undef to <2 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> -; 
CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r191 = sitofp <4 x i1> undef to <4 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r192 = uitofp <4 x i8> undef to <4 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r193 = sitofp <4 x i8> undef to <4 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r194 = uitofp <4 x i16> undef to <4 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> -; 
CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> -; 
CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r231 = sitofp <16 x i1> undef to <16 x 
float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r247 = 
sitofp <16 x i16> undef to <16 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef + ; -- scalars -- +; CHECK-NEON-RECIP-LABEL: 'casts' +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r1 = zext i1 undef to i8 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = sext i1 undef to i16 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = zext i1 undef to i16 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i1 undef to i32 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i1 undef to i32 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r6 = sext i1 undef to i64 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r7 = zext i1 undef to i64 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = trunc i8 undef to i1 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = sext i8 undef to i16 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = zext i8 undef to i16 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = sext i8 undef to i32 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = zext i8 undef to i32 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = sext i8 undef to i64 
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r14 = zext i8 undef to i64 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r15 = trunc i16 undef to i1 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r16 = trunc i16 undef to i8 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r17 = sext i16 undef to i32 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r18 = zext i16 undef to i32 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r19 = sext i16 undef to i64 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r20 = zext i16 undef to i64 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r21 = trunc i32 undef to i1 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r22 = trunc i32 undef to i8 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r23 = trunc i32 undef to i16 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r24 = sext i32 undef to i64 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r25 = zext i32 undef to i64 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r26 = trunc i64 undef to i1 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r27 = trunc i64 undef to i8 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r28 = trunc i64 undef to i16 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r29 = trunc i64 undef to i32 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r30 = fptoui float undef to i1 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %r31 = fptosi float undef to i1 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r32 = fptoui float undef to i8 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r33 = fptosi float undef to i8 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r34 = fptoui float undef to i16 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r35 = fptosi float undef to i16 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r36 = fptoui float undef to i32 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r37 = fptosi float undef to i32 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r38 = fptoui float undef to i64 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r39 = fptosi float undef to i64 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r40 = fptoui double undef to i1 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r41 = fptosi double undef to i1 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r42 = fptoui double undef to i8 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r43 = fptosi double undef to i8 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r44 = fptoui double undef to i16 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r45 = fptosi double undef to i16 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r46 = fptoui double undef to i32 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r47 = fptosi double undef to i32 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for 
instruction: %r48 = fptoui double undef to i64 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r49 = fptosi double undef to i64 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r50 = sitofp i1 undef to float +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r51 = uitofp i1 undef to float +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r52 = sitofp i1 undef to double +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r53 = uitofp i1 undef to double +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r54 = sitofp i8 undef to float +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r55 = uitofp i8 undef to float +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r56 = sitofp i8 undef to double +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r57 = uitofp i8 undef to double +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r58 = sitofp i16 undef to float +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r59 = uitofp i16 undef to float +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r60 = sitofp i16 undef to double +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r61 = uitofp i16 undef to double +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r62 = sitofp i32 undef to float +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r63 = uitofp i32 undef to float +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r64 = sitofp i32 undef to double +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %r65 = uitofp i32 undef to double +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r66 = sitofp i64 undef to float +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r67 = uitofp i64 undef to float +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r68 = sitofp i64 undef to double +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r69 = uitofp i64 undef to double +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %rext_1 = zext <8 x i8> 
undef to <8 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost 
of 8 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r86 = fpext <2 x float> undef to <2 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r87 = fpext <4 x float> undef to <4 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r88 = fpext <8 x float> undef to <8 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r89 = fpext <16 x float> undef to <16 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r93 = fptosi <2 x float> undef to <2 x i8> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r94 = fptoui <2 x float> undef to <2 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r98 = fptoui <2 x float> undef 
to <2 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r111 = fptosi <4 x float> undef to <4 x i1> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r112 = fptoui <4 x float> undef to <4 x i8> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r113 = fptosi <4 x float> undef to <4 x i8> +; 
CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r114 = fptoui <4 x float> undef to <4 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: 
Found an estimated cost of 64 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 67 for 
instruction: %r144 = fptoui <8 x double> undef to <8 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 260 for instruction: 
%r159 = fptosi <16 x float> undef to <16 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r173 = sitofp <2 x i8> undef to <2 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r174 
= uitofp <2 x i16> undef to <2 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> 
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r191 = sitofp <4 x i1> undef to <4 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r192 = uitofp <4 x i8> undef to <4 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r193 = sitofp <4 x i8> undef to <4 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r194 = uitofp <4 x i16> undef to <4 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an 
estimated cost of 7 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 35 for instruction: 
%r220 = uitofp <8 x i1> undef to <8 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r235 = sitofp <16 x 
i16> undef to <16 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; 
CHECK-MVE-LABEL: 'casts' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r1 = zext i1 undef to i8 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = sext i1 undef to i16 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = zext i1 undef to i16 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i1 undef to i32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i1 undef to i32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r6 = sext i1 undef to i64 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r7 = zext i1 undef to i64 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = trunc i8 undef to i1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = sext i8 undef to i16 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = zext i8 undef to i16 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = sext i8 undef to i32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = zext i8 undef to i32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = sext i8 undef to i64 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r14 = zext i8 undef to i64 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r15 = trunc i16 undef to i1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r16 = trunc i16 undef to i8 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r17 = sext i16 undef to i32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r18 = zext i16 
undef to i32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r19 = sext i16 undef to i64 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r20 = zext i16 undef to i64 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r21 = trunc i32 undef to i1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r22 = trunc i32 undef to i8 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r23 = trunc i32 undef to i16 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r24 = sext i32 undef to i64 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r25 = zext i32 undef to i64 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r26 = trunc i64 undef to i1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r27 = trunc i64 undef to i8 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r28 = trunc i64 undef to i16 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r29 = trunc i64 undef to i32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r30 = fptoui float undef to i1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r31 = fptosi float undef to i1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r32 = fptoui float undef to i8 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r33 = fptosi float undef to i8 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r34 = fptoui float undef to i16 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r35 = fptosi float undef to i16 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r36 = fptoui float undef to i32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %r37 = fptosi float undef to i32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r38 = fptoui float undef to i64 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r39 = fptosi float undef to i64 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r40 = fptoui double undef to i1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r41 = fptosi double undef to i1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r42 = fptoui double undef to i8 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r43 = fptosi double undef to i8 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r44 = fptoui double undef to i16 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r45 = fptosi double undef to i16 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r46 = fptoui double undef to i32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r47 = fptosi double undef to i32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r48 = fptoui double undef to i64 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r49 = fptosi double undef to i64 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r50 = sitofp i1 undef to float -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r51 = uitofp i1 undef to float -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r52 = sitofp i1 undef to double -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r53 = uitofp i1 undef to double -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r54 = sitofp i8 undef to float -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %r55 = uitofp i8 undef to float -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r56 = sitofp i8 undef to double -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r57 = uitofp i8 undef to double -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r58 = sitofp i16 undef to float -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r59 = uitofp i16 undef to float -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r60 = sitofp i16 undef to double -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r61 = uitofp i16 undef to double -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r62 = sitofp i32 undef to float -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r63 = uitofp i32 undef to float -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r64 = sitofp i32 undef to double -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r65 = uitofp i32 undef to double -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r66 = sitofp i64 undef to float -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r67 = uitofp i64 undef to float -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r68 = sitofp i64 undef to double -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r69 = uitofp i64 undef to double -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 
18 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> -; 
CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r86 = fpext <2 x float> undef to <2 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r87 = fpext <4 x float> undef to <4 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r88 = fpext <8 x float> undef to <8 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r89 = fpext <16 x float> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> -; CHECK-MVE-NEXT: Cost Model: 
Found an estimated cost of 20 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r93 = fptosi <2 x float> undef to <2 x i8> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r94 = fptoui <2 x float> undef to <2 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated 
cost of 20 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r111 = fptosi <4 x float> undef to <4 x i1> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r112 = fptoui <4 x float> undef to <4 x i8> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r113 = fptosi <4 x float> undef to <4 x i8> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r114 = fptoui <4 x float> undef to <4 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for 
instruction: %r124 = fptoui <4 x double> undef to <4 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: 
%r140 = fptoui <8 x double> undef to <8 x i1> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for 
instruction: %r156 = fptoui <16 x float> undef to <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> -; CHECK-MVE-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r173 = sitofp <2 x i8> undef to <2 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r174 = uitofp <2 x i16> undef to <2 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> -; CHECK-MVE-NEXT: Cost Model: Found an 
estimated cost of 32 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r191 = sitofp <4 x i1> undef to <4 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r192 = uitofp <4 x i8> undef to <4 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r193 = sitofp <4 x i8> undef to <4 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r194 = uitofp <4 x i16> undef to <4 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 
for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for 
instruction: %r220 = uitofp <8 x i1> undef to <8 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 
8 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2090 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2090 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2090 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2090 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2088 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2088 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2088 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2088 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-MVE-RECIP-LABEL: 'casts' +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 +; CHECK-MVE-RECIP-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %r1 = zext i1 undef to i8 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = sext i1 undef to i16 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = zext i1 undef to i16 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i1 undef to i32 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i1 undef to i32 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r6 = sext i1 undef to i64 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r7 = zext i1 undef to i64 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = trunc i8 undef to i1 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = sext i8 undef to i16 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = zext i8 undef to i16 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = sext i8 undef to i32 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = zext i8 undef to i32 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = sext i8 undef to i64 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r14 = zext i8 undef to i64 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r15 = trunc i16 undef to i1 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r16 = trunc i16 undef to i8 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r17 = sext i16 undef to i32 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r18 = zext i16 undef to i32 +; CHECK-MVE-RECIP-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %r19 = sext i16 undef to i64 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r20 = zext i16 undef to i64 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r21 = trunc i32 undef to i1 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r22 = trunc i32 undef to i8 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r23 = trunc i32 undef to i16 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r24 = sext i32 undef to i64 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r25 = zext i32 undef to i64 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r26 = trunc i64 undef to i1 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r27 = trunc i64 undef to i8 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r28 = trunc i64 undef to i16 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r29 = trunc i64 undef to i32 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r30 = fptoui float undef to i1 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r31 = fptosi float undef to i1 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r32 = fptoui float undef to i8 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r33 = fptosi float undef to i8 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r34 = fptoui float undef to i16 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r35 = fptosi float undef to i16 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r36 = fptoui float undef 
to i32 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r37 = fptosi float undef to i32 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r38 = fptoui float undef to i64 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r39 = fptosi float undef to i64 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r40 = fptoui double undef to i1 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r41 = fptosi double undef to i1 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r42 = fptoui double undef to i8 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r43 = fptosi double undef to i8 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r44 = fptoui double undef to i16 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r45 = fptosi double undef to i16 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r46 = fptoui double undef to i32 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r47 = fptosi double undef to i32 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r48 = fptoui double undef to i64 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r49 = fptosi double undef to i64 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r50 = sitofp i1 undef to float +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r51 = uitofp i1 undef to float +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r52 = sitofp i1 undef to double +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r53 = uitofp i1 undef to double +; 
CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r54 = sitofp i8 undef to float +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r55 = uitofp i8 undef to float +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r56 = sitofp i8 undef to double +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r57 = uitofp i8 undef to double +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r58 = sitofp i16 undef to float +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r59 = uitofp i16 undef to float +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r60 = sitofp i16 undef to double +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r61 = uitofp i16 undef to double +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r62 = sitofp i32 undef to float +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r63 = uitofp i32 undef to float +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r64 = sitofp i32 undef to double +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r65 = uitofp i32 undef to double +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r66 = sitofp i64 undef to float +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r67 = uitofp i64 undef to float +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r68 = sitofp i64 undef to double +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r69 = uitofp i64 undef to double +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> +; 
CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %rext_6 = sext <2 x i8> undef to <2 
x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r86 = fpext <2 x float> undef to <2 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r87 = fpext <4 x float> undef to <4 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 328 for 
instruction: %r88 = fpext <8 x float> undef to <8 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r89 = fpext <16 x float> undef to <16 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r93 = fptosi <2 x float> undef to <2 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r94 = fptoui <2 x float> undef to <2 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r103 = fptosi <2 x double> undef to <2 x 
i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r111 = fptosi <4 x float> undef to <4 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r112 = fptoui <4 x float> undef to <4 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r113 = fptosi <4 x float> undef to <4 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r114 = fptoui <4 x float> undef to <4 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an 
estimated cost of 82 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r134 = 
fptoui <8 x float> undef to <8 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %r149 = fptosi <8 x double> undef to <8 x 
i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> +; 
CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r173 = sitofp <2 x i8> undef to <2 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r174 = uitofp <2 x i16> undef to <2 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: 
Found an estimated cost of 32 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r191 = sitofp <4 x i1> undef to <4 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r192 = uitofp <4 x i8> undef to <4 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r193 = sitofp <4 x i8> undef to <4 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r194 = uitofp <4 x i16> undef to <4 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r195 
= sitofp <4 x i16> undef to <4 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r210 = uitofp <8 x i1> undef to <8 x 
float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: 
Found an estimated cost of 522 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2090 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated 
cost of 2090 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2090 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2090 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2088 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2088 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2088 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2088 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; CHECK-V8M-MAIN-LABEL: 'casts' -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r1 = zext i1 undef to i8 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = sext i1 undef to i16 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = zext i1 undef to i16 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i1 undef to i32 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i1 undef to i32 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %r6 = sext i1 undef to i64 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r7 = zext i1 undef to i64 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = trunc i8 undef to i1 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = sext i8 undef to i16 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = zext i8 undef to i16 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = sext i8 undef to i32 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = zext i8 undef to i32 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = sext i8 undef to i64 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r14 = zext i8 undef to i64 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r15 = trunc i16 undef to i1 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r16 = trunc i16 undef to i8 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r17 = sext i16 undef to i32 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r18 = zext i16 undef to i32 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r19 = sext i16 undef to i64 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r20 = zext i16 undef to i64 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r21 = trunc i32 undef to i1 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r22 = trunc i32 undef to i8 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r23 = trunc i32 undef to i16 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %r24 = sext i32 undef to i64 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r25 = zext i32 undef to i64 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r26 = trunc i64 undef to i1 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r27 = trunc i64 undef to i8 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r28 = trunc i64 undef to i16 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r29 = trunc i64 undef to i32 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r30 = fptoui float undef to i1 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r31 = fptosi float undef to i1 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r32 = fptoui float undef to i8 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r33 = fptosi float undef to i8 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r34 = fptoui float undef to i16 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r35 = fptosi float undef to i16 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r36 = fptoui float undef to i32 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r37 = fptosi float undef to i32 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r38 = fptoui float undef to i64 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r39 = fptosi float undef to i64 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r40 = fptoui double undef to i1 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r41 = fptosi double undef to i1 -; CHECK-V8M-MAIN-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %r42 = fptoui double undef to i8 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r43 = fptosi double undef to i8 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r44 = fptoui double undef to i16 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r45 = fptosi double undef to i16 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r46 = fptoui double undef to i32 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r47 = fptosi double undef to i32 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r48 = fptoui double undef to i64 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r49 = fptosi double undef to i64 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r50 = sitofp i1 undef to float -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r51 = uitofp i1 undef to float -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r52 = sitofp i1 undef to double -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r53 = uitofp i1 undef to double -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r54 = sitofp i8 undef to float -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r55 = uitofp i8 undef to float -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r56 = sitofp i8 undef to double -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r57 = uitofp i8 undef to double -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r58 = sitofp i16 undef to float -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %r59 = uitofp i16 undef to float -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r60 = sitofp i16 undef to double -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r61 = uitofp i16 undef to double -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r62 = sitofp i32 undef to float -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r63 = uitofp i32 undef to float -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r64 = sitofp i32 undef to double -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r65 = uitofp i32 undef to double -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r66 = sitofp i64 undef to float -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r67 = uitofp i64 undef to float -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r68 = sitofp i64 undef to double -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r69 = uitofp i64 undef to double -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated 
cost of 8 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found 
an estimated cost of 0 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r86 = fpext <2 x float> undef to <2 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r87 = fpext <4 x float> undef to <4 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r88 = fpext <8 x float> undef to <8 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r89 = fpext <16 x float> undef to <16 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r93 = fptosi <2 x float> undef to <2 x i8> -; 
CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r94 = fptoui <2 x float> undef to <2 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: 
%r109 = fptosi <2 x double> undef to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r111 = fptosi <4 x float> undef to <4 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r112 = fptoui <4 x float> undef to <4 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r113 = fptosi <4 x float> undef to <4 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r114 = fptoui <4 x float> undef to <4 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> -; CHECK-V8M-MAIN-NEXT: Cost 
Model: Found an estimated cost of 12 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r140 = fptoui <8 x 
double> undef to <8 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> -; CHECK-V8M-MAIN-NEXT: 
Cost Model: Found an estimated cost of 16 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r173 = sitofp <2 x i8> undef to <2 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r174 = uitofp <2 x i16> undef to <2 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> 
-; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r191 = sitofp <4 x i1> undef to <4 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r192 = uitofp <4 x i8> undef to <4 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r193 = sitofp <4 x i8> undef to <4 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r194 = uitofp <4 x i16> undef to <4 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for 
instruction: %r202 = uitofp <4 x i8> undef to <4 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> -; 
CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated 
cost of 16 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: 
%r248 = uitofp <16 x i64> undef to <16 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-V8M-MAIN-RECIP-LABEL: 'casts' +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r1 = zext i1 undef to i8 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = sext i1 undef to i16 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = zext i1 undef to i16 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i1 undef to i32 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i1 undef to i32 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r6 = sext i1 undef to i64 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r7 = zext i1 undef to i64 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = trunc i8 undef to i1 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = sext i8 undef to i16 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = zext i8 undef to i16 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = sext i8 undef to i32 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = zext i8 undef to i32 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = sext i8 undef to i64 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %r14 = zext i8 undef to i64 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r15 = trunc i16 undef to i1 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r16 = trunc i16 undef to i8 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r17 = sext i16 undef to i32 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r18 = zext i16 undef to i32 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r19 = sext i16 undef to i64 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r20 = zext i16 undef to i64 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r21 = trunc i32 undef to i1 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r22 = trunc i32 undef to i8 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r23 = trunc i32 undef to i16 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r24 = sext i32 undef to i64 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r25 = zext i32 undef to i64 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r26 = trunc i64 undef to i1 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r27 = trunc i64 undef to i8 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r28 = trunc i64 undef to i16 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r29 = trunc i64 undef to i32 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r30 = fptoui float undef to i1 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %r31 = fptosi float undef to i1 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r32 = fptoui float undef to i8 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r33 = fptosi float undef to i8 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r34 = fptoui float undef to i16 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r35 = fptosi float undef to i16 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r36 = fptoui float undef to i32 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r37 = fptosi float undef to i32 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r38 = fptoui float undef to i64 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r39 = fptosi float undef to i64 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r40 = fptoui double undef to i1 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r41 = fptosi double undef to i1 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r42 = fptoui double undef to i8 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r43 = fptosi double undef to i8 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r44 = fptoui double undef to i16 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r45 = fptosi double undef to i16 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r46 = fptoui double undef to i32 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r47 = fptosi double undef to i32 +; 
CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r48 = fptoui double undef to i64 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r49 = fptosi double undef to i64 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r50 = sitofp i1 undef to float +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r51 = uitofp i1 undef to float +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r52 = sitofp i1 undef to double +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r53 = uitofp i1 undef to double +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r54 = sitofp i8 undef to float +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r55 = uitofp i8 undef to float +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r56 = sitofp i8 undef to double +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r57 = uitofp i8 undef to double +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r58 = sitofp i16 undef to float +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r59 = uitofp i16 undef to float +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r60 = sitofp i16 undef to double +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r61 = uitofp i16 undef to double +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r62 = sitofp i32 undef to float +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r63 = uitofp i32 undef to float +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %r64 = sitofp i32 undef to double +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r65 = uitofp i32 undef to double +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r66 = sitofp i64 undef to float +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r67 = uitofp i64 undef to float +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r68 = sitofp i64 undef to double +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r69 = uitofp i64 undef to double +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found 
an estimated cost of 40 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 6 
for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r86 = fpext <2 x float> undef to <2 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r87 = fpext <4 x float> undef to <4 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r88 = fpext <8 x float> undef to <8 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r89 = fpext <16 x float> undef to <16 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r93 = fptosi <2 x float> undef to <2 x i8> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r94 = fptoui <2 x float> undef to <2 x i16> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost 
Model: Found an estimated cost of 4 for instruction: %r111 = fptosi <4 x float> undef to <4 x i1> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r112 = fptoui <4 x float> undef to <4 x i8> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r113 = fptosi <4 x float> undef to <4 x i8> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r114 = fptoui <4 x float> undef to <4 x i16> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> +; 
CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r140 = fptoui <8 x double> undef 
to <8 x i1> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for 
instruction: %r155 = fptosi <16 x float> undef to <16 x i16> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> +; 
CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r173 = sitofp <2 x i8> undef to <2 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r174 = uitofp <2 x i16> undef to <2 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r184 = uitofp <2 x i16> undef to <2 
x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r191 = sitofp <4 x i1> undef to <4 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r192 = uitofp <4 x i8> undef to <4 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r193 = sitofp <4 x i8> undef to <4 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r194 = uitofp <4 x i16> undef to <4 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r199 = sitofp <4 x 
i64> undef to <4 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: 
%r214 = uitofp <8 x i16> undef to <8 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated 
cost of 16 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> +; 
CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; CHECK-V8M-BASE-LABEL: 'casts' -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r1 = zext i1 undef to i8 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = sext i1 undef to i16 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = zext i1 undef to i16 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i1 undef to i32 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i1 undef to i32 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r6 = sext i1 undef to i64 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r7 = zext i1 undef to i64 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = trunc i8 undef to i1 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %r9 = sext i8 undef to i16 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = zext i8 undef to i16 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = sext i8 undef to i32 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = zext i8 undef to i32 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = sext i8 undef to i64 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r14 = zext i8 undef to i64 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r15 = trunc i16 undef to i1 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r16 = trunc i16 undef to i8 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r17 = sext i16 undef to i32 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r18 = zext i16 undef to i32 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r19 = sext i16 undef to i64 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r20 = zext i16 undef to i64 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r21 = trunc i32 undef to i1 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r22 = trunc i32 undef to i8 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r23 = trunc i32 undef to i16 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r24 = sext i32 undef to i64 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r25 = zext i32 undef to i64 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r26 = trunc i64 undef to i1 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 
0 for instruction: %r27 = trunc i64 undef to i8 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r28 = trunc i64 undef to i16 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r29 = trunc i64 undef to i32 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r30 = fptoui float undef to i1 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r31 = fptosi float undef to i1 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r32 = fptoui float undef to i8 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r33 = fptosi float undef to i8 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r34 = fptoui float undef to i16 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r35 = fptosi float undef to i16 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r36 = fptoui float undef to i32 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r37 = fptosi float undef to i32 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r38 = fptoui float undef to i64 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r39 = fptosi float undef to i64 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r40 = fptoui double undef to i1 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r41 = fptosi double undef to i1 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r42 = fptoui double undef to i8 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r43 = fptosi double undef to i8 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r44 = fptoui double undef to i16 -; 
CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r45 = fptosi double undef to i16 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r46 = fptoui double undef to i32 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r47 = fptosi double undef to i32 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r48 = fptoui double undef to i64 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r49 = fptosi double undef to i64 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r50 = sitofp i1 undef to float -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r51 = uitofp i1 undef to float -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r52 = sitofp i1 undef to double -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r53 = uitofp i1 undef to double -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r54 = sitofp i8 undef to float -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r55 = uitofp i8 undef to float -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r56 = sitofp i8 undef to double -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r57 = uitofp i8 undef to double -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r58 = sitofp i16 undef to float -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r59 = uitofp i16 undef to float -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r60 = sitofp i16 undef to double -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r61 = uitofp i16 undef to double -; CHECK-V8M-BASE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %r62 = sitofp i32 undef to float -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r63 = uitofp i32 undef to float -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r64 = sitofp i32 undef to double -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r65 = uitofp i32 undef to double -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r66 = sitofp i64 undef to float -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r67 = uitofp i64 undef to float -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r68 = sitofp i64 undef to double -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r69 = uitofp i64 undef to double -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> -; 
CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef 
to float -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r86 = fpext <2 x float> undef to <2 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r87 = fpext <4 x float> undef to <4 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r88 = fpext <8 x float> undef to <8 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r89 = fpext <16 x float> undef to <16 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r93 = fptosi <2 x float> undef to <2 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r94 = fptoui <2 x float> undef to <2 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 
for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r111 = fptosi <4 x float> undef to <4 x i1> -; 
CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r112 = fptoui <4 x float> undef to <4 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r113 = fptosi <4 x float> undef to <4 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r114 = fptoui <4 x float> undef to <4 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for 
instruction: %r127 = fptosi <4 x double> undef to <4 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> -; 
CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an 
estimated cost of 80 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%r173 = sitofp <2 x i8> undef to <2 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r174 = uitofp <2 x i16> undef to <2 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> -; 
CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r191 = sitofp <4 x i1> undef to <4 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r192 = uitofp <4 x i8> undef to <4 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r193 = sitofp <4 x i8> undef to <4 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r194 = uitofp <4 x i16> undef to <4 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for 
instruction: %r204 = uitofp <4 x i16> undef to <4 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> -; 
CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated 
cost of 16 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
ret i32 undef +; CHECK-V8M-BASE-RECIP-LABEL: 'casts' +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r1 = zext i1 undef to i8 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = sext i1 undef to i16 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = zext i1 undef to i16 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i1 undef to i32 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i1 undef to i32 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r6 = sext i1 undef to i64 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r7 = zext i1 undef to i64 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = trunc i8 undef to i1 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = sext i8 undef to i16 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = zext i8 undef to i16 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = sext i8 undef to i32 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = zext i8 undef to i32 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = sext i8 undef to i64 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r14 = zext i8 undef to i64 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r15 = trunc i16 undef to i1 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r16 = trunc 
i16 undef to i8 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r17 = sext i16 undef to i32 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r18 = zext i16 undef to i32 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r19 = sext i16 undef to i64 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r20 = zext i16 undef to i64 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r21 = trunc i32 undef to i1 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r22 = trunc i32 undef to i8 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r23 = trunc i32 undef to i16 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r24 = sext i32 undef to i64 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r25 = zext i32 undef to i64 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r26 = trunc i64 undef to i1 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r27 = trunc i64 undef to i8 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r28 = trunc i64 undef to i16 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r29 = trunc i64 undef to i32 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r30 = fptoui float undef to i1 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r31 = fptosi float undef to i1 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r32 = fptoui float undef to i8 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r33 = 
fptosi float undef to i8 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r34 = fptoui float undef to i16 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r35 = fptosi float undef to i16 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r36 = fptoui float undef to i32 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r37 = fptosi float undef to i32 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r38 = fptoui float undef to i64 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r39 = fptosi float undef to i64 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r40 = fptoui double undef to i1 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r41 = fptosi double undef to i1 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r42 = fptoui double undef to i8 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r43 = fptosi double undef to i8 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r44 = fptoui double undef to i16 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r45 = fptosi double undef to i16 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r46 = fptoui double undef to i32 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r47 = fptosi double undef to i32 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r48 = fptoui double undef to i64 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r49 = fptosi double undef to i64 +; CHECK-V8M-BASE-RECIP-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %r50 = sitofp i1 undef to float +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r51 = uitofp i1 undef to float +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r52 = sitofp i1 undef to double +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r53 = uitofp i1 undef to double +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r54 = sitofp i8 undef to float +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r55 = uitofp i8 undef to float +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r56 = sitofp i8 undef to double +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r57 = uitofp i8 undef to double +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r58 = sitofp i16 undef to float +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r59 = uitofp i16 undef to float +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r60 = sitofp i16 undef to double +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r61 = uitofp i16 undef to double +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r62 = sitofp i32 undef to float +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r63 = uitofp i32 undef to float +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r64 = sitofp i32 undef to double +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r65 = uitofp i32 undef to double +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r66 
= sitofp i64 undef to float +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r67 = uitofp i64 undef to float +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r68 = sitofp i64 undef to double +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r69 = uitofp i64 undef to double +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an 
estimated cost of 48 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated 
cost of 24 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r86 = fpext <2 x float> undef to <2 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r87 = fpext <4 x float> undef to <4 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r88 = fpext <8 x float> undef to <8 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r89 = fpext <16 x float> undef to <16 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r93 = fptosi <2 x float> undef to <2 x i8> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r94 = fptoui <2 x float> undef to <2 x i16> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an 
estimated cost of 10 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r111 = fptosi <4 x float> undef to <4 x i1> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r112 = fptoui <4 x float> undef to <4 x i8> +; CHECK-V8M-BASE-RECIP-NEXT: Cost 
Model: Found an estimated cost of 4 for instruction: %r113 = fptosi <4 x float> undef to <4 x i8> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r114 = fptoui <4 x float> undef to <4 x i16> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> +; 
CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r142 = fptoui <8 x double> undef 
to <8 x i8> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for 
instruction: %r157 = fptosi <16 x float> undef to <16 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> +; 
CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r173 = sitofp <2 x i8> undef to <2 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r174 = uitofp <2 x i16> undef to <2 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r186 = uitofp <2 x i32> undef 
to <2 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r191 = sitofp <4 x i1> undef to <4 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r192 = uitofp <4 x i8> undef to <4 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r193 = sitofp <4 x i8> undef to <4 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r194 = uitofp <4 x i16> undef to <4 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r201 = sitofp 
<4 x i1> undef to <4 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for 
instruction: %r216 = uitofp <8 x i32> undef to <8 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: 
Found an estimated cost of 16 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r245 = sitofp <16 x i16> undef to 
<16 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; CHECK-V8R-LABEL: 'casts' -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r1 = zext i1 undef to i8 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = sext i1 undef to i16 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = zext i1 undef to i16 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i1 undef to i32 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i1 undef to i32 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r6 = sext i1 undef to i64 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r7 = zext i1 undef to i64 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = trunc i8 undef to i1 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = sext i8 undef to i16 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = zext i8 undef to i16 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = sext i8 undef to i32 -; CHECK-V8R-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %r12 = zext i8 undef to i32 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = sext i8 undef to i64 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r14 = zext i8 undef to i64 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r15 = trunc i16 undef to i1 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r16 = trunc i16 undef to i8 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r17 = sext i16 undef to i32 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r18 = zext i16 undef to i32 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r19 = sext i16 undef to i64 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r20 = zext i16 undef to i64 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r21 = trunc i32 undef to i1 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r22 = trunc i32 undef to i8 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r23 = trunc i32 undef to i16 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r24 = sext i32 undef to i64 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r25 = zext i32 undef to i64 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r26 = trunc i64 undef to i1 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r27 = trunc i64 undef to i8 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r28 = trunc i64 undef to i16 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r29 = trunc i64 undef to i32 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r30 = fptoui float undef to i1 -; CHECK-V8R-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %r31 = fptosi float undef to i1 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r32 = fptoui float undef to i8 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r33 = fptosi float undef to i8 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r34 = fptoui float undef to i16 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r35 = fptosi float undef to i16 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r36 = fptoui float undef to i32 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r37 = fptosi float undef to i32 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r38 = fptoui float undef to i64 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r39 = fptosi float undef to i64 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r40 = fptoui double undef to i1 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r41 = fptosi double undef to i1 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r42 = fptoui double undef to i8 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r43 = fptosi double undef to i8 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r44 = fptoui double undef to i16 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r45 = fptosi double undef to i16 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r46 = fptoui double undef to i32 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r47 = fptosi double undef to i32 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r48 = fptoui double undef to i64 -; CHECK-V8R-NEXT: Cost Model: Found an 
estimated cost of 10 for instruction: %r49 = fptosi double undef to i64 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r50 = sitofp i1 undef to float -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r51 = uitofp i1 undef to float -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r52 = sitofp i1 undef to double -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r53 = uitofp i1 undef to double -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r54 = sitofp i8 undef to float -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r55 = uitofp i8 undef to float -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r56 = sitofp i8 undef to double -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r57 = uitofp i8 undef to double -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r58 = sitofp i16 undef to float -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r59 = uitofp i16 undef to float -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r60 = sitofp i16 undef to double -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r61 = uitofp i16 undef to double -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r62 = sitofp i32 undef to float -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r63 = uitofp i32 undef to float -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r64 = sitofp i32 undef to double -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r65 = uitofp i32 undef to double -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r66 = sitofp i64 undef to float -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost 
of 10 for instruction: %r67 = uitofp i64 undef to float -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r68 = sitofp i64 undef to double -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r69 = uitofp i64 undef to double -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r86 = fpext <2 x float> undef to <2 x double> -; 
CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r87 = fpext <4 x float> undef to <4 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r88 = fpext <8 x float> undef to <8 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r89 = fpext <16 x float> undef to <16 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r93 = fptosi <2 x float> undef to <2 x i8> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r94 = fptoui <2 x float> undef to <2 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> -; CHECK-V8R-NEXT: Cost Model: Found 
an estimated cost of 16 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r111 = fptosi <4 x float> undef to <4 x i1> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r112 = fptoui <4 x float> undef to <4 x i8> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r113 = fptosi <4 x float> undef to <4 x i8> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r114 = fptoui <4 x float> undef to <4 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 
65 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: 
%r135 = fptosi <8 x float> undef to <8 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 131 for instruction: 
%r151 = fptosi <16 x float> undef to <16 x i1> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 
20 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r173 = sitofp <2 x i8> undef to <2 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r174 = uitofp <2 x i16> undef to <2 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %r183 = sitofp <2 x i8> undef to <2 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r191 = sitofp <4 x i1> undef to <4 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r192 = uitofp <4 x i8> undef to <4 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r193 = sitofp <4 x i8> undef to <4 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r194 = uitofp <4 x i16> undef to <4 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %r199 = 
sitofp <4 x i64> undef to <4 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r215 = sitofp <8 x i16> undef to 
<8 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r231 = sitofp <16 x i1> undef to <16 x 
float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r247 = sitofp <16 x i16> 
undef to <16 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-V8R-RECIP-LABEL: 'casts' +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r1 = zext i1 undef to i8 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = sext i1 undef to i16 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = zext i1 undef to i16 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i1 undef to i32 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i1 undef to i32 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r6 = sext i1 undef to i64 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r7 = zext i1 undef to i64 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = trunc i8 undef to i1 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = sext i8 undef to i16 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = zext i8 undef to i16 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = sext i8 undef to i32 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = zext i8 undef to i32 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = sext i8 undef to i64 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %r14 = zext i8 undef to i64 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r15 = trunc i16 undef to i1 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r16 = trunc i16 undef to i8 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r17 = sext i16 undef to i32 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r18 = zext i16 undef to i32 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r19 = sext i16 undef to i64 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r20 = zext i16 undef to i64 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r21 = trunc i32 undef to i1 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r22 = trunc i32 undef to i8 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r23 = trunc i32 undef to i16 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r24 = sext i32 undef to i64 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r25 = zext i32 undef to i64 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r26 = trunc i64 undef to i1 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r27 = trunc i64 undef to i8 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r28 = trunc i64 undef to i16 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r29 = trunc i64 undef to i32 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r30 = fptoui float undef to i1 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r31 = fptosi float undef to i1 +; 
CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r32 = fptoui float undef to i8 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r33 = fptosi float undef to i8 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r34 = fptoui float undef to i16 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r35 = fptosi float undef to i16 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r36 = fptoui float undef to i32 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r37 = fptosi float undef to i32 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r38 = fptoui float undef to i64 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r39 = fptosi float undef to i64 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r40 = fptoui double undef to i1 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r41 = fptosi double undef to i1 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r42 = fptoui double undef to i8 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r43 = fptosi double undef to i8 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r44 = fptoui double undef to i16 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r45 = fptosi double undef to i16 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r46 = fptoui double undef to i32 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r47 = fptosi double undef to i32 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r48 = fptoui double undef to i64 +; CHECK-V8R-RECIP-NEXT: 
Cost Model: Found an estimated cost of 10 for instruction: %r49 = fptosi double undef to i64 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r50 = sitofp i1 undef to float +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r51 = uitofp i1 undef to float +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r52 = sitofp i1 undef to double +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r53 = uitofp i1 undef to double +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r54 = sitofp i8 undef to float +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r55 = uitofp i8 undef to float +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r56 = sitofp i8 undef to double +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r57 = uitofp i8 undef to double +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r58 = sitofp i16 undef to float +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r59 = uitofp i16 undef to float +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r60 = sitofp i16 undef to double +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r61 = uitofp i16 undef to double +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r62 = sitofp i32 undef to float +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r63 = uitofp i32 undef to float +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r64 = sitofp i32 undef to double +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r65 = uitofp i32 undef to double +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an 
estimated cost of 10 for instruction: %r66 = sitofp i64 undef to float +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r67 = uitofp i64 undef to float +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r68 = sitofp i64 undef to double +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r69 = uitofp i64 undef to double +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: 
%rext_2 = sext <8 x i16> undef to <8 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an 
estimated cost of 16 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r86 = fpext <2 x float> undef to <2 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r87 = fpext <4 x float> undef to <4 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r88 = fpext <8 x float> undef to <8 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r89 = fpext <16 x float> undef to <16 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r93 = fptosi <2 x float> undef to <2 x i8> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r94 = fptoui <2 x float> undef to <2 x i16> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r99 = fptosi <2 x float> undef to 
<2 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r111 = fptosi <4 x float> undef to <4 x i1> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r112 = fptoui <4 x float> undef to <4 x i8> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r113 = fptosi <4 x float> undef to <4 x i8> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r114 = fptoui <4 x float> undef to <4 x i16> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found 
an estimated cost of 2 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r130 = 
fptoui <8 x float> undef to <8 x i1> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> +; 
CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> +; CHECK-V8R-RECIP-NEXT: 
Cost Model: Found an estimated cost of 135 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r173 = sitofp <2 x i8> undef to <2 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r174 = uitofp <2 x i16> undef to <2 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r191 = sitofp <4 
x i1> undef to <4 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r192 = uitofp <4 x i8> undef to <4 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r193 = sitofp <4 x i8> undef to <4 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r194 = uitofp <4 x i16> undef to <4 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> +; CHECK-V8R-RECIP-NEXT: Cost 
Model: Found an estimated cost of 5 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 19 for instruction: 
%r222 = uitofp <8 x i8> undef to <8 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r237 = sitofp <16 x i32> undef to 
<16 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; CHECK-MVE-SIZE-LABEL: 'casts' +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r1 = zext i1 undef to i8 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %r2 = sext i1 undef to i16 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = zext i1 undef to i16 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i1 undef to i32 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i1 undef to i32 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r6 = sext i1 undef to i64 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r7 = zext i1 undef to i64 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = trunc i8 undef to i1 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = sext i8 undef to i16 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = zext i8 undef to i16 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = sext i8 undef to i32 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = zext i8 undef to i32 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = sext i8 undef to i64 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r14 = zext i8 undef to i64 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r15 = trunc i16 undef to i1 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r16 = trunc i16 undef to i8 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r17 = sext i16 undef to i32 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r18 = zext i16 undef to i32 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r19 = sext i16 undef to i64 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %r20 = zext i16 undef to i64 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r21 = trunc i32 undef to i1 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r22 = trunc i32 undef to i8 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r23 = trunc i32 undef to i16 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r24 = sext i32 undef to i64 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r25 = zext i32 undef to i64 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r26 = trunc i64 undef to i1 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r27 = trunc i64 undef to i8 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r28 = trunc i64 undef to i16 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r29 = trunc i64 undef to i32 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r30 = fptoui float undef to i1 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r31 = fptosi float undef to i1 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r32 = fptoui float undef to i8 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r33 = fptosi float undef to i8 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r34 = fptoui float undef to i16 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r35 = fptosi float undef to i16 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r36 = fptoui float undef to i32 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r37 = fptosi float undef to i32 +; CHECK-MVE-SIZE-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %r38 = fptoui float undef to i64 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r39 = fptosi float undef to i64 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r40 = fptoui double undef to i1 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r41 = fptosi double undef to i1 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r42 = fptoui double undef to i8 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r43 = fptosi double undef to i8 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r44 = fptoui double undef to i16 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r45 = fptosi double undef to i16 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r46 = fptoui double undef to i32 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r47 = fptosi double undef to i32 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r48 = fptoui double undef to i64 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r49 = fptosi double undef to i64 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r50 = sitofp i1 undef to float +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r51 = uitofp i1 undef to float +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r52 = sitofp i1 undef to double +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r53 = uitofp i1 undef to double +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r54 = sitofp i8 undef to float +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %r55 = uitofp i8 undef to float +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r56 = sitofp i8 undef to double +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r57 = uitofp i8 undef to double +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r58 = sitofp i16 undef to float +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r59 = uitofp i16 undef to float +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r60 = sitofp i16 undef to double +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r61 = uitofp i16 undef to double +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r62 = sitofp i32 undef to float +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r63 = uitofp i32 undef to float +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r64 = sitofp i32 undef to double +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r65 = uitofp i32 undef to double +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r66 = sitofp i64 undef to float +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r67 = uitofp i64 undef to float +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r68 = sitofp i64 undef to double +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r69 = uitofp i64 undef to double +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s70 = sext 
<4 x i8> undef to <4 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %rext_8 = 
sext <2 x i16> undef to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r86 = fpext <2 x float> undef to <2 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r87 = fpext <4 x float> undef to <4 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r88 = fpext <8 x float> undef to <8 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r89 = fpext <16 x float> undef to <16 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an 
estimated cost of 20 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r93 = fptosi <2 x float> undef to <2 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r94 = fptoui <2 x float> undef to <2 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r105 = fptosi <2 x double> undef to <2 
x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r111 = fptosi <4 x float> undef to <4 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r112 = fptoui <4 x float> undef to <4 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r113 = fptosi <4 x float> undef to <4 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r114 = fptoui <4 x float> undef to <4 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 82 
for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> 
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an 
estimated cost of 42 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 
1312 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r173 = sitofp <2 x i8> undef to <2 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r174 = uitofp <2 x i16> undef to <2 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r182 = uitofp <2 x i8> undef to <2 x 
double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r191 = sitofp <4 x i1> undef to <4 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r192 = uitofp <4 x i8> undef to <4 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r193 = sitofp <4 x i8> undef to <4 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r194 = uitofp <4 x i16> undef to <4 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 
10 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r213 = sitofp <8 x i8> undef 
to <8 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: 
Found an estimated cost of 512 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2090 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2090 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2090 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2090 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 
2088 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2088 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2088 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2088 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; CHECK-V8M-MAIN-SIZE-LABEL: 'casts' +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r1 = zext i1 undef to i8 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = sext i1 undef to i16 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = zext i1 undef to i16 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i1 undef to i32 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i1 undef to i32 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r6 = sext i1 undef to i64 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r7 = zext i1 undef to i64 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = trunc i8 undef to i1 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = sext 
i8 undef to i16 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = zext i8 undef to i16 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = sext i8 undef to i32 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = zext i8 undef to i32 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = sext i8 undef to i64 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r14 = zext i8 undef to i64 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r15 = trunc i16 undef to i1 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r16 = trunc i16 undef to i8 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r17 = sext i16 undef to i32 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r18 = zext i16 undef to i32 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r19 = sext i16 undef to i64 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r20 = zext i16 undef to i64 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r21 = trunc i32 undef to i1 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r22 = trunc i32 undef to i8 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r23 = trunc i32 undef to i16 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r24 = sext i32 undef to i64 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r25 = zext i32 undef to i64 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r26 = trunc i64 undef to i1 +; 
CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r27 = trunc i64 undef to i8 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r28 = trunc i64 undef to i16 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r29 = trunc i64 undef to i32 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r30 = fptoui float undef to i1 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r31 = fptosi float undef to i1 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r32 = fptoui float undef to i8 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r33 = fptosi float undef to i8 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r34 = fptoui float undef to i16 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r35 = fptosi float undef to i16 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r36 = fptoui float undef to i32 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r37 = fptosi float undef to i32 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r38 = fptoui float undef to i64 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r39 = fptosi float undef to i64 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r40 = fptoui double undef to i1 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r41 = fptosi double undef to i1 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r42 = fptoui double undef to i8 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r43 = 
fptosi double undef to i8 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r44 = fptoui double undef to i16 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r45 = fptosi double undef to i16 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r46 = fptoui double undef to i32 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r47 = fptosi double undef to i32 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r48 = fptoui double undef to i64 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r49 = fptosi double undef to i64 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r50 = sitofp i1 undef to float +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r51 = uitofp i1 undef to float +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r52 = sitofp i1 undef to double +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r53 = uitofp i1 undef to double +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r54 = sitofp i8 undef to float +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r55 = uitofp i8 undef to float +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r56 = sitofp i8 undef to double +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r57 = uitofp i8 undef to double +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r58 = sitofp i16 undef to float +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r59 = uitofp i16 undef to float +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %r60 = sitofp i16 undef to double +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r61 = uitofp i16 undef to double +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r62 = sitofp i32 undef to float +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r63 = uitofp i32 undef to float +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r64 = sitofp i32 undef to double +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r65 = uitofp i32 undef to double +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r66 = sitofp i64 undef to float +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r67 = uitofp i64 undef to float +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r68 = sitofp i64 undef to double +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r69 = uitofp i64 undef to double +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for 
instruction: %q73 = zext <8 x i8> undef to <8 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_b = zext <2 x i32> 
undef to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r86 = fpext <2 x float> undef to <2 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r87 = fpext <4 x float> undef to <4 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r88 = fpext <8 x float> undef to <8 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r89 = fpext <16 x float> undef to <16 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r92 = fptoui <2 x float> undef to 
<2 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r93 = fptosi <2 x float> undef to <2 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r94 = fptoui <2 x float> undef to <2 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r107 = fptosi <2 x double> undef to <2 x 
i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r111 = fptosi <4 x float> undef to <4 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r112 = fptoui <4 x float> undef to <4 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r113 = fptosi <4 x float> undef to <4 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r114 = fptoui <4 x float> undef to <4 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r122 = fptoui <4 x double> undef to <4 x 
i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r137 = fptosi <8 x float> undef to <8 
x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r152 = fptoui <16 x 
float> undef to <16 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 
48 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r173 = sitofp <2 x i8> undef to <2 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r174 = uitofp <2 x i16> undef to <2 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated 
cost of 10 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r191 = sitofp <4 x i1> undef to <4 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r192 = uitofp <4 x i8> undef to <4 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r193 = sitofp <4 x i8> undef to <4 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r194 = uitofp <4 x i16> undef to <4 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: 
Found an estimated cost of 8 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost 
Model: Found an estimated cost of 40 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> 
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; CHECK-V8M-BASE-SIZE-LABEL: 'casts' +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r1 = zext i1 undef to i8 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = sext i1 undef to i16 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = zext i1 undef to i16 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i1 undef to i32 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i1 undef to i32 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %r6 = sext i1 undef to i64 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r7 = zext i1 undef to i64 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = trunc i8 undef to i1 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = sext i8 undef to i16 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = zext i8 undef to i16 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = sext i8 undef to i32 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = zext i8 undef to i32 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = sext i8 undef to i64 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r14 = zext i8 undef to i64 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r15 = trunc i16 undef to i1 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r16 = trunc i16 undef to i8 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r17 = sext i16 undef to i32 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r18 = zext i16 undef to i32 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r19 = sext i16 undef to i64 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r20 = zext i16 undef to i64 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r21 = trunc i32 undef to i1 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r22 = trunc i32 undef to i8 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r23 = trunc i32 undef to 
i16 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r24 = sext i32 undef to i64 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r25 = zext i32 undef to i64 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r26 = trunc i64 undef to i1 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r27 = trunc i64 undef to i8 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r28 = trunc i64 undef to i16 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r29 = trunc i64 undef to i32 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r30 = fptoui float undef to i1 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r31 = fptosi float undef to i1 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r32 = fptoui float undef to i8 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r33 = fptosi float undef to i8 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r34 = fptoui float undef to i16 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r35 = fptosi float undef to i16 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r36 = fptoui float undef to i32 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r37 = fptosi float undef to i32 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r38 = fptoui float undef to i64 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r39 = fptosi float undef to i64 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r40 = fptoui 
double undef to i1 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r41 = fptosi double undef to i1 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r42 = fptoui double undef to i8 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r43 = fptosi double undef to i8 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r44 = fptoui double undef to i16 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r45 = fptosi double undef to i16 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r46 = fptoui double undef to i32 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r47 = fptosi double undef to i32 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r48 = fptoui double undef to i64 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r49 = fptosi double undef to i64 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r50 = sitofp i1 undef to float +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r51 = uitofp i1 undef to float +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r52 = sitofp i1 undef to double +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r53 = uitofp i1 undef to double +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r54 = sitofp i8 undef to float +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r55 = uitofp i8 undef to float +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r56 = sitofp i8 undef to double +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %r57 = uitofp i8 undef to double +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r58 = sitofp i16 undef to float +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r59 = uitofp i16 undef to float +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r60 = sitofp i16 undef to double +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r61 = uitofp i16 undef to double +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r62 = sitofp i32 undef to float +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r63 = uitofp i32 undef to float +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r64 = sitofp i32 undef to double +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r65 = uitofp i32 undef to double +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r66 = sitofp i64 undef to float +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r67 = uitofp i64 undef to float +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r68 = sitofp i64 undef to double +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r69 = uitofp i64 undef to double +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r70 = sext <8 
x i8> undef to <8 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> +; 
CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r86 = fpext <2 x float> undef to <2 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r87 = fpext <4 x float> undef to <4 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r88 = fpext <8 x float> undef to <8 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r89 = fpext <16 x float> undef to <16 x double> +; 
CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r93 = fptosi <2 x float> undef to <2 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r94 = fptoui <2 x float> undef to <2 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> +; 
CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r111 = fptosi <4 x float> undef to <4 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r112 = fptoui <4 x float> undef to <4 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r113 = fptosi <4 x float> undef to <4 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r114 = fptoui <4 x float> undef to <4 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> +; 
CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r134 = fptoui <8 x float> undef to <8 x 
i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r149 = fptosi <8 x double> 
undef to <8 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for 
instruction: %r164 = fptoui <16 x double> undef to <16 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r173 = sitofp <2 x i8> undef to <2 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r174 = uitofp <2 x i16> undef to <2 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated 
cost of 6 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r191 = sitofp <4 x i1> undef to <4 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r192 = uitofp <4 x i8> undef to <4 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r193 = sitofp <4 x i8> undef to <4 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %r194 = uitofp <4 x i16> undef to <4 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost 
Model: Found an estimated cost of 8 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost 
Model: Found an estimated cost of 40 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> +; 
CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; CHECK-V8R-SIZE-LABEL: 'casts' +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r1 = zext i1 undef to i8 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = sext i1 undef to i16 +; CHECK-V8R-SIZE-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %r3 = zext i1 undef to i16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i1 undef to i32 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i1 undef to i32 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r6 = sext i1 undef to i64 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r7 = zext i1 undef to i64 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = trunc i8 undef to i1 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = sext i8 undef to i16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = zext i8 undef to i16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = sext i8 undef to i32 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = zext i8 undef to i32 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = sext i8 undef to i64 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r14 = zext i8 undef to i64 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r15 = trunc i16 undef to i1 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r16 = trunc i16 undef to i8 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r17 = sext i16 undef to i32 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r18 = zext i16 undef to i32 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r19 = sext i16 undef to i64 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r20 = zext i16 undef to i64 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated 
cost of 0 for instruction: %r21 = trunc i32 undef to i1 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r22 = trunc i32 undef to i8 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r23 = trunc i32 undef to i16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r24 = sext i32 undef to i64 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r25 = zext i32 undef to i64 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r26 = trunc i64 undef to i1 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r27 = trunc i64 undef to i8 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r28 = trunc i64 undef to i16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r29 = trunc i64 undef to i32 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r30 = fptoui float undef to i1 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r31 = fptosi float undef to i1 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r32 = fptoui float undef to i8 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r33 = fptosi float undef to i8 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r34 = fptoui float undef to i16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r35 = fptosi float undef to i16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r36 = fptoui float undef to i32 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r37 = fptosi float undef to i32 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r38 = fptoui float undef to i64 +; 
CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r39 = fptosi float undef to i64 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r40 = fptoui double undef to i1 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r41 = fptosi double undef to i1 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r42 = fptoui double undef to i8 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r43 = fptosi double undef to i8 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r44 = fptoui double undef to i16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r45 = fptosi double undef to i16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r46 = fptoui double undef to i32 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r47 = fptosi double undef to i32 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r48 = fptoui double undef to i64 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r49 = fptosi double undef to i64 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r50 = sitofp i1 undef to float +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r51 = uitofp i1 undef to float +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r52 = sitofp i1 undef to double +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r53 = uitofp i1 undef to double +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r54 = sitofp i8 undef to float +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r55 = uitofp i8 undef to float +; CHECK-V8R-SIZE-NEXT: Cost Model: Found 
an estimated cost of 2 for instruction: %r56 = sitofp i8 undef to double +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r57 = uitofp i8 undef to double +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r58 = sitofp i16 undef to float +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r59 = uitofp i16 undef to float +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r60 = sitofp i16 undef to double +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r61 = uitofp i16 undef to double +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r62 = sitofp i32 undef to float +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r63 = uitofp i32 undef to float +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r64 = sitofp i32 undef to double +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r65 = uitofp i32 undef to double +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r66 = sitofp i64 undef to float +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r67 = uitofp i64 undef to float +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r68 = sitofp i64 undef to double +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r69 = uitofp i64 undef to double +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated 
cost of 3 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 
for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r86 = fpext <2 x float> undef to <2 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r87 = fpext <4 x float> undef to <4 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r88 = fpext <8 x float> undef to <8 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r89 = fpext <16 x float> undef to <16 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> +; CHECK-V8R-SIZE-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r93 = fptosi <2 x float> undef to <2 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r94 = fptoui <2 x float> undef to <2 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r106 = fptoui <2 x double> 
undef to <2 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r111 = fptosi <4 x float> undef to <4 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r112 = fptoui <4 x float> undef to <4 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r113 = fptosi <4 x float> undef to <4 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r114 = fptoui <4 x float> undef to <4 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an 
estimated cost of 33 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r137 = fptosi <8 x float> undef 
to <8 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: 
Found an estimated cost of 15 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 
256 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r173 = sitofp <2 x i8> undef to <2 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r174 = uitofp <2 x i16> undef to <2 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> +; 
CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r191 = sitofp <4 x i1> undef to <4 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r192 = uitofp <4 x i8> undef to <4 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r193 = sitofp <4 x i8> undef to <4 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r194 = uitofp <4 x i16> undef to <4 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 57 for 
instruction: %r199 = sitofp <4 x i64> undef to <4 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float> +; 
CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 
15 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r245 = sitofp <16 
x i16> undef to <16 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - ; -- scalars -- %r0 = sext i1 undef to i8 %r1 = zext i1 undef to i8 %r2 = sext i1 undef to i16 @@ -1644,187 +2716,330 @@ define i32 @casts() { define i32 @load_extends() { -; CHECK-NEON-LABEL: 'load_extends' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %r0 = sext i8 %loadi8 to i16 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r3 = zext i8 %loadi8 to i32 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i8 %loadi8 to i64 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i8 %loadi8 to i64 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r6 = sext i16 %loadi16 to i32 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = zext i16 %loadi16 to i32 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r8 = sext i16 %loadi16 to i64 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-NEON-RECIP-LABEL: 'load_extends' +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x 
i32>, <2 x i32>* undef, align 8 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r3 = zext i8 %loadi8 to i32 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i8 %loadi8 to i64 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i8 %loadi8 to i64 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r6 = sext i16 %loadi16 to i32 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = zext i16 %loadi16 to i32 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r8 = sext i16 %loadi16 to i64 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4 = 
sext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; CHECK-MVE-LABEL: 'load_extends' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* 
undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r3 = zext i8 %loadi8 to i32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i8 %loadi8 to i64 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i8 %loadi8 to i64 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r6 = sext i16 %loadi16 to i32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = zext i16 %loadi16 to i32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r8 = sext i16 %loadi16 to i64 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4 = sext <2 x i8> 
%loadv2i8 to <2 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-MVE-RECIP-LABEL: 'load_extends' +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r3 = zext i8 %loadi8 to i32 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i8 %loadi8 to i64 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i8 %loadi8 to i64 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r6 = sext i16 %loadi16 to i32 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = zext i16 %loadi16 to i32 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r8 = sext i16 %loadi16 to i64 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated 
cost of 0 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; CHECK-V8M-MAIN-LABEL: 'load_extends' -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r3 = zext i8 %loadi8 to i32 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i8 %loadi8 to i64 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i8 %loadi8 to i64 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r6 = sext i16 %loadi16 to i32 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = zext i16 %loadi16 to i32 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r8 = sext i16 %loadi16 to i64 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 
for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-V8M-MAIN-RECIP-LABEL: 'load_extends' +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* 
undef, align 4 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r3 = zext i8 %loadi8 to i32 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i8 %loadi8 to i64 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i8 %loadi8 to i64 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r6 = sext i16 %loadi16 to i32 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = zext i16 %loadi16 to i32 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r8 = sext i16 %loadi16 to i64 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to 
i64 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; CHECK-V8M-BASE-LABEL: 'load_extends' -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef -; CHECK-V8M-BASE-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r3 = zext i8 %loadi8 to i32 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i8 %loadi8 to i64 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i8 %loadi8 to i64 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r6 = sext i16 %loadi16 to i32 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = zext i16 %loadi16 to i32 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r8 = sext i16 %loadi16 to i64 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64 -; CHECK-V8M-BASE-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-V8M-BASE-RECIP-LABEL: 'load_extends' +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1 +; 
CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r3 = zext i8 %loadi8 to i32 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i8 %loadi8 to i64 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i8 %loadi8 to i64 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r6 = sext i16 %loadi16 to i32 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %r7 = zext i16 %loadi16 to i32 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r8 = sext i16 %loadi16 to i64 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> +; 
CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; CHECK-V8R-LABEL: 'load_extends' -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r3 = zext i8 %loadi8 to i32 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i8 %loadi8 to i64 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i8 %loadi8 to i64 -; CHECK-V8R-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: %r6 = sext i16 %loadi16 to i32 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = zext i16 %loadi16 to i32 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r8 = sext i16 %loadi16 to i64 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 
1 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-V8R-RECIP-LABEL: 'load_extends' +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r3 = zext i8 %loadi8 to i32 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i8 %loadi8 to i64 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %r5 = zext i8 %loadi8 to i64 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r6 = sext i16 %loadi16 to i32 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = zext i16 %loadi16 to i32 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r8 = sext i16 %loadi16 to i64 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v9 = zext <2 x i16> 
%loadv2i16 to <2 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; CHECK-MVE-SIZE-LABEL: 'load_extends' +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 
for instruction: %r3 = zext i8 %loadi8 to i32 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i8 %loadi8 to i64 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i8 %loadi8 to i64 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r6 = sext i16 %loadi16 to i32 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = zext i16 %loadi16 to i32 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r8 = sext i16 %loadi16 to i64 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an 
estimated cost of 20 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; CHECK-V8M-MAIN-SIZE-LABEL: 'load_extends' +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found 
an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r3 = zext i8 %loadi8 to i32 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i8 %loadi8 to i64 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i8 %loadi8 to i64 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r6 = sext i16 %loadi16 to i32 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = zext i16 %loadi16 to i32 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r8 = sext i16 %loadi16 to i64 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5 = zext <2 x 
i8> %loadv2i8 to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; CHECK-V8M-BASE-SIZE-LABEL: 'load_extends' +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r3 = zext i8 %loadi8 to i32 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i8 %loadi8 to i64 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i8 %loadi8 to i64 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r6 = sext i16 %loadi16 to i32 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = zext i16 %loadi16 to i32 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r8 = sext i16 %loadi16 to i64 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 
x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; CHECK-V8R-SIZE-LABEL: 'load_extends' +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4 +; CHECK-V8R-SIZE-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r3 = zext i8 %loadi8 to i32 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i8 %loadi8 to i64 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i8 %loadi8 to i64 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r6 = sext i16 %loadi16 to i32 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = zext i16 %loadi16 to i32 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r8 = sext i16 %loadi16 to i64 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> +; 
CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %loadi8 = load i8, i8* undef %loadi16 = load i16, i16* undef %loadi32 = load i32, i32* undef @@ -1865,60 +3080,104 @@ define i32 @load_extends() { } define i32 @bitcasts() { -; CHECK-NEON-LABEL: 'bitcasts' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = 
bitcast i32 undef to float -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = bitcast i64 undef to double -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-NEON-RECIP-LABEL: 'bitcasts' +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = bitcast i64 undef to double +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; CHECK-MVE-RECIP-LABEL: 'bitcasts' +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to 
float +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = bitcast i64 undef to double +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; CHECK-V8M-MAIN-RECIP-LABEL: 'bitcasts' +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = bitcast i64 undef to double +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f = bitcast double undef to i64 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; CHECK-V8M-BASE-RECIP-LABEL: 'bitcasts' +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = bitcast i64 undef to double +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f = bitcast double undef to i64 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; CHECK-V8R-RECIP-LABEL: 'bitcasts' +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = bitcast i64 undef to double +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half +; CHECK-V8R-RECIP-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; CHECK-MVE-LABEL: 'bitcasts' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = bitcast i64 undef to double -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-MVE-SIZE-LABEL: 'bitcasts' +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = bitcast i64 undef to double +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast 
i16 undef to half +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; CHECK-V8M-MAIN-LABEL: 'bitcasts' -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = bitcast i64 undef to double -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f = bitcast double undef to i64 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-V8M-MAIN-SIZE-LABEL: 'bitcasts' +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = bitcast i64 undef to double +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f = bitcast double undef to i64 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %g = bitcast half undef to i16 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; CHECK-V8M-BASE-LABEL: 'bitcasts' -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = bitcast i64 undef to double -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f = bitcast double undef to i64 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-V8M-BASE-SIZE-LABEL: 'bitcasts' +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = bitcast i64 undef to double +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %f = bitcast double undef to i64 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; CHECK-V8R-LABEL: 'bitcasts' -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = bitcast i64 undef to double -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-V8R-SIZE-LABEL: 'bitcasts' +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = bitcast i64 undef 
to double +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %a = bitcast i32 undef to i32 %b = bitcast float undef to float From 3d4c873a14fe2ffb5cd6ac329354857eef245196 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Mon, 25 May 2020 17:30:41 +0300 Subject: [PATCH 074/770] [yaml2obj] - Map section names to chunks for each ELFYAML::ProgramHeader early. NFCI. Each `ELFYAML::ProgramHeader` currently contains a list of section names included. We are trying to map them to Fill/Sections very late, though we can create such mapping early, in `initProgramHeaders`. The benefit is that with such change it is possible to access mapped chunks earlier (for example during writing section content) and have simpler code. 
Differential revision: https://reviews.llvm.org/D80520 --- llvm/include/llvm/ObjectYAML/ELFYAML.h | 27 ++++++++------ llvm/lib/ObjectYAML/ELFEmitter.cpp | 51 ++++++++++++++++---------- 2 files changed, 46 insertions(+), 32 deletions(-) diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h index 2fd18fcd2957c..22ed82289ca8c 100644 --- a/llvm/include/llvm/ObjectYAML/ELFYAML.h +++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h @@ -90,18 +90,6 @@ struct SectionName { StringRef Section; }; -struct ProgramHeader { - ELF_PT Type; - ELF_PF Flags; - llvm::yaml::Hex64 VAddr; - llvm::yaml::Hex64 PAddr; - Optional Align; - Optional FileSize; - Optional MemSize; - Optional Offset; - std::vector Sections; -}; - struct Symbol { StringRef Name; ELF_STT Type; @@ -503,6 +491,21 @@ struct MipsABIFlags : Section { } }; +struct ProgramHeader { + ELF_PT Type; + ELF_PF Flags; + llvm::yaml::Hex64 VAddr; + llvm::yaml::Hex64 PAddr; + Optional Align; + Optional FileSize; + Optional MemSize; + Optional Offset; + + std::vector Sections; + // This vector is parallel to Sections and contains corresponding chunks. 
+ std::vector Chunks; +}; + struct Object { FileHeader Header; std::vector ProgramHeaders; diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index 95d74eeeb6e6e..78093491704be 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -330,7 +330,13 @@ void ELFState::writeELFHeader(ContiguousBlobAccumulator &CBA, raw_ostream template void ELFState::initProgramHeaders(std::vector &PHeaders) { - for (const auto &YamlPhdr : Doc.ProgramHeaders) { + DenseMap NameToFill; + for (const std::unique_ptr &D : Doc.Chunks) + if (auto S = dyn_cast(D.get())) + NameToFill[S->Name] = S; + + std::vector Sections = Doc.getSections(); + for (ELFYAML::ProgramHeader &YamlPhdr : Doc.ProgramHeaders) { Elf_Phdr Phdr; zero(Phdr); Phdr.p_type = YamlPhdr.Type; @@ -338,6 +344,23 @@ void ELFState::initProgramHeaders(std::vector &PHeaders) { Phdr.p_vaddr = YamlPhdr.VAddr; Phdr.p_paddr = YamlPhdr.PAddr; PHeaders.push_back(Phdr); + + // Map Sections list to corresponding chunks. 
+ for (const ELFYAML::SectionName &SecName : YamlPhdr.Sections) { + if (ELFYAML::Fill *Fill = NameToFill.lookup(SecName.Section)) { + YamlPhdr.Chunks.push_back(Fill); + continue; + } + + unsigned Index; + if (SN2I.lookup(SecName.Section, Index)) { + YamlPhdr.Chunks.push_back(Sections[Index]); + continue; + } + + reportError("unknown section or fill referenced: '" + SecName.Section + + "' by program header"); + } } } @@ -757,31 +780,19 @@ template void ELFState::reportError(const Twine &Msg) { template std::vector ELFState::getPhdrFragments(const ELFYAML::ProgramHeader &Phdr, - ArrayRef SHeaders) { - DenseMap NameToFill; - for (const std::unique_ptr &D : Doc.Chunks) - if (auto S = dyn_cast(D.get())) - NameToFill[S->Name] = S; - + ArrayRef SHeaders) { std::vector Ret; - for (const ELFYAML::SectionName &SecName : Phdr.Sections) { - unsigned Index; - if (SN2I.lookup(SecName.Section, Index)) { - const typename ELFT::Shdr &H = SHeaders[Index]; - Ret.push_back({H.sh_offset, H.sh_size, H.sh_type, H.sh_addralign}); - continue; - } - - if (ELFYAML::Fill *Fill = NameToFill.lookup(SecName.Section)) { - Ret.push_back({*Fill->Offset, Fill->Size, llvm::ELF::SHT_PROGBITS, + for (const ELFYAML::Chunk *C : Phdr.Chunks) { + if (const ELFYAML::Fill *F = dyn_cast(C)) { + Ret.push_back({*F->Offset, F->Size, llvm::ELF::SHT_PROGBITS, /*ShAddrAlign=*/1}); continue; } - reportError("unknown section or fill referenced: '" + SecName.Section + - "' by program header"); + const ELFYAML::Section *S = cast(C); + const Elf_Shdr &H = SHeaders[SN2I.get(S->Name)]; + Ret.push_back({H.sh_offset, H.sh_size, H.sh_type, H.sh_addralign}); } - return Ret; } From 590f3a72c243b888ab10c4f9e71bf7f8eca99717 Mon Sep 17 00:00:00 2001 From: Xing GUO Date: Tue, 26 May 2020 17:14:23 +0800 Subject: [PATCH 075/770] [ObjectYAML][DWARF] Use .empty() to indicate if the DWARF sections are empty. 
--- llvm/lib/ObjectYAML/DWARFYAML.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/ObjectYAML/DWARFYAML.cpp b/llvm/lib/ObjectYAML/DWARFYAML.cpp index 31c469f880042..63aea17324b66 100644 --- a/llvm/lib/ObjectYAML/DWARFYAML.cpp +++ b/llvm/lib/ObjectYAML/DWARFYAML.cpp @@ -16,11 +16,11 @@ namespace llvm { bool DWARFYAML::Data::isEmpty() const { - return 0 == DebugStrings.size() + AbbrevDecls.size() + ARanges.size() + - DebugRanges.size() + PubNames.Entries.size() + - PubTypes.Entries.size() + GNUPubNames.Entries.size() + - GNUPubTypes.Entries.size() + CompileUnits.size() + - DebugLines.size(); + return DebugStrings.empty() && AbbrevDecls.empty() && ARanges.empty() && + DebugRanges.empty() && PubNames.Entries.empty() && + PubTypes.Entries.empty() && GNUPubNames.Entries.empty() && + GNUPubTypes.Entries.empty() && CompileUnits.empty() && + DebugLines.empty(); } namespace yaml { From 2c04b8aacd070e88e64f08998dc583319e994d18 Mon Sep 17 00:00:00 2001 From: Xing GUO Date: Tue, 26 May 2020 17:22:23 +0800 Subject: [PATCH 076/770] [ObjectYAML][DWARF] Make variable names consistent. 
--- llvm/include/llvm/ObjectYAML/DWARFYAML.h | 2 +- llvm/lib/ObjectYAML/DWARFYAML.cpp | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/ObjectYAML/DWARFYAML.h b/llvm/include/llvm/ObjectYAML/DWARFYAML.h index 8049d4911b86e..11b41e13b8e24 100644 --- a/llvm/include/llvm/ObjectYAML/DWARFYAML.h +++ b/llvm/include/llvm/ObjectYAML/DWARFYAML.h @@ -210,7 +210,7 @@ template <> struct MappingTraits { }; template <> struct MappingTraits { - static void mapping(IO &IO, DWARFYAML::ARange &Range); + static void mapping(IO &IO, DWARFYAML::ARange &ARange); }; template <> struct MappingTraits { diff --git a/llvm/lib/ObjectYAML/DWARFYAML.cpp b/llvm/lib/ObjectYAML/DWARFYAML.cpp index 63aea17324b66..7ba73783cf63b 100644 --- a/llvm/lib/ObjectYAML/DWARFYAML.cpp +++ b/llvm/lib/ObjectYAML/DWARFYAML.cpp @@ -70,13 +70,13 @@ void MappingTraits::mapping( } void MappingTraits::mapping(IO &IO, - DWARFYAML::ARange &Range) { - IO.mapRequired("Length", Range.Length); - IO.mapRequired("Version", Range.Version); - IO.mapRequired("CuOffset", Range.CuOffset); - IO.mapRequired("AddrSize", Range.AddrSize); - IO.mapRequired("SegSize", Range.SegSize); - IO.mapRequired("Descriptors", Range.Descriptors); + DWARFYAML::ARange &ARange) { + IO.mapRequired("Length", ARange.Length); + IO.mapRequired("Version", ARange.Version); + IO.mapRequired("CuOffset", ARange.CuOffset); + IO.mapRequired("AddrSize", ARange.AddrSize); + IO.mapRequired("SegSize", ARange.SegSize); + IO.mapRequired("Descriptors", ARange.Descriptors); } void MappingTraits::mapping( From c5bbc8dd6d686175788e6c1a5fc0339814a5adfc Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Tue, 26 May 2020 10:43:58 +0100 Subject: [PATCH 077/770] [NFC][ARM] Fix for previous commit Actually analyse code-size for the size runs... 
--- llvm/test/Analysis/CostModel/ARM/cast.ll | 1748 +++++++++++----------- 1 file changed, 874 insertions(+), 874 deletions(-) diff --git a/llvm/test/Analysis/CostModel/ARM/cast.ll b/llvm/test/Analysis/CostModel/ARM/cast.ll index 0e509c1f57b4f..8d022f11e62c0 100644 --- a/llvm/test/Analysis/CostModel/ARM/cast.ll +++ b/llvm/test/Analysis/CostModel/ARM/cast.ll @@ -4,10 +4,10 @@ ; RUN: opt -cost-model -analyze -mtriple=thumbv8m.main-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-MAIN-RECIP ; RUN: opt -cost-model -analyze -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE-RECIP ; RUN: opt -cost-model -analyze -mtriple=armv8r-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8R-RECIP -; RUN: opt -cost-model -analyze -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVE-SIZE -; RUN: opt -cost-model -analyze -mtriple=thumbv8m.main-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-MAIN-SIZE -; RUN: opt -cost-model -analyze -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE-SIZE -; RUN: opt -cost-model -analyze -mtriple=armv8r-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8R-SIZE +; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVE-SIZE +; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8m.main-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-MAIN-SIZE +; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE-SIZE +; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=armv8r-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8R-SIZE define i32 @casts() { ; -- scalars -- @@ -1366,7 +1366,7 @@ define i32 @casts() { ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r16 = trunc i16 undef to i8 ; CHECK-MVE-SIZE-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %r17 = sext i16 undef to i32 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r18 = zext i16 undef to i32 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r19 = sext i16 undef to i64 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r19 = sext i16 undef to i64 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r20 = zext i16 undef to i64 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r21 = trunc i32 undef to i1 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r22 = trunc i32 undef to i8 @@ -1399,219 +1399,219 @@ define i32 @casts() { ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r49 = fptosi double undef to i64 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r50 = sitofp i1 undef to float ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r51 = uitofp i1 undef to float -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r52 = sitofp i1 undef to double -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r53 = uitofp i1 undef to double +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r52 = sitofp i1 undef to double +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r53 = uitofp i1 undef to double ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r54 = sitofp i8 undef to float ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r55 = uitofp i8 undef to float -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r56 = sitofp i8 undef to double -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost 
of 4 for instruction: %r57 = uitofp i8 undef to double +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r56 = sitofp i8 undef to double +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r57 = uitofp i8 undef to double ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r58 = sitofp i16 undef to float ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r59 = uitofp i16 undef to float -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r60 = sitofp i16 undef to double -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r61 = uitofp i16 undef to double +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r60 = sitofp i16 undef to double +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r61 = uitofp i16 undef to double ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r62 = sitofp i32 undef to float ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r63 = uitofp i32 undef to float -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r64 = sitofp i32 undef to double -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r65 = uitofp i32 undef to double +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r64 = sitofp i32 undef to double +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r65 = uitofp i32 undef to double ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r66 = sitofp i64 undef to float ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r67 = uitofp i64 undef to float -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r68 = sitofp i64 
undef to double -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r69 = uitofp i64 undef to double -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> -; 
CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r68 = sitofp i64 undef to double +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r69 = uitofp i64 undef to double +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> +; CHECK-MVE-SIZE-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r86 = fpext <2 x float> undef to <2 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r87 = fpext <4 x float> undef to <4 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r88 = fpext <8 x 
float> undef to <8 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r89 = fpext <16 x float> undef to <16 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r93 = fptosi <2 x float> undef to <2 x i8> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r94 = fptoui <2 x float> undef to <2 x i16> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an 
estimated cost of 20 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r111 = fptosi <4 x float> undef to <4 x i1> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r112 = fptoui <4 x float> undef to <4 x i8> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r113 = fptosi <4 x float> undef to <4 x i8> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r114 = fptoui <4 x float> undef to <4 x i16> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r119 = fptosi <4 x float> undef 
to <4 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an 
estimated cost of 10 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r150 = 
fptoui <16 x float> undef to <16 x i1> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %r165 = fptosi <16 x double> 
undef to <16 x i16> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r173 = sitofp <2 x i8> undef to <2 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r174 = uitofp <2 x i16> undef to <2 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: 
Found an estimated cost of 32 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r191 = sitofp <4 x i1> undef to <4 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r192 = uitofp <4 x i8> undef to <4 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r193 = sitofp <4 x i8> undef to <4 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r194 = uitofp <4 x i16> undef to <4 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r196 = uitofp <4 x 
i32> undef to <4 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> -; CHECK-MVE-SIZE-NEXT: 
Cost Model: Found an estimated cost of 10 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 522 for 
instruction: %r227 = sitofp <8 x i16> undef to <8 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2090 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2090 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2090 for instruction: %r242 = uitofp <16 x 
i8> undef to <16 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2090 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2088 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2088 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2088 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2088 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r86 = fpext <2 x float> undef to <2 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r87 = fpext <4 x float> undef to <4 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r88 = fpext <8 x float> undef to <8 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r89 = fpext <16 x float> undef to <16 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %r93 = fptosi <2 x float> undef to <2 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r94 = fptoui <2 x float> undef to <2 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r108 = fptoui <2 x double> undef to <2 x 
i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r111 = fptosi <4 x float> undef to <4 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r112 = fptoui <4 x float> undef to <4 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r113 = fptosi <4 x float> undef to <4 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r114 = fptoui <4 x float> undef to <4 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %r124 = fptoui <4 x double> undef to <4 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> +; 
CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %r155 = fptosi <16 x float> undef to <16 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r170 = uitofp <2 x i1> undef 
to <2 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r173 = sitofp <2 x i8> undef to <2 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r174 = uitofp <2 x i16> undef to <2 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r191 = sitofp <4 x i1> undef to <4 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r192 = uitofp <4 x i8> undef to <4 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r193 = sitofp <4 x i8> undef to <4 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r194 = uitofp <4 x i16> undef to <4 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> +; 
CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %r217 = sitofp <8 x i32> undef to <8 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> +; 
CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-SIZE-LABEL: 'casts' ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 @@ -1633,7 +1633,7 @@ define i32 @casts() { ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r16 = trunc i16 undef to i8 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r17 = sext i16 undef to i32 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r18 = zext i16 undef to i32 -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r19 = sext i16 undef to i64 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r19 = sext i16 undef to i64 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r20 = zext i16 undef to i64 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r21 = trunc i32 undef to i1 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r22 = trunc i32 undef to i8 @@ -1684,201 +1684,201 @@ define i32 @casts() { ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r67 = uitofp i64 undef to float ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r68 = sitofp i64 undef to double ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r69 = uitofp i64 undef to double -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost 
Model: Found an estimated cost of 8 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: 
%rext_6 = sext <2 x i8> undef to <2 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> +; 
CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an 
estimated cost of 6 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r86 = fpext <2 x float> undef to <2 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r87 = fpext <4 x float> undef to <4 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r88 = fpext <8 x float> undef to <8 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r89 = fpext <16 x float> undef to <16 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> -; CHECK-V8M-MAIN-SIZE-NEXT: 
Cost Model: Found an estimated cost of 2 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r93 = fptosi <2 x float> undef to <2 x i8> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r94 = fptoui <2 x float> undef to <2 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost 
Model: Found an estimated cost of 6 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r111 = fptosi <4 x float> undef to <4 x i1> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r112 = fptoui <4 x float> undef to <4 x i8> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r113 = fptosi <4 x float> undef to <4 x i8> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r114 = fptoui <4 x float> undef to <4 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost 
Model: Found an estimated cost of 12 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: 
Cost Model: Found an estimated cost of 8 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> -; 
CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r166 = 
fptoui <16 x double> undef to <16 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r173 = sitofp <2 x i8> undef to <2 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r174 = uitofp <2 x i16> undef to <2 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: 
%r181 = sitofp <2 x i1> undef to <2 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r191 = sitofp <4 x i1> undef to <4 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r192 = uitofp <4 x i8> undef to <4 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r193 = sitofp <4 x i8> undef to <4 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r194 = uitofp <4 x i16> undef to <4 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %r196 = uitofp <4 x i32> undef to <4 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated 
cost of 8 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an 
estimated cost of 40 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> -; 
CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r86 = fpext <2 x float> undef to <2 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r87 = fpext <4 x float> undef to <4 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r88 = fpext <8 x float> undef to <8 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r89 = fpext <16 x float> undef to <16 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> +; 
CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r93 = fptosi <2 x float> undef to <2 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r94 = fptoui <2 x float> undef to <2 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> +; 
CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r111 = fptosi <4 x float> undef to <4 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r112 = fptoui <4 x float> undef to <4 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r113 = fptosi <4 x float> undef to <4 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r114 = fptoui <4 x float> undef to <4 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> +; 
CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> +; 
CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> 
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r165 = fptosi <16 x 
double> undef to <16 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r173 = sitofp <2 x i8> undef to <2 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r174 = uitofp <2 x i16> undef to <2 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r180 = uitofp 
<2 x i1> undef to <2 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r191 = sitofp <4 x i1> undef to <4 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r192 = uitofp <4 x i8> undef to <4 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r193 = sitofp <4 x i8> undef to <4 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r194 = uitofp <4 x i16> undef to <4 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r195 = sitofp <4 
x i16> undef to <4 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r210 = uitofp 
<8 x i1> undef to <8 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r225 = sitofp <8 x 
i16> undef to <8 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%r240 = uitofp <16 x i1> undef to <16 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-SIZE-LABEL: 'casts' ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 @@ -1900,7 +1900,7 @@ define i32 @casts() { ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r16 = trunc i16 undef to i8 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r17 = sext i16 undef to i32 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r18 = zext i16 undef to i32 -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%r19 = sext i16 undef to i64 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r19 = sext i16 undef to i64 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r20 = zext i16 undef to i64 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r21 = trunc i32 undef to i1 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r22 = trunc i32 undef to i8 @@ -1951,201 +1951,201 @@ define i32 @casts() { ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r67 = uitofp i64 undef to float ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r68 = sitofp i64 undef to double ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r69 = uitofp i64 undef to double -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for 
instruction: %r72 = zext <8 x i8> undef to <8 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q71 = sext <8 x i8> 
undef to <8 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r86 = fpext <2 x float> undef to <2 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r87 = fpext <4 x float> undef to <4 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r88 = fpext <8 x float> undef to <8 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r89 = fpext <16 x float> undef to <16 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r93 = fptosi <2 x float> undef to <2 x i8> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r94 = fptoui <2 x float> undef to <2 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated 
cost of 10 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r111 = fptosi <4 x float> undef to <4 x i1> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r112 = fptoui <4 x float> undef to <4 x i8> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r113 = fptosi <4 x float> undef to <4 x i8> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated 
cost of 4 for instruction: %r114 = fptoui <4 x float> undef to <4 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an 
estimated cost of 8 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an 
estimated cost of 24 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> -; 
CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r173 = sitofp <2 
x i8> undef to <2 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r174 = uitofp <2 x i16> undef to <2 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r188 = 
uitofp <2 x i64> undef to <2 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r191 = sitofp <4 x i1> undef to <4 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r192 = uitofp <4 x i8> undef to <4 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r193 = sitofp <4 x i8> undef to <4 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r194 = uitofp <4 x i16> undef to <4 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r203 = 
sitofp <4 x i8> undef to <4 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r218 
= uitofp <8 x i64> undef to <8 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for 
instruction: %r233 = sitofp <16 x i8> undef to <16 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-BASE-SIZE-NEXT: 
Cost Model: Found an estimated cost of 32 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r86 = fpext <2 x float> undef to <2 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r87 = fpext <4 x float> undef to <4 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r88 = fpext <8 x float> undef to <8 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r89 = fpext <16 x float> undef to <16 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r93 = fptosi <2 x float> undef to <2 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r94 = fptoui <2 x float> undef to <2 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r111 = fptosi <4 x float> undef to <4 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r112 = fptoui <4 x float> undef to <4 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %r113 = fptosi <4 x float> undef to <4 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r114 = fptoui <4 x float> undef to <4 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> +; 
CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r173 = sitofp <2 x i8> undef to <2 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r174 = uitofp <2 x i16> undef to <2 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> +; 
CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r191 = sitofp <4 x i1> undef to <4 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r192 = uitofp <4 x i8> undef to <4 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r193 = sitofp <4 x i8> undef to <4 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r194 = uitofp <4 x i16> undef to <4 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> +; 
CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> +; 
CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r232 = uitofp <16 x i8> undef to <16 x 
float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r247 = 
sitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-SIZE-LABEL: 'casts' ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 @@ -2167,7 +2167,7 @@ define i32 @casts() { ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r16 = trunc i16 undef to i8 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r17 = sext i16 undef to i32 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r18 = zext i16 undef to i32 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r19 = sext i16 undef to i64 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r19 = sext i16 undef to i64 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r20 = zext i16 undef to i64 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r21 = trunc i32 undef to i1 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r22 = trunc i32 undef to i8 @@ -2178,80 +2178,80 @@ define i32 @casts() { ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r27 = trunc i64 undef to i8 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r28 = trunc i64 undef to i16 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r29 = trunc i64 undef to i32 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r30 = fptoui float undef to i1 -; CHECK-V8R-SIZE-NEXT: 
Cost Model: Found an estimated cost of 2 for instruction: %r31 = fptosi float undef to i1 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r32 = fptoui float undef to i8 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r33 = fptosi float undef to i8 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r34 = fptoui float undef to i16 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r35 = fptosi float undef to i16 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r36 = fptoui float undef to i32 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r37 = fptosi float undef to i32 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r38 = fptoui float undef to i64 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r39 = fptosi float undef to i64 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r40 = fptoui double undef to i1 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r41 = fptosi double undef to i1 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r42 = fptoui double undef to i8 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r43 = fptosi double undef to i8 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r44 = fptoui double undef to i16 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r45 = fptosi double undef to i16 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r46 = fptoui double undef to i32 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r47 = fptosi double undef to i32 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 
for instruction: %r48 = fptoui double undef to i64 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r49 = fptosi double undef to i64 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r50 = sitofp i1 undef to float -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r51 = uitofp i1 undef to float -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r52 = sitofp i1 undef to double -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r53 = uitofp i1 undef to double -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r54 = sitofp i8 undef to float -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r55 = uitofp i8 undef to float -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r56 = sitofp i8 undef to double -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r57 = uitofp i8 undef to double -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r58 = sitofp i16 undef to float -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r59 = uitofp i16 undef to float -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r60 = sitofp i16 undef to double -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r61 = uitofp i16 undef to double -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r62 = sitofp i32 undef to float -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r63 = uitofp i32 undef to float -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r64 = sitofp i32 undef to double -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r65 = uitofp i32 undef 
to double -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r66 = sitofp i64 undef to float -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r67 = uitofp i64 undef to float -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r68 = sitofp i64 undef to double -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r69 = uitofp i64 undef to double -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r30 = fptoui float undef to i1 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r31 = fptosi float undef to i1 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r32 = fptoui float undef to i8 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r33 = fptosi float undef to i8 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r34 = fptoui float undef to i16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r35 = fptosi float undef to i16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r36 = fptoui float undef to i32 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r37 = fptosi float undef to i32 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r38 = fptoui float undef to i64 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r39 = fptosi float undef to i64 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r40 = fptoui double undef to i1 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r41 = fptosi double undef to i1 +; CHECK-V8R-SIZE-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %r42 = fptoui double undef to i8 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r43 = fptosi double undef to i8 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r44 = fptoui double undef to i16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r45 = fptosi double undef to i16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r46 = fptoui double undef to i32 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r47 = fptosi double undef to i32 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r48 = fptoui double undef to i64 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r49 = fptosi double undef to i64 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r50 = sitofp i1 undef to float +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r51 = uitofp i1 undef to float +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r52 = sitofp i1 undef to double +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r53 = uitofp i1 undef to double +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r54 = sitofp i8 undef to float +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r55 = uitofp i8 undef to float +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r56 = sitofp i8 undef to double +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r57 = uitofp i8 undef to double +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r58 = sitofp i16 undef to float +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %r59 = uitofp i16 undef to float +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r60 = sitofp i16 undef to double +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r61 = uitofp i16 undef to double +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r62 = sitofp i32 undef to float +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r63 = uitofp i32 undef to float +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r64 = sitofp i32 undef to double +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r65 = uitofp i32 undef to double +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r66 = sitofp i64 undef to float +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r67 = uitofp i64 undef to float +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r68 = sitofp i64 undef to double +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r69 = uitofp i64 undef to double +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q70 = sext <4 x i8> undef to <4 x i32> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q71 = sext <8 x i8> undef to <8 x i16> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %s70 = sext <4 x i8> undef to <4 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r70 = sext <8 x i8> undef to <8 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r71 = sext <16 x i8> undef to <16 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q72 = zext <4 x i8> undef to <4 x i32> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q73 = zext <8 x i8> undef to <8 x i16> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %rext_b = zext <2 x i32> undef to <2 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r86 = fpext <2 x float> undef to <2 x double> -; CHECK-V8R-SIZE-NEXT: Cost 
Model: Found an estimated cost of 4 for instruction: %r87 = fpext <4 x float> undef to <4 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r88 = fpext <8 x float> undef to <8 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r89 = fpext <16 x float> undef to <16 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r86 = fpext <2 x float> undef to <2 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r87 = fpext <4 x float> undef to <4 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r88 = fpext <8 x float> undef to <8 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r89 = fpext <16 x float> undef to <16 x double> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> @@ -2260,159 +2260,159 @@ define i32 @casts() { ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 
16 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r111 = fptosi <4 x float> undef to <4 x i1> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r112 = fptoui <4 x float> undef to <4 x i8> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r113 = fptosi <4 x float> undef to <4 x i8> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r114 = fptoui <4 x float> undef to <4 x i16> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> +; 
CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r111 = fptosi <4 x float> undef to <4 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r112 = fptoui <4 x float> undef to <4 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%r113 = fptosi <4 x float> undef to <4 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r114 = fptoui <4 x float> undef to <4 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> -; 
CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 67 for 
instruction: %r144 = fptoui <8 x double> undef to <8 x i16> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %r159 = fptosi <16 x float> 
undef to <16 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r173 = sitofp <2 x i8> undef to <2 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r174 = uitofp <2 x i16> undef to <2 x float> -; 
CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %r132 = fptoui <8 x float> undef to <8 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> +; 
CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r173 = sitofp <2 x i8> undef to <2 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r174 = uitofp <2 x i16> undef to <2 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %r178 = uitofp <2 x i64> undef to <2 
x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r191 = sitofp <4 x i1> undef to <4 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r192 = uitofp <4 x i8> undef to <4 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r193 = sitofp <4 x i8> undef to <4 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 
for instruction: %r194 = uitofp <4 x i16> undef to <4 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r191 = sitofp <4 x i1> undef to <4 x float> +; 
CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r192 = uitofp <4 x i8> undef to <4 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r193 = sitofp <4 x i8> undef to <4 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r194 = uitofp <4 x i16> undef to <4 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for 
instruction: %r207 = sitofp <4 x i32> undef to <4 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> -; 
CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an 
estimated cost of 228 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r200 = uitofp <4 x i1> 
undef to <4 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r231 = sitofp <16 x i1> undef to <16 x 
float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %r0 = sext i1 undef to i8 %r1 = zext i1 undef to i8 @@ -2900,12 +2900,12 @@ define i32 @load_extends() { ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4 
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 @@ -2922,26 +2922,26 @@ define i32 @load_extends() { ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-SIZE-LABEL: 'load_extends' ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4 -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2 -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4 -; 
CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8 -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4 -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8 -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 @@ -2954,30 +2954,30 @@ define i32 @load_extends() { ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %r10 = sext i32 %loadi32 to i64 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64 -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> +; 
CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-SIZE-LABEL: 'load_extends' ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4 -; CHECK-V8M-BASE-SIZE-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2 -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4 -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8 -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4 -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8 -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 @@ 
-2990,19 +2990,19 @@ define i32 @load_extends() { ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64 -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> -; 
CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-SIZE-LABEL: 'load_extends' ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1 @@ -3028,17 +3028,17 @@ define i32 @load_extends() { ; 
CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> +; 
CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %loadi8 = load i8, i8* undef %loadi16 = load i16, i16* undef @@ -3144,29 +3144,29 @@ define i32 @bitcasts() { ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-SIZE-LABEL: 'bitcasts' ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32 -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = bitcast i64 undef to double -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f = bitcast double undef to i64 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %e = bitcast i64 undef to double +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-SIZE-LABEL: 'bitcasts' ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32 -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = bitcast i64 undef to double -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f = bitcast double undef to i64 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = bitcast i64 undef to double +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-V8M-BASE-SIZE-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-SIZE-LABEL: 'bitcasts' ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 @@ -3177,7 +3177,7 @@ define i32 @bitcasts() { ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %a = bitcast i32 undef to i32 %b = bitcast float undef to float From 2e365ca2f7ce7a1f4a3938d79b894324b383ce5c Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Sat, 23 May 2020 18:34:08 +0300 Subject: [PATCH 078/770] [DebugInfo/llvm-objdump] - Print "ZERO terminator" for terminator entries when dumping .eh_frame. A CIE with the Length == 0 is a terminator: https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html And GNU objdump recognizes them and prints the following for such entries: "00000000 ZERO terminator" This patch teaches llvm-objdump to do the same. I had to update tests to use "CHECK-NEXT" too. (Note: it looks perhaps not right that printing is done inside the DebugInfo library, I'd expect to see the change in the llvm-objdump's code somewhere instead, but that is how it is done atm). 
Differential revision: https://reviews.llvm.org/D80476 --- llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp | 6 +++ .../tools/llvm-objdump/eh_frame-mipsel.test | 49 +++++++++---------- .../tools/llvm-objdump/eh_frame_zero_cie.test | 13 ++--- 3 files changed, 33 insertions(+), 35 deletions(-) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp index 0e8d521f94330..51dc54e49fcc9 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -304,6 +304,12 @@ constexpr uint64_t getCIEId(bool IsDWARF64, bool IsEH) { } void CIE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const { + // A CIE with a zero length is a terminator entry in the .eh_frame section. + if (IsEH && Length == 0) { + OS << format("%08" PRIx64, Offset) << " ZERO terminator\n"; + return; + } + OS << format("%08" PRIx64, Offset) << format(" %0*" PRIx64, IsDWARF64 ? 16 : 8, Length) << format(" %0*" PRIx64, IsDWARF64 && !IsEH ? 
16 : 8, diff --git a/llvm/test/tools/llvm-objdump/eh_frame-mipsel.test b/llvm/test/tools/llvm-objdump/eh_frame-mipsel.test index e89d9aeb53cb2..91058e28effc1 100644 --- a/llvm/test/tools/llvm-objdump/eh_frame-mipsel.test +++ b/llvm/test/tools/llvm-objdump/eh_frame-mipsel.test @@ -1,28 +1,25 @@ # RUN: llvm-objdump --dwarf=frames %p/Inputs/eh_frame.elf-mipsel | FileCheck %s -# CHECK: .eh_frame contents: - -# CHECK: 00000000 00000018 00000000 CIE -# CHECK: Version: 1 -# CHECK: Augmentation: "zPLR" -# CHECK: Code alignment factor: 1 -# CHECK: Data alignment factor: -4 -# CHECK: Return address column: 31 -# CHECK: Augmentation data: 80 90 0B 41 00 00 0B - -# CHECK: DW_CFA_def_cfa: reg29 +0 - -# CHECK: 0000001c 00000018 00000020 FDE cie=00000000 pc=00400890...004008dc -# CHECK: DW_CFA_advance_loc: 4 -# CHECK: DW_CFA_def_cfa_offset: +24 -# CHECK: DW_CFA_advance_loc: 4 -# CHECK: DW_CFA_offset: reg31 -4 -# CHECK: DW_CFA_nop: - -## FIXME: GNU objdump prints "00000038 ZERO terminator" instead. -# CHECK: 00000038 00000000 00000000 CIE -# CHECK: Version: 0 -# CHECK: Augmentation: "" -# CHECK: Code alignment factor: 0 -# CHECK: Data alignment factor: 0 -# CHECK: Return address column: 0 +# CHECK: .eh_frame contents: +# CHECK-EMPTY: +# CHECK-NEXT: 00000000 00000018 00000000 CIE +# CHECK-NEXT: Version: 1 +# CHECK-NEXT: Augmentation: "zPLR" +# CHECK-NEXT: Code alignment factor: 1 +# CHECK-NEXT: Data alignment factor: -4 +# CHECK-NEXT: Return address column: 31 +# CHECK-NEXT: Personality Address: 0000000000410b90 +# CHECK-NEXT: Augmentation data: 80 90 0B 41 00 00 0B +# CHECK-EMPTY: +# CHECK-NEXT: DW_CFA_def_cfa: reg29 +0 +# CHECK-EMPTY: +# CHECK-NEXT: 0000001c 00000018 00000020 FDE cie=00000000 pc=00400890...004008dc +# CHECK-NEXT: LSDA Address: 0000000000400a90 +# CHECK-NEXT: DW_CFA_advance_loc: 4 +# CHECK-NEXT: DW_CFA_def_cfa_offset: +24 +# CHECK-NEXT: DW_CFA_advance_loc: 4 +# CHECK-NEXT: DW_CFA_offset: reg31 -4 +# CHECK-NEXT: DW_CFA_nop: +# CHECK-EMPTY: +# CHECK-NEXT: 00000038 
ZERO terminator +# CHECK-NOT: {{.}} diff --git a/llvm/test/tools/llvm-objdump/eh_frame_zero_cie.test b/llvm/test/tools/llvm-objdump/eh_frame_zero_cie.test index 510c944028cdf..30bbec9b97230 100644 --- a/llvm/test/tools/llvm-objdump/eh_frame_zero_cie.test +++ b/llvm/test/tools/llvm-objdump/eh_frame_zero_cie.test @@ -1,11 +1,6 @@ # RUN: llvm-objdump --dwarf=frames %p/Inputs/eh_frame_zero_cie.o 2>/dev/null | FileCheck %s -# CHECK: .eh_frame contents: - -## FIXME: GNU objdump prints "00000000 ZERO terminator" instead. -# CHECK: 00000000 00000000 00000000 CIE -# CHECK: Version: 0 -# CHECK: Augmentation: "" -# CHECK: Code alignment factor: 0 -# CHECK: Data alignment factor: 0 -# CHECK: Return address column: 0 +# CHECK: .eh_frame contents: +# CHECK-EMPTY: +# CHECK-NEXT: 00000000 ZERO terminator +# CHECK-NOT: {{.}} From 8b4639d0a0e0e65f23e0315f7ade83b9126472af Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 26 May 2020 10:12:04 +0100 Subject: [PATCH 079/770] [X86][AVX] Add some initial movmsk combine tests Show failure to reduce the signbit extraction for 256-bit integer vectors on AVX1 targets where the pcmpgt/ashr has to be done with split 128-bit vectors. 
--- llvm/test/CodeGen/X86/combine-movmsk-avx.ll | 98 +++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 llvm/test/CodeGen/X86/combine-movmsk-avx.ll diff --git a/llvm/test/CodeGen/X86/combine-movmsk-avx.ll b/llvm/test/CodeGen/X86/combine-movmsk-avx.ll new file mode 100644 index 0000000000000..0de723e287c62 --- /dev/null +++ b/llvm/test/CodeGen/X86/combine-movmsk-avx.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2 + +declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) +declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) + +; +; TODO - Avoid sign extension ops when just extracting the sign bits. +; + +define i32 @movmskpd_cmpgt_v4i64(<4 x i64> %a0) { +; AVX1-LABEL: movmskpd_cmpgt_v4i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm1 +; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3] +; AVX1-NEXT: vmovmskpd %ymm0, %eax +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: movmskpd_cmpgt_v4i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovmskpd %ymm0, %eax +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq + %1 = icmp sgt <4 x i64> zeroinitializer, %a0 + %2 = sext <4 x i1> %1 to <4 x i64> + %3 = bitcast <4 x i64> %2 to <4 x double> + %4 = tail call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %3) + ret i32 %4 +} + +define i32 @movmskps_ashr_v8i32(<8 x i32> %a0) { +; AVX1-LABEL: movmskps_ashr_v8i32: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsrad $31, %xmm0, %xmm1 +; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] +; AVX1-NEXT: vmovmskps %ymm0, %eax +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: movmskps_ashr_v8i32: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovmskps %ymm0, %eax +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq + 
%1 = ashr <8 x i32> %a0, + %2 = bitcast <8 x i32> %1 to <8 x float> + %3 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %2) + ret i32 %3 +} + +define i32 @movmskps_sext_v4i64(<4 x i32> %a0) { +; AVX1-LABEL: movmskps_sext_v4i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovmskpd %ymm0, %eax +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: movmskps_sext_v4i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 +; AVX2-NEXT: vmovmskpd %ymm0, %eax +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq + %1 = sext <4 x i32> %a0 to <4 x i64> + %2 = bitcast <4 x i64> %1 to <4 x double> + %3 = tail call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %2) + ret i32 %3 +} + +define i32 @movmskps_sext_v8i32(<8 x i16> %a0) { +; AVX1-LABEL: movmskps_sext_v8i32: +; AVX1: # %bb.0: +; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovmskps %ymm0, %eax +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: movmskps_sext_v8i32: +; AVX2: # %bb.0: +; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 +; AVX2-NEXT: vmovmskps %ymm0, %eax +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq + %1 = sext <8 x i16> %a0 to <8 x i32> + %2 = bitcast <8 x i32> %1 to <8 x float> + %3 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %2) + ret i32 %3 +} From 6f802ec4333cc1227bb37e258a81e9a588f964dc Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 26 May 2020 10:55:43 +0100 Subject: [PATCH 080/770] [X86] Fix fshr comment copy+paste typo. NFC. Noticed by @foad on D80466. 
--- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 54a80151eb69a..6bf61af00590c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -19073,7 +19073,7 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, bool ExpandFunnel = !OptForSize && Subtarget.isSHLDSlow(); // fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z & (bw-1))) >> bw. - // fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z & (bw-1))) >> bw. + // fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z & (bw-1))). if ((VT == MVT::i8 || (ExpandFunnel && VT == MVT::i16)) && !isa(Amt)) { unsigned EltSizeInBits = VT.getScalarSizeInBits(); From 09f7dcb64e1b2a3568ddb6ab327dd2f4a4d3d0fe Mon Sep 17 00:00:00 2001 From: hsmahesha Date: Tue, 26 May 2020 15:47:03 +0530 Subject: [PATCH 081/770] [AMDGPU/MemOpsCluster] Code clean-up around mem ops clustering logic Summary: Clean-up code around mem ops clustering logic. This patch cleans up code within the function clusterNeighboringMemOps(). It is WIP, and this patch is a first cut. 
Reviewers: foad, rampitec, arsenm, vpykhtin, javedabsar Reviewed By: foad Subscribers: MatzeB, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, javed.absar, kerbowa, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D80119 --- llvm/lib/CodeGen/MachineScheduler.cpp | 64 ++++++++++++++++----------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index 55b0075338619..92fd3edf92364 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -1580,34 +1580,48 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps( return; llvm::sort(MemOpRecords); + + // At this point, `MemOpRecords` array must hold at least two mem ops. Try to + // cluster mem ops collected within `MemOpRecords` array. unsigned ClusterLength = 1; for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) { - SUnit *SUa = MemOpRecords[Idx].SU; - SUnit *SUb = MemOpRecords[Idx+1].SU; - if (TII->shouldClusterMemOps(MemOpRecords[Idx].BaseOps, - MemOpRecords[Idx + 1].BaseOps, - ClusterLength + 1)) { - if (SUa->NodeNum > SUb->NodeNum) - std::swap(SUa, SUb); - if (DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) { - LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU(" - << SUb->NodeNum << ")\n"); - // Copy successor edges from SUa to SUb. Interleaving computation - // dependent on SUa can prevent load combining due to register reuse. - // Predecessor edges do not need to be copied from SUb to SUa since - // nearby loads should have effectively the same inputs. 
- for (const SDep &Succ : SUa->Succs) { - if (Succ.getSUnit() == SUb) - continue; - LLVM_DEBUG(dbgs() - << " Copy Succ SU(" << Succ.getSUnit()->NodeNum << ")\n"); - DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial)); - } - ++ClusterLength; - } else - ClusterLength = 1; - } else + // Decision to cluster mem ops is taken based on target dependent logic + auto MemOpa = MemOpRecords[Idx]; + auto MemOpb = MemOpRecords[Idx + 1]; + ++ClusterLength; + if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, + ClusterLength)) { + // Current mem ops pair could not be clustered, reset cluster length, and + // go to next pair + ClusterLength = 1; + continue; + } + + SUnit *SUa = MemOpa.SU; + SUnit *SUb = MemOpb.SU; + if (SUa->NodeNum > SUb->NodeNum) + std::swap(SUa, SUb); + + // FIXME: Is this check really required? + if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) { ClusterLength = 1; + continue; + } + + LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU(" + << SUb->NodeNum << ")\n"); + + // Copy successor edges from SUa to SUb. Interleaving computation + // dependent on SUa can prevent load combining due to register reuse. + // Predecessor edges do not need to be copied from SUb to SUa since + // nearby loads should have effectively the same inputs. + for (const SDep &Succ : SUa->Succs) { + if (Succ.getSUnit() == SUb) + continue; + LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum + << ")\n"); + DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial)); + } } } From 5229dd1366ab1423d66d3d16dddff6fbaee049d8 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 26 May 2020 06:23:57 -0400 Subject: [PATCH 082/770] [build] Add LLVM_LOCAL_RPATH which can set an rpath on just unit test binaries After D80096, bots that build clang for distribution and that can't use system gcc / libstdc++ need to pass a working rpath so that unit test binaries can run. 
The method suggested in GettingStarted.rst works fine for local development, but it results in an absolute local rpath ending up even in distributed binaries like clang, which is both ugly and unnecessary. Add an explicit toggle that can be used to add an rpath only for the non-distributed binaries that need it. Differential Revision: https://reviews.llvm.org/D80534 --- llvm/CMakeLists.txt | 3 +++ llvm/cmake/modules/AddLLVM.cmake | 4 ++++ llvm/docs/GettingStarted.rst | 10 ++++++++++ 3 files changed, 17 insertions(+) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 3fbe1e6cf9a4b..06b8646ca37ba 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -439,6 +439,9 @@ set(LLVM_ABI_BREAKING_CHECKS "WITH_ASSERTS" CACHE STRING option(LLVM_FORCE_USE_OLD_TOOLCHAIN "Set to ON to force using an old, unsupported host toolchain." OFF) +set(LLVM_LOCAL_RPATH "" CACHE FILEPATH + "If set, an absolute path added as rpath on binaries that do not already contain an executable-relative rpath.") + option(LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN "Set to ON to only warn when using a toolchain which is about to be deprecated, instead of emitting an error." 
OFF) diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake index 5b6b7f56777b4..9f14561fe0a6f 100644 --- a/llvm/cmake/modules/AddLLVM.cmake +++ b/llvm/cmake/modules/AddLLVM.cmake @@ -821,6 +821,10 @@ macro(add_llvm_executable name) if(NOT ARG_NO_INSTALL_RPATH) llvm_setup_rpath(${name}) + elseif (LLVM_LOCAL_RPATH) + set_target_properties(${name} PROPERTIES + BUILD_WITH_INSTALL_RPATH On + INSTALL_RPATH "${LLVM_LOCAL_RPATH}") endif() if(DEFINED windows_resource_file) diff --git a/llvm/docs/GettingStarted.rst b/llvm/docs/GettingStarted.rst index 46e337d2cec95..5cce01b72c116 100644 --- a/llvm/docs/GettingStarted.rst +++ b/llvm/docs/GettingStarted.rst @@ -340,6 +340,16 @@ If you fail to set rpath, most LLVM binaries will fail on startup with a message from the loader similar to ``libstdc++.so.6: version `GLIBCXX_3.4.20' not found``. This means you need to tweak the -rpath linker flag. +This method will add an absolute path to the rpath of all executables. That's +fine for local development. If you want to distribute the binaries you build +so that they can run on older systems, copy ``libstdc++.so.6`` into the +``lib/`` directory. All of LLVM's shipping binaries have an rpath pointing at +``$ORIGIN/../lib``, so they will find ``libstdc++.so.6`` there. Non-distributed +binaries don't have an rpath set and won't find ``libstdc++.so.6``. Pass +``-DLLVM_LOCAL_RPATH="$HOME/toolchains/lib64"`` to cmake to add an absolute +path to ``libstdc++.so.6`` as above. Since these binaries are not distributed, +having an absolute local path is fine for them. + When you build Clang, you will need to give *it* access to modern C++ standard library in order to use it as your new host in part of a bootstrap. 
There are two easy ways to do this, either build (and install) libc++ along From 3785eb83af4161bd52ed993ef3a2184c998071e6 Mon Sep 17 00:00:00 2001 From: Eduardo Caldas Date: Tue, 26 May 2020 12:19:07 +0200 Subject: [PATCH 083/770] Add support for binary operators in Syntax Trees Reviewers: gribozavr2 Reviewed By: gribozavr2 Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D80540 --- clang/include/clang/Tooling/Syntax/Nodes.h | 22 ++ clang/lib/Tooling/Syntax/BuildTree.cpp | 19 +- clang/lib/Tooling/Syntax/Nodes.cpp | 23 ++ clang/unittests/Tooling/Syntax/TreeTest.cpp | 269 +++++++++++++++++++- 4 files changed, 315 insertions(+), 18 deletions(-) diff --git a/clang/include/clang/Tooling/Syntax/Nodes.h b/clang/include/clang/Tooling/Syntax/Nodes.h index f4d482bb848c6..5db99d4b9e350 100644 --- a/clang/include/clang/Tooling/Syntax/Nodes.h +++ b/clang/include/clang/Tooling/Syntax/Nodes.h @@ -40,6 +40,7 @@ enum class NodeKind : uint16_t { // Expressions. UnknownExpression, + BinaryOperatorExpression, // Statements. UnknownStatement, @@ -104,6 +105,9 @@ enum class NodeRole : uint8_t { BodyStatement, // Roles specific to particular node kinds. + BinaryOperatorExpression_leftHandSide, + BinaryOperatorExpression_operatorToken, + BinaryOperatorExpression_rightHandSide, CaseStatement_value, IfStatement_thenStatement, IfStatement_elseKeyword, @@ -158,6 +162,24 @@ class UnknownExpression final : public Expression { } }; +/// +/// +/// For example: +/// a + b +/// a bitor 1 +/// a |= b +/// a and_eq b +class BinaryOperatorExpression final : public Expression { +public: + BinaryOperatorExpression() : Expression(NodeKind::BinaryOperatorExpression) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::BinaryOperatorExpression; + } + syntax::Expression *lhs(); + syntax::Leaf *operatorToken(); + syntax::Expression *rhs(); +}; + /// An abstract node for C++ statements, e.g. 'while', 'if', etc. 
/// FIXME: add accessors for semicolon of statements that have it. class Statement : public Tree { diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp index 11058edec615d..8fee44cdbf10d 100644 --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -11,6 +11,7 @@ #include "clang/AST/DeclBase.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclarationName.h" +#include "clang/AST/Expr.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" #include "clang/AST/TypeLoc.h" @@ -594,10 +595,7 @@ class BuildTreeVisitor : public RecursiveASTVisitor { for (auto *D : DS->decls()) Builder.noticeDeclWithoutSemicolon(D); } else if (auto *E = llvm::dyn_cast_or_null(S)) { - // Do not recurse into subexpressions. - // We do not have syntax trees for expressions yet, so we only want to see - // the first top-level expression. - return WalkUpFromExpr(E->IgnoreImplicit()); + return RecursiveASTVisitor::TraverseStmt(E->IgnoreImplicit()); } return RecursiveASTVisitor::TraverseStmt(S); } @@ -610,6 +608,19 @@ class BuildTreeVisitor : public RecursiveASTVisitor { return true; } + bool WalkUpFromBinaryOperator(BinaryOperator *S) { + Builder.markExprChild( + S->getLHS(), syntax::NodeRole::BinaryOperatorExpression_leftHandSide); + Builder.markChildToken( + S->getOperatorLoc(), + syntax::NodeRole::BinaryOperatorExpression_operatorToken); + Builder.markExprChild( + S->getRHS(), syntax::NodeRole::BinaryOperatorExpression_rightHandSide); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::BinaryOperatorExpression, S); + return true; + } + bool WalkUpFromNamespaceDecl(NamespaceDecl *S) { auto Tokens = Builder.getDeclarationRange(S); if (Tokens.front().kind() == tok::coloncolon) { diff --git a/clang/lib/Tooling/Syntax/Nodes.cpp b/clang/lib/Tooling/Syntax/Nodes.cpp index 75f025e5f8536..84c0143db81d3 100644 --- a/clang/lib/Tooling/Syntax/Nodes.cpp +++ 
b/clang/lib/Tooling/Syntax/Nodes.cpp @@ -18,6 +18,8 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) { return OS << "TranslationUnit"; case NodeKind::UnknownExpression: return OS << "UnknownExpression"; + case NodeKind::BinaryOperatorExpression: + return OS << "BinaryOperatorExpression"; case NodeKind::UnknownStatement: return OS << "UnknownStatement"; case NodeKind::DeclarationStatement: @@ -110,6 +112,12 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeRole R) { return OS << "IfStatement_elseKeyword"; case syntax::NodeRole::IfStatement_elseStatement: return OS << "IfStatement_elseStatement"; + case syntax::NodeRole::BinaryOperatorExpression_leftHandSide: + return OS << "BinaryOperatorExpression_leftHandSide"; + case syntax::NodeRole::BinaryOperatorExpression_operatorToken: + return OS << "BinaryOperatorExpression_operatorToken"; + case syntax::NodeRole::BinaryOperatorExpression_rightHandSide: + return OS << "BinaryOperatorExpression_rightHandSide"; case syntax::NodeRole::ReturnStatement_value: return OS << "ReturnStatement_value"; case syntax::NodeRole::ExpressionStatement_expression: @@ -142,6 +150,21 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeRole R) { llvm_unreachable("invalid role"); } +syntax::Expression *syntax::BinaryOperatorExpression::lhs() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::BinaryOperatorExpression_leftHandSide)); +} + +syntax::Leaf *syntax::BinaryOperatorExpression::operatorToken() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::BinaryOperatorExpression_operatorToken)); +} + +syntax::Expression *syntax::BinaryOperatorExpression::rhs() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::BinaryOperatorExpression_rightHandSide)); +} + syntax::Leaf *syntax::SwitchStatement::switchKeyword() { return llvm::cast_or_null( findChild(syntax::NodeRole::IntroducerKeyword)); diff --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp 
b/clang/unittests/Tooling/Syntax/TreeTest.cpp index d11a3652c8e1f..634f99f7c395c 100644 --- a/clang/unittests/Tooling/Syntax/TreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTest.cpp @@ -564,7 +564,8 @@ void test() { |-{ |-ExpressionStatement | |-UnknownExpression - | | |-test + | | |-UnknownExpression + | | | `-test | | |-( | | `-) | `-; @@ -576,14 +577,16 @@ void test() { | |-) | |-ExpressionStatement | | |-UnknownExpression - | | | |-test + | | | |-UnknownExpression + | | | | `-test | | | |-( | | | `-) | | `-; | |-else | `-ExpressionStatement | |-UnknownExpression - | | |-test + | | |-UnknownExpression + | | | `-test | | |-( | | `-) | `-; @@ -591,6 +594,237 @@ void test() { )txt"); } +TEST_F(SyntaxTreeTest, BinaryOperator) { + expectTreeDumpEqual( + R"cpp( +void test(int a) { + 1 - 2; + 1 == 2; + a = 1; + a <<= 1; + + true || false; + true or false; + + 1 & 2; + 1 bitand 2; + + a ^= 3; + a xor_eq 3; +} + )cpp", + R"txt( +*: TranslationUnit +`-SimpleDeclaration + |-void + |-SimpleDeclarator + | |-test + | `-ParametersAndQualifiers + | |-( + | |-SimpleDeclaration + | | |-int + | | `-SimpleDeclarator + | | `-a + | `-) + `-CompoundStatement + |-{ + |-ExpressionStatement + | |-BinaryOperatorExpression + | | |-UnknownExpression + | | | `-1 + | | |-- + | | `-UnknownExpression + | | `-2 + | `-; + |-ExpressionStatement + | |-BinaryOperatorExpression + | | |-UnknownExpression + | | | `-1 + | | |-== + | | `-UnknownExpression + | | `-2 + | `-; + |-ExpressionStatement + | |-BinaryOperatorExpression + | | |-UnknownExpression + | | | `-a + | | |-= + | | `-UnknownExpression + | | `-1 + | `-; + |-ExpressionStatement + | |-BinaryOperatorExpression + | | |-UnknownExpression + | | | `-a + | | |-<<= + | | `-UnknownExpression + | | `-1 + | `-; + |-ExpressionStatement + | |-BinaryOperatorExpression + | | |-UnknownExpression + | | | `-true + | | |-|| + | | `-UnknownExpression + | | `-false + | `-; + |-ExpressionStatement + | |-BinaryOperatorExpression + | | |-UnknownExpression + | 
| | `-true + | | |-or + | | `-UnknownExpression + | | `-false + | `-; + |-ExpressionStatement + | |-BinaryOperatorExpression + | | |-UnknownExpression + | | | `-1 + | | |-& + | | `-UnknownExpression + | | `-2 + | `-; + |-ExpressionStatement + | |-BinaryOperatorExpression + | | |-UnknownExpression + | | | `-1 + | | |-bitand + | | `-UnknownExpression + | | `-2 + | `-; + |-ExpressionStatement + | |-BinaryOperatorExpression + | | |-UnknownExpression + | | | `-a + | | |-^= + | | `-UnknownExpression + | | `-3 + | `-; + |-ExpressionStatement + | |-BinaryOperatorExpression + | | |-UnknownExpression + | | | `-a + | | |-xor_eq + | | `-UnknownExpression + | | `-3 + | `-; + `-} +)txt"); +} + +TEST_F(SyntaxTreeTest, NestedBinaryOperator) { + expectTreeDumpEqual( + R"cpp( +void test(int a, int b) { + (1 + 2) * (4 / 2); + a + b + 42; + a = b = 42; + a + b * 4 + 2; + a % 2 + b * 42; +} + )cpp", + R"txt( +*: TranslationUnit +`-SimpleDeclaration + |-void + |-SimpleDeclarator + | |-test + | `-ParametersAndQualifiers + | |-( + | |-SimpleDeclaration + | | |-int + | | `-SimpleDeclarator + | | `-a + | |-, + | |-SimpleDeclaration + | | |-int + | | `-SimpleDeclarator + | | `-b + | `-) + `-CompoundStatement + |-{ + |-ExpressionStatement + | |-BinaryOperatorExpression + | | |-UnknownExpression + | | | |-( + | | | |-BinaryOperatorExpression + | | | | |-UnknownExpression + | | | | | `-1 + | | | | |-+ + | | | | `-UnknownExpression + | | | | `-2 + | | | `-) + | | |-* + | | `-UnknownExpression + | | |-( + | | |-BinaryOperatorExpression + | | | |-UnknownExpression + | | | | `-4 + | | | |-/ + | | | `-UnknownExpression + | | | `-2 + | | `-) + | `-; + |-ExpressionStatement + | |-BinaryOperatorExpression + | | |-BinaryOperatorExpression + | | | |-UnknownExpression + | | | | `-a + | | | |-+ + | | | `-UnknownExpression + | | | `-b + | | |-+ + | | `-UnknownExpression + | | `-42 + | `-; + |-ExpressionStatement + | |-BinaryOperatorExpression + | | |-UnknownExpression + | | | `-a + | | |-= + | | 
`-BinaryOperatorExpression + | | |-UnknownExpression + | | | `-b + | | |-= + | | `-UnknownExpression + | | `-42 + | `-; + |-ExpressionStatement + | |-BinaryOperatorExpression + | | |-BinaryOperatorExpression + | | | |-UnknownExpression + | | | | `-a + | | | |-+ + | | | `-BinaryOperatorExpression + | | | |-UnknownExpression + | | | | `-b + | | | |-* + | | | `-UnknownExpression + | | | `-4 + | | |-+ + | | `-UnknownExpression + | | `-2 + | `-; + |-ExpressionStatement + | |-BinaryOperatorExpression + | | |-BinaryOperatorExpression + | | | |-UnknownExpression + | | | | `-a + | | | |-% + | | | `-UnknownExpression + | | | `-2 + | | |-+ + | | `-BinaryOperatorExpression + | | |-UnknownExpression + | | | `-b + | | |-* + | | `-UnknownExpression + | | `-42 + | `-; + `-} +)txt"); +} + TEST_F(SyntaxTreeTest, MultipleDeclaratorsGrouping) { expectTreeDumpEqual( R"cpp( @@ -1201,10 +1435,12 @@ void test() { |-IfStatement | |-I: if | |-I: ( - | |-I: UnknownExpression - | | |-I: 1 + | |-I: BinaryOperatorExpression + | | |-I: UnknownExpression + | | | `-I: 1 | | |-I: + - | | `-I: 1 + | | `-I: UnknownExpression + | | `-I: 1 | |-I: ) | |-I: CompoundStatement | | |-I: { @@ -1312,13 +1548,17 @@ void f(int xs[static 10]); | | | `-] | | |-= | | `-UnknownExpression -| | |-{ -| | |-1 -| | |-, -| | |-2 -| | |-, -| | |-3 -| | `-} +| | `-UnknownExpression +| | |-{ +| | |-UnknownExpression +| | | `-1 +| | |-, +| | |-UnknownExpression +| | | `-2 +| | |-, +| | |-UnknownExpression +| | | `-3 +| | `-} | `-; `-SimpleDeclaration |-void @@ -1628,7 +1868,8 @@ const int const *const *volatile b; | | |-= | | `-UnknownExpression | | |-- -| | `-1 +| | `-UnknownExpression +| | `-1 | `-; |-SimpleDeclaration | |-int From 8aaabadeced32a1cd959a5b1524b9c927e82bcc0 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Tue, 26 May 2020 11:27:57 +0100 Subject: [PATCH 084/770] [CostModel] Unify getCastInstrCost Add the remaining cast instruction opcodes to the base implementation of getUserCost and directly return the 
result. This allows getInstructionThroughput to return getUserCost for the casts. This has required changes to PPC and SystemZ because they implement getUserCost and/or getCastInstrCost with adjustments for vector operations. Adjustments have also been made in the remaining backends that implement the method so that they still produce a cost of zero or one for cost kinds other than throughput. Differential Revision: https://reviews.llvm.org/D79848 --- .../llvm/Analysis/TargetTransformInfoImpl.h | 14 ++++---- llvm/lib/Analysis/TargetTransformInfo.cpp | 6 ++-- .../AArch64/AArch64TargetTransformInfo.cpp | 13 ++++++-- .../lib/Target/ARM/ARMTargetTransformInfo.cpp | 30 ++++++++++------- .../Hexagon/HexagonTargetTransformInfo.cpp | 6 +++- .../Target/PowerPC/PPCTargetTransformInfo.cpp | 9 +++-- .../SystemZ/SystemZTargetTransformInfo.cpp | 6 ++++ .../lib/Target/X86/X86TargetTransformInfo.cpp | 33 +++++++++++-------- 8 files changed, 76 insertions(+), 41 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 60de70dcb16a0..bd8d29cb22a12 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -826,18 +826,18 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { return TTI::TCC_Expensive; case Instruction::IntToPtr: case Instruction::PtrToInt: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::FPToUI: + case Instruction::FPToSI: case Instruction::Trunc: + case Instruction::FPTrunc: case Instruction::BitCast: - if (TargetTTI->getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) == - TTI::TCC_Free) - return TTI::TCC_Free; - break; case Instruction::FPExt: case Instruction::SExt: case Instruction::ZExt: - if (TargetTTI->getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) == TTI::TCC_Free) - return TTI::TCC_Free; - break; + case Instruction::AddrSpaceCast: + return 
TargetTTI->getCastInstrCost(Opcode, Ty, OpTy, CostKind, I); } // By default, just classify everything as 'basic'. return TTI::TCC_Basic; diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 86952a5ad6592..a14199515faf5 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1325,10 +1325,8 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::BitCast: - case Instruction::AddrSpaceCast: { - Type *SrcTy = I->getOperand(0)->getType(); - return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, CostKind, I); - } + case Instruction::AddrSpaceCast: + return getUserCost(I, CostKind); case Instruction::ExtractElement: { const ExtractElementInst *EEI = cast(I); ConstantInt *CI = dyn_cast(I->getOperand(1)); diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 1324945c4d4ea..f0961646c31ff 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -295,11 +295,18 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, } } + // TODO: Allow non-throughput costs that aren't binary. + auto AdjustCost = [&CostKind](int Cost) { + if (CostKind != TTI::TCK_RecipThroughput) + return Cost == 0 ? 
0 : 1; + return Cost; + }; + EVT SrcTy = TLI->getValueType(DL, Src); EVT DstTy = TLI->getValueType(DL, Dst); if (!SrcTy.isSimple() || !DstTy.isSimple()) - return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I); + return AdjustCost(BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I)); static const TypeConversionCostTblEntry ConversionTbl[] = { @@ -401,9 +408,9 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT())) - return Entry->Cost; + return AdjustCost(Entry->Cost); - return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I); + return AdjustCost(BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I)); } int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst, diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 1ca74bfc3df08..c1af19727ba2b 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -173,6 +173,13 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); + // TODO: Allow non-throughput costs that aren't binary. + auto AdjustCost = [&CostKind](int Cost) { + if (CostKind != TTI::TCK_RecipThroughput) + return Cost == 0 ? 0 : 1; + return Cost; + }; + // Single to/from double precision conversions. static const CostTblEntry NEONFltDblTbl[] = { // Vector fptrunc/fpext conversions. 
@@ -185,14 +192,14 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, ISD == ISD::FP_EXTEND)) { std::pair LT = TLI->getTypeLegalizationCost(DL, Src); if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second)) - return LT.first * Entry->Cost; + return AdjustCost(LT.first * Entry->Cost); } EVT SrcTy = TLI->getValueType(DL, Src); EVT DstTy = TLI->getValueType(DL, Dst); if (!SrcTy.isSimple() || !DstTy.isSimple()) - return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I); + return AdjustCost(BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I)); // The extend of a load is free if (I && isa(I->getOperand(0))) { @@ -212,7 +219,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, }; if (const auto *Entry = ConvertCostTableLookup( LoadConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT())) - return Entry->Cost; + return AdjustCost(Entry->Cost); static const TypeConversionCostTblEntry MVELoadConversionTbl[] = { {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0}, @@ -226,7 +233,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, if (const auto *Entry = ConvertCostTableLookup(MVELoadConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT())) - return Entry->Cost; + return AdjustCost(Entry->Cost); } } @@ -253,7 +260,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, if (auto *Entry = ConvertCostTableLookup(NEONDoubleWidthTbl, UserISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT())) { - return Entry->Cost; + return AdjustCost(Entry->Cost); } } @@ -347,7 +354,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, if (const auto *Entry = ConvertCostTableLookup(NEONVectorConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT())) - return Entry->Cost; + return AdjustCost(Entry->Cost); } // Scalar float to integer conversions. 
@@ -377,7 +384,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, if (const auto *Entry = ConvertCostTableLookup(NEONFloatConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT())) - return Entry->Cost; + return AdjustCost(Entry->Cost); } // Scalar integer to float conversions. @@ -408,7 +415,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, if (const auto *Entry = ConvertCostTableLookup(NEONIntegerConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT())) - return Entry->Cost; + return AdjustCost(Entry->Cost); } // MVE extend costs, taken from codegen tests. i8->i16 or i16->i32 is one @@ -433,7 +440,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, if (const auto *Entry = ConvertCostTableLookup(MVEVectorConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT())) - return Entry->Cost * ST->getMVEVectorCostFactor(); + return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor()); } // Scalar integer conversion costs. @@ -452,13 +459,14 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, if (const auto *Entry = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT())) - return Entry->Cost; + return AdjustCost(Entry->Cost); } int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy() ? 
ST->getMVEVectorCostFactor() : 1; - return BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I); + return AdjustCost( + BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I)); } int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp index 92e32ca99090e..381941df2fb46 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -263,7 +263,11 @@ unsigned HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy, std::pair SrcLT = TLI.getTypeLegalizationCost(DL, SrcTy); std::pair DstLT = TLI.getTypeLegalizationCost(DL, DstTy); - return std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN); + unsigned Cost = std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN); + // TODO: Allow non-throughput costs that aren't binary. + if (CostKind != TTI::TCK_RecipThroughput) + return Cost == 0 ? 0 : 1; + return Cost; } return 1; } diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 002905febbc8b..a41c6b41a991b 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -212,7 +212,8 @@ int PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, unsigned PPCTTIImpl::getUserCost(const User *U, ArrayRef Operands, TTI::TargetCostKind CostKind) { - if (U->getType()->isVectorTy()) { + // We already implement getCastInstrCost and perform the vector adjustment there. + if (!isa(U) && U->getType()->isVectorTy()) { // Instructions that need to be split should cost more. 
std::pair LT = TLI->getTypeLegalizationCost(DL, U->getType()); return LT.first * BaseT::getUserCost(U, Operands, CostKind); @@ -760,7 +761,11 @@ int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I); - return vectorCostAdjustment(Cost, Opcode, Dst, Src); + Cost = vectorCostAdjustment(Cost, Opcode, Dst, Src); + // TODO: Allow non-throughput costs that aren't binary. + if (CostKind != TTI::TCK_RecipThroughput) + return Cost == 0 ? 0 : 1; + return Cost; } int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index d9efb40f0ab65..bce02cc793bf6 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -691,6 +691,12 @@ getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst, int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::TargetCostKind CostKind, const Instruction *I) { + // FIXME: Can the logic below also be used for these cost kinds? + if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency) { + int BaseCost = BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I); + return BaseCost == 0 ? 
BaseCost : 1; + } + unsigned DstScalarBits = Dst->getScalarSizeInBits(); unsigned SrcScalarBits = Src->getScalarSizeInBits(); diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 4170b102f2b31..6bfcadeaf8b67 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1368,6 +1368,13 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); + // TODO: Allow non-throughput costs that aren't binary. + auto AdjustCost = [&CostKind](int Cost) { + if (CostKind != TTI::TCK_RecipThroughput) + return Cost == 0 ? 0 : 1; + return Cost; + }; + // FIXME: Need a better design of the cost table to handle non-simple types of // potential massive combinations (elem_num x src_type x dst_type). @@ -1969,7 +1976,7 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, if (ST->hasSSE2() && !ST->hasAVX()) { if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD, LTDest.second, LTSrc.second)) - return LTSrc.first * Entry->Cost; + return AdjustCost(LTSrc.first * Entry->Cost); } EVT SrcTy = TLI->getValueType(DL, Src); @@ -1977,7 +1984,7 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, // The function getSimpleVT only handles simple value types. 
if (!SrcTy.isSimple() || !DstTy.isSimple()) - return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind); + return AdjustCost(BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind)); MVT SimpleSrcTy = SrcTy.getSimpleVT(); MVT SimpleDstTy = DstTy.getSimpleVT(); @@ -1986,59 +1993,59 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, if (ST->hasBWI()) if (const auto *Entry = ConvertCostTableLookup(AVX512BWConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) - return Entry->Cost; + return AdjustCost(Entry->Cost); if (ST->hasDQI()) if (const auto *Entry = ConvertCostTableLookup(AVX512DQConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) - return Entry->Cost; + return AdjustCost(Entry->Cost); if (ST->hasAVX512()) if (const auto *Entry = ConvertCostTableLookup(AVX512FConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) - return Entry->Cost; + return AdjustCost(Entry->Cost); } if (ST->hasBWI()) if (const auto *Entry = ConvertCostTableLookup(AVX512BWVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) - return Entry->Cost; + return AdjustCost(Entry->Cost); if (ST->hasDQI()) if (const auto *Entry = ConvertCostTableLookup(AVX512DQVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) - return Entry->Cost; + return AdjustCost(Entry->Cost); if (ST->hasAVX512()) if (const auto *Entry = ConvertCostTableLookup(AVX512VLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) - return Entry->Cost; + return AdjustCost(Entry->Cost); if (ST->hasAVX2()) { if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) - return Entry->Cost; + return AdjustCost(Entry->Cost); } if (ST->hasAVX()) { if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) - return Entry->Cost; + return AdjustCost(Entry->Cost); } if (ST->hasSSE41()) { if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) - return Entry->Cost; + return AdjustCost(Entry->Cost); } if (ST->hasSSE2()) { if (const auto *Entry = 
ConvertCostTableLookup(SSE2ConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) - return Entry->Cost; + return AdjustCost(Entry->Cost); } - return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I); + return AdjustCost(BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I)); } int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, From 6f5431846bbf3270d8fc605324e8843c5aaf579b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirst=C3=B3f=20Umann?= Date: Mon, 13 Apr 2020 15:57:18 +0200 Subject: [PATCH 085/770] [analyzer][RetainCount] Remove the CheckOSObject option As per http://lists.llvm.org/pipermail/cfe-dev/2019-August/063215.html, let's get rid of this option. It presents 2 issues that have bugged me for years now: * OSObject is NOT a boolean option. It in fact has 3 states: * osx.OSObjectRetainCount is enabled but OSObject is set to false: RetainCount regards the option as disabled. * osx.OSObjectRetainCount is enabled and OSObject is set to true: RetainCount regards the option as enabled. * osx.OSObjectRetainCount is disabled: RetainCount regards the option as disabled. * The hack involves directly modifying AnalyzerOptions::ConfigTable, which shouldn't even be public in the first place. This still isn't really ideal, because it would be better to preserve the option and remove the checker (we want visible checkers to be associated with diagnostics, and hidden options like this one to be associated with changing how the modeling is done), but backwards compatibility is an issue.
Differential Revision: https://reviews.llvm.org/D78097 --- .../clang/StaticAnalyzer/Checkers/Checkers.td | 9 ------- .../RetainCountChecker/RetainCountChecker.cpp | 24 +++---------------- clang/test/Analysis/analyzer-config.c | 1 - .../Analysis/test-separate-retaincount.cpp | 4 ---- 4 files changed, 3 insertions(+), 35 deletions(-) diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td index f0ad8326929e5..bc4b7d00e2d40 100644 --- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td +++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td @@ -1094,15 +1094,6 @@ def NSErrorChecker : Checker<"NSError">, def RetainCountChecker : Checker<"RetainCount">, HelpText<"Check for leaks and improper reference count management">, CheckerOptions<[ - CmdLineOption, CmdLineOptiongetValue() == Value; - return false; -} - void ento::registerRetainCountChecker(CheckerManager &Mgr) { auto *Chk = Mgr.getChecker(); Chk->TrackObjCAndCFObjects = true; - Chk->TrackNSCFStartParam = getOption(Mgr.getAnalyzerOptions(), - "TrackNSCFStartParam", - "true"); + Chk->TrackNSCFStartParam = Mgr.getAnalyzerOptions().getCheckerBooleanOption( + Mgr.getCurrentCheckerName(), "TrackNSCFStartParam"); } bool ento::shouldRegisterRetainCountChecker(const CheckerManager &mgr) { @@ -1509,10 +1494,7 @@ bool ento::shouldRegisterRetainCountChecker(const CheckerManager &mgr) { void ento::registerOSObjectRetainCountChecker(CheckerManager &Mgr) { auto *Chk = Mgr.getChecker(); - if (!getOption(Mgr.getAnalyzerOptions(), - "CheckOSObject", - "false")) - Chk->TrackOSObjects = true; + Chk->TrackOSObjects = true; } bool ento::shouldRegisterOSObjectRetainCountChecker(const CheckerManager &mgr) { diff --git a/clang/test/Analysis/analyzer-config.c b/clang/test/Analysis/analyzer-config.c index 7784673873821..cb3d40688e91a 100644 --- a/clang/test/Analysis/analyzer-config.c +++ b/clang/test/Analysis/analyzer-config.c @@ -99,7 +99,6 @@ // CHECK-NEXT: 
optin.osx.cocoa.localizability.NonLocalizedStringChecker:AggressiveReport = false // CHECK-NEXT: optin.performance.Padding:AllowedPad = 24 // CHECK-NEXT: osx.NumberObjectConversion:Pedantic = false -// CHECK-NEXT: osx.cocoa.RetainCount:CheckOSObject = true // CHECK-NEXT: osx.cocoa.RetainCount:TrackNSCFStartParam = false // CHECK-NEXT: prune-paths = true // CHECK-NEXT: region-store-small-struct-limit = 2 diff --git a/clang/test/Analysis/test-separate-retaincount.cpp b/clang/test/Analysis/test-separate-retaincount.cpp index 621e1d120bbb2..41efad452e5ac 100644 --- a/clang/test/Analysis/test-separate-retaincount.cpp +++ b/clang/test/Analysis/test-separate-retaincount.cpp @@ -5,10 +5,6 @@ // RUN: %clang_analyze_cc1 -std=c++14 -DNO_OS_OBJECT -verify %s \ // RUN: -analyzer-checker=core,osx \ // RUN: -analyzer-disable-checker osx.OSObjectRetainCount -// -// RUN: %clang_analyze_cc1 -std=c++14 -DNO_OS_OBJECT -verify %s \ -// RUN: -analyzer-checker=core,osx \ -// RUN: -analyzer-config "osx.cocoa.RetainCount:CheckOSObject=false" #include "os_object_base.h" From bd9dce8f9acd710ed62bab44ad3563209503cd72 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Tue, 26 May 2020 12:17:26 +0100 Subject: [PATCH 086/770] [CostModel] getUserCost for intrinsic throughput Last part of recommitting 'Unify Intrinsic Costs' 259eb619ff6dcd5b6111d1686e18559b9ca004d4. This patch now uses getUserCost from getInstructionThroughput. 
Differential Revision: https://reviews.llvm.org/D80012 --- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h | 8 ++++++++ llvm/lib/Analysis/TargetTransformInfo.cpp | 9 +++------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index bd8d29cb22a12..f98b8bf7da2c9 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -776,6 +776,14 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { TTI::TargetCostKind CostKind) { auto *TargetTTI = static_cast(this); + // FIXME: We shouldn't have to special-case intrinsics here. + if (CostKind == TTI::TCK_RecipThroughput) { + if (const IntrinsicInst *II = dyn_cast(U)) { + IntrinsicCostAttributes CostAttrs(*II); + return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind); + } + } + // FIXME: Unlikely to be true for anything but CodeSize. 
if (const auto *CB = dyn_cast(U)) { const Function *F = CB->getCalledFunction(); diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index a14199515faf5..9f319c40ae6a7 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -260,7 +260,8 @@ int TargetTransformInfo::getUserCost(const User *U, ArrayRef Operands, enum TargetCostKind CostKind) const { int Cost = TTIImpl->getUserCost(U, Operands, CostKind); - assert(Cost >= 0 && "TTI should not produce negative costs!"); + assert((CostKind == TTI::TCK_RecipThroughput || Cost >= 0) && + "TTI should not produce negative costs!"); return Cost; } @@ -1419,11 +1420,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { return TTIImpl->getShuffleCost(SK_PermuteTwoSrc, Ty, 0, nullptr); } case Instruction::Call: - if (const IntrinsicInst *II = dyn_cast(I)) { - IntrinsicCostAttributes CostAttrs(*II); - return getIntrinsicInstrCost(CostAttrs, CostKind); - } - return -1; + return getUserCost(I, CostKind); default: // We don't have any information on this instruction. return -1; From 4b7812116d513a66fb5fb3c83e7d8be08c1efc65 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 26 May 2020 12:32:29 +0100 Subject: [PATCH 087/770] MachineInstr.h - remove unnecessary MachineMemOperand forward declaration. NFC. 
We already have to include MachineMemOperand.h --- llvm/include/llvm/CodeGen/MachineInstr.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index 48db14e6cd695..1c841155e6434 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -42,7 +42,6 @@ class DIExpression; class DILocalVariable; class MachineBasicBlock; class MachineFunction; -class MachineMemOperand; class MachineRegisterInfo; class ModuleSlotTracker; class raw_ostream; From 0d52a7d038e189770984594a6ca71bea50fee4d9 Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Wed, 20 May 2020 11:55:28 +0000 Subject: [PATCH 088/770] [libc][NFC] Simplify memcpy implementation Summary: This is a NFC, it aims at simplifying both the code and build files. Reviewers: abrachet, sivachandra Subscribers: mgorny, tschuett, ecnelises, libc-commits, courbet Tags: #libc-project Differential Revision: https://reviews.llvm.org/D80291 --- libc/src/string/CMakeLists.txt | 21 +---- libc/src/string/memcpy.cpp | 22 ----- libc/src/string/memcpy_arch_specific.h.def | 65 ------------- libc/src/string/memory_utils/memcpy_utils.h | 18 ++-- libc/src/string/x86/memcpy.cpp | 94 +++++++++++++++++++ .../src/string/x86/memcpy_arch_specific.h.inc | 35 ------- .../string/memory_utils/memcpy_utils_test.cpp | 36 +++---- 7 files changed, 125 insertions(+), 166 deletions(-) delete mode 100644 libc/src/string/memcpy.cpp delete mode 100644 libc/src/string/memcpy_arch_specific.h.def create mode 100644 libc/src/string/x86/memcpy.cpp delete mode 100644 libc/src/string/x86/memcpy_arch_specific.h.inc diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt index 3fe0d861aea36..cd3a9b5f77b3f 100644 --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -68,30 +68,17 @@ endfunction() # include the relevant architecture specific implementations if(${LIBC_TARGET_MACHINE} STREQUAL 
"x86_64") - set(LIBC_STRING_TARGET_FOLDER "x86") + set(LIBC_STRING_TARGET_ARCH "x86") else() - set(LIBC_STRING_TARGET_FOLDER ${LIBC_TARGET_MACHINE}) + set(LIBC_STRING_TARGET_ARCH ${LIBC_TARGET_MACHINE}) endif() -add_gen_header( - memcpy_arch_specific - DEF_FILE - memcpy_arch_specific.h.def - GEN_HDR - memcpy_arch_specific.h - PARAMS - memcpy_arch_specific=${LIBC_STRING_TARGET_FOLDER}/memcpy_arch_specific.h.inc - DATA_FILES - ${LIBC_STRING_TARGET_FOLDER}/memcpy_arch_specific.h.inc -) - function(add_memcpy memcpy_name) add_implementation(memcpy ${memcpy_name} - SRCS ${LIBC_SOURCE_DIR}/src/string/memcpy.cpp + SRCS ${LIBC_SOURCE_DIR}/src/string/${LIBC_STRING_TARGET_ARCH}/memcpy.cpp HDRS ${LIBC_SOURCE_DIR}/src/string/memcpy.h DEPENDS .memory_utils.memory_utils - .memcpy_arch_specific libc.include.string COMPILE_OPTIONS -fno-builtin-memcpy @@ -144,4 +131,4 @@ add_bzero(bzero MARCH native) # Add all other relevant implementations for the native target. # ------------------------------------------------------------------------------ -include(${LIBC_STRING_TARGET_FOLDER}/CMakeLists.txt) +include(${LIBC_STRING_TARGET_ARCH}/CMakeLists.txt) diff --git a/libc/src/string/memcpy.cpp b/libc/src/string/memcpy.cpp deleted file mode 100644 index 2dee707bdc4e9..0000000000000 --- a/libc/src/string/memcpy.cpp +++ /dev/null @@ -1,22 +0,0 @@ -//===-- Implementation of memcpy ------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "src/string/memcpy.h" -#include "src/__support/common.h" -#include "src/string/memcpy_arch_specific.h" - -namespace __llvm_libc { - -void *LLVM_LIBC_ENTRYPOINT(memcpy)(void *__restrict dst, - const void *__restrict src, size_t size) { - memcpy_no_return(reinterpret_cast(dst), - reinterpret_cast(src), size); - return dst; -} - -} // namespace __llvm_libc diff --git a/libc/src/string/memcpy_arch_specific.h.def b/libc/src/string/memcpy_arch_specific.h.def deleted file mode 100644 index 8b991e8040007..0000000000000 --- a/libc/src/string/memcpy_arch_specific.h.def +++ /dev/null @@ -1,65 +0,0 @@ -//===-- Implementation of arch specific memcpy ----------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_STRING_MEMORY_ARCH_H -#define LLVM_LIBC_SRC_STRING_MEMORY_ARCH_H - -%%include_file(${memcpy_arch_specific}) - -namespace __llvm_libc { - -// Design rationale -// ================ -// -// Using a profiler to observe size distributions for calls into libc -// functions, it was found most operations act on a small number of bytes. -// This makes it important to favor small sizes. -// -// The tests for `count` are in ascending order so the cost of branching is -// proportional to the cost of copying. -// -// The function is written in C++ for several reasons: -// - The compiler can __see__ the code, this is useful when performing Profile -// Guided Optimization as the optimized code can take advantage of branching -// probabilities. -// - It also allows for easier customization and favors testing multiple -// implementation parameters. 
-// - As compilers and processors get better, the generated code is improved -// with little change on the code side. -static void memcpy_no_return(char *__restrict dst, const char *__restrict src, - size_t count) { - if (count == 0) - return; - if (count == 1) - return Copy<1>(dst, src); - if (count == 2) - return Copy<2>(dst, src); - if (count == 3) - return Copy<3>(dst, src); - if (count == 4) - return Copy<4>(dst, src); - if (count < 8) - return CopyOverlap<4>(dst, src, count); - if (count == 8) - return Copy<8>(dst, src); - if (count < 16) - return CopyOverlap<8>(dst, src, count); - if (count == 16) - return Copy<16>(dst, src); - if (count < 32) - return CopyOverlap<16>(dst, src, count); - if (count < 64) - return CopyOverlap<32>(dst, src, count); - if (count < 128) - return CopyOverlap<64>(dst, src, count); - CopyGE128(dst, src, count); -} - -} // namespace __llvm_libc - -#endif // LLVM_LIBC_SRC_STRING_MEMORY_ARCH_H diff --git a/libc/src/string/memory_utils/memcpy_utils.h b/libc/src/string/memory_utils/memcpy_utils.h index 09e379393cf24..a0e5ccc81c9e1 100644 --- a/libc/src/string/memory_utils/memcpy_utils.h +++ b/libc/src/string/memory_utils/memcpy_utils.h @@ -32,7 +32,7 @@ extern "C" void LLVM_LIBC_MEMCPY_MONITOR(char *__restrict, // Copies `kBlockSize` bytes from `src` to `dst`. template -static void Copy(char *__restrict dst, const char *__restrict src) { +static void CopyBlock(char *__restrict dst, const char *__restrict src) { #if defined(LLVM_LIBC_MEMCPY_MONITOR) LLVM_LIBC_MEMCPY_MONITOR(dst, src, kBlockSize); #elif defined(USE_BUILTIN_MEMCPY_INLINE) @@ -52,7 +52,7 @@ template static void CopyLastBlock(char *__restrict dst, const char *__restrict src, size_t count) { const size_t offset = count - kBlockSize; - Copy(dst + offset, src + offset); + CopyBlock(dst + offset, src + offset); } // Copies `kBlockSize` bytes twice with an overlap between the two. 
@@ -64,9 +64,9 @@ static void CopyLastBlock(char *__restrict dst, const char *__restrict src, // // Precondition: `count >= kBlockSize && count <= kBlockSize`. template -static void CopyOverlap(char *__restrict dst, const char *__restrict src, - size_t count) { - Copy(dst, src); +static void CopyBlockOverlap(char *__restrict dst, const char *__restrict src, + size_t count) { + CopyBlock(dst, src); CopyLastBlock(dst, src, count); } @@ -85,14 +85,14 @@ static void CopyOverlap(char *__restrict dst, const char *__restrict src, // Precondition: `count > 2 * kBlockSize` for efficiency. // `count >= kBlockSize` for correctness. template -static void CopyAligned(char *__restrict dst, const char *__restrict src, - size_t count) { - Copy(dst, src); // Copy first block +static void CopyAlignedBlocks(char *__restrict dst, const char *__restrict src, + size_t count) { + CopyBlock(dst, src); // Copy first block // Copy aligned blocks size_t offset = kBlockSize - offset_from_last_aligned(dst); for (; offset + kBlockSize < count; offset += kBlockSize) - Copy(dst + offset, src + offset); + CopyBlock(dst + offset, src + offset); CopyLastBlock(dst, src, count); // Copy last block } diff --git a/libc/src/string/x86/memcpy.cpp b/libc/src/string/x86/memcpy.cpp new file mode 100644 index 0000000000000..811ce5183fe4e --- /dev/null +++ b/libc/src/string/x86/memcpy.cpp @@ -0,0 +1,94 @@ +//===-- Implementation of memcpy ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/memcpy.h" +#include "src/__support/common.h" +#include "src/string/memory_utils/memcpy_utils.h" + +namespace __llvm_libc { + +static void CopyRepMovsb(char *__restrict dst, const char *__restrict src, + size_t count) { + // FIXME: Add MSVC support with + // #include + // __movsb(reinterpret_cast(dst), + // reinterpret_cast(src), count); + asm volatile("rep movsb" : "+D"(dst), "+S"(src), "+c"(count) : : "memory"); +} + +#if defined(__AVX__) +#define BEST_SIZE 64 +#else +#define BEST_SIZE 32 +#endif + +// Design rationale +// ================ +// +// Using a profiler to observe size distributions for calls into libc +// functions, it was found most operations act on a small number of bytes. +// This makes it important to favor small sizes. +// +// The tests for `count` are in ascending order so the cost of branching is +// proportional to the cost of copying. +// +// The function is written in C++ for several reasons: +// - The compiler can __see__ the code, this is useful when performing Profile +// Guided Optimization as the optimized code can take advantage of branching +// probabilities. +// - It also allows for easier customization and favors testing multiple +// implementation parameters. +// - As compilers and processors get better, the generated code is improved +// with little change on the code side. 
+static void memcpy_x86(char *__restrict dst, const char *__restrict src, + size_t count) { + if (count == 0) + return; + if (count == 1) + return CopyBlock<1>(dst, src); + if (count == 2) + return CopyBlock<2>(dst, src); + if (count == 3) + return CopyBlock<3>(dst, src); + if (count == 4) + return CopyBlock<4>(dst, src); + if (count < 8) + return CopyBlockOverlap<4>(dst, src, count); + if (count == 8) + return CopyBlock<8>(dst, src); + if (count < 16) + return CopyBlockOverlap<8>(dst, src, count); + if (count == 16) + return CopyBlock<16>(dst, src); + if (count < 32) + return CopyBlockOverlap<16>(dst, src, count); + if (count < 64) + return CopyBlockOverlap<32>(dst, src, count); + if (count < 128) + return CopyBlockOverlap<64>(dst, src, count); +#if defined(__AVX__) + if (count < 256) + return CopyBlockOverlap<128>(dst, src, count); +#endif + // kRepMovsBSize == -1 : Only CopyAlignedBlocks is used. + // kRepMovsBSize == 0 : Only RepMovsb is used. + // else CopyAlignedBlocks is used up to kRepMovsBSize and then RepMovsb.
+ constexpr size_t kRepMovsBSize = -1; + if (count <= kRepMovsBSize) + return CopyAlignedBlocks(dst, src, count); + return CopyRepMovsb(dst, src, count); +} + +void *LLVM_LIBC_ENTRYPOINT(memcpy)(void *__restrict dst, + const void *__restrict src, size_t size) { + memcpy_x86(reinterpret_cast(dst), reinterpret_cast(src), + size); + return dst; +} + +} // namespace __llvm_libc diff --git a/libc/src/string/x86/memcpy_arch_specific.h.inc b/libc/src/string/x86/memcpy_arch_specific.h.inc deleted file mode 100644 index 60610d4c73d25..0000000000000 --- a/libc/src/string/x86/memcpy_arch_specific.h.inc +++ /dev/null @@ -1,35 +0,0 @@ -#include "src/string/memory_utils/memcpy_utils.h" - -namespace __llvm_libc { - -static void CopyRepMovsb(char *__restrict dst, const char *__restrict src, - size_t count) { - // FIXME: Add MSVC support with - // #include - // __movsb(reinterpret_cast(dst), - // reinterpret_cast(src), count); - asm volatile("rep movsb" : "+D"(dst), "+S"(src), "+c"(count) : : "memory"); -} - -#if defined(__AVX__) -#define BEST_SIZE 64 -#else -#define BEST_SIZE 32 -#endif - -static void CopyGE128(char *__restrict dst, const char *__restrict src, - size_t count) { -#if defined(__AVX__) - if (count < 256) - return CopyOverlap<128>(dst, src, count); -#endif - // kRepMovsBSize == -1 : Only CopyAligned is used. - // kRepMovsBSize == 0 : Only RepMovsb is used. - // else CopyAligned is used to to kRepMovsBSize and then RepMovsb. 
- constexpr size_t kRepMovsBSize = -1; - if (count <= kRepMovsBSize) - return CopyAligned(dst, src, count); - CopyRepMovsb(dst, src, count); -} - -} // namespace __llvm_libc diff --git a/libc/test/src/string/memory_utils/memcpy_utils_test.cpp b/libc/test/src/string/memory_utils/memcpy_utils_test.cpp index 491c632216b7a..7e32fb4f3080a 100644 --- a/libc/test/src/string/memory_utils/memcpy_utils_test.cpp +++ b/libc/test/src/string/memory_utils/memcpy_utils_test.cpp @@ -83,37 +83,37 @@ TEST(MemcpyUtilsTest, CopyTrivial) { auto &trace = GetTrace(); trace.Clear(); - Copy<1>(I(0), I(0)); + CopyBlock<1>(I(0), I(0)); EXPECT_STREQ(trace.Write(), "1"); EXPECT_STREQ(trace.Read(), "1"); trace.Clear(); - Copy<2>(I(0), I(0)); + CopyBlock<2>(I(0), I(0)); EXPECT_STREQ(trace.Write(), "11"); EXPECT_STREQ(trace.Read(), "11"); trace.Clear(); - Copy<4>(I(0), I(0)); + CopyBlock<4>(I(0), I(0)); EXPECT_STREQ(trace.Write(), "1111"); EXPECT_STREQ(trace.Read(), "1111"); trace.Clear(); - Copy<8>(I(0), I(0)); + CopyBlock<8>(I(0), I(0)); EXPECT_STREQ(trace.Write(), "11111111"); EXPECT_STREQ(trace.Read(), "11111111"); trace.Clear(); - Copy<16>(I(0), I(0)); + CopyBlock<16>(I(0), I(0)); EXPECT_STREQ(trace.Write(), "1111111111111111"); EXPECT_STREQ(trace.Read(), "1111111111111111"); trace.Clear(); - Copy<32>(I(0), I(0)); + CopyBlock<32>(I(0), I(0)); EXPECT_STREQ(trace.Write(), "11111111111111111111111111111111"); EXPECT_STREQ(trace.Read(), "11111111111111111111111111111111"); trace.Clear(); - Copy<64>(I(0), I(0)); + CopyBlock<64>(I(0), I(0)); EXPECT_STREQ( trace.Write(), "1111111111111111111111111111111111111111111111111111111111111111"); @@ -126,41 +126,41 @@ TEST(MemcpyUtilsTest, CopyOffset) { auto &trace = GetTrace(); trace.Clear(); - Copy<1>(I(3), I(1)); + CopyBlock<1>(I(3), I(1)); EXPECT_STREQ(trace.Write(), "0001"); EXPECT_STREQ(trace.Read(), "01"); trace.Clear(); - Copy<1>(I(2), I(1)); + CopyBlock<1>(I(2), I(1)); EXPECT_STREQ(trace.Write(), "001"); EXPECT_STREQ(trace.Read(), "01"); } 
-TEST(MemcpyUtilsTest, CopyOverlap) { +TEST(MemcpyUtilsTest, CopyBlockOverlap) { auto &trace = GetTrace(); trace.Clear(); - CopyOverlap<2>(I(0), I(0), 2); + CopyBlockOverlap<2>(I(0), I(0), 2); EXPECT_STREQ(trace.Write(), "22"); EXPECT_STREQ(trace.Read(), "22"); trace.Clear(); - CopyOverlap<2>(I(0), I(0), 3); + CopyBlockOverlap<2>(I(0), I(0), 3); EXPECT_STREQ(trace.Write(), "121"); EXPECT_STREQ(trace.Read(), "121"); trace.Clear(); - CopyOverlap<2>(I(0), I(0), 4); + CopyBlockOverlap<2>(I(0), I(0), 4); EXPECT_STREQ(trace.Write(), "1111"); EXPECT_STREQ(trace.Read(), "1111"); trace.Clear(); - CopyOverlap<4>(I(2), I(1), 7); + CopyBlockOverlap<4>(I(2), I(1), 7); EXPECT_STREQ(trace.Write(), "001112111"); EXPECT_STREQ(trace.Read(), "01112111"); } -TEST(MemcpyUtilsTest, CopyAligned) { +TEST(MemcpyUtilsTest, CopyAlignedBlocks) { auto &trace = GetTrace(); // Destination is aligned already. // "1111000000000" @@ -169,7 +169,7 @@ TEST(MemcpyUtilsTest, CopyAligned) { // + "0000000001111" // = "1111111112221" trace.Clear(); - CopyAligned<4>(I(0), I(0), 13); + CopyAlignedBlocks<4>(I(0), I(0), 13); EXPECT_STREQ(trace.Write(), "1111111112221"); EXPECT_STREQ(trace.Read(), "1111111112221"); @@ -180,7 +180,7 @@ TEST(MemcpyUtilsTest, CopyAligned) { // + "00000000001111" // = "01112111112211" trace.Clear(); - CopyAligned<4>(I(1), I(0), 13); + CopyAlignedBlocks<4>(I(1), I(0), 13); EXPECT_STREQ(trace.Write(), "01112111112211"); EXPECT_STREQ(trace.Read(), "1112111112211"); } @@ -191,7 +191,7 @@ TEST(MemcpyUtilsTest, MaxReloads) { for (size_t count = 64; count < 768; ++count) { trace.Clear(); // We should never reload more than twice when copying from count = 2x32. - CopyAligned<32>(I(alignment), I(0), count); + CopyAlignedBlocks<32>(I(alignment), I(0), count); const char *const written = trace.Write(); // First bytes are untouched. 
for (size_t i = 0; i < alignment; ++i) From c1c9eb0ab7d20e61f0fb345a60694bda0487c0da Mon Sep 17 00:00:00 2001 From: Yi Kong Date: Tue, 26 May 2020 19:13:10 +0800 Subject: [PATCH 089/770] [Transforms] Check validity of profile reader before invoking it Although an invalid sampling profile would fail the compilation anyway, this avoids crashing the compiler. --- llvm/lib/Transforms/IPO/SampleProfile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index eb0747fde6d34..697341443273a 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1843,9 +1843,9 @@ ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) { bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, ProfileSummaryInfo *_PSI, CallGraph *CG) { - GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap); if (!ProfileIsValid) return false; + GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap); PSI = _PSI; if (M.getProfileSummary(/* IsCS */ false) == nullptr) From f368040c14f4bdac718798db28299a68adc42695 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 26 May 2020 07:56:50 -0400 Subject: [PATCH 090/770] [DAGCombiner] try to move splat after binop with splat constant binop (splat X), (splat C) --> splat (binop X, C) binop (splat C), (splat X) --> splat (binop C, X) We do this in IR, and there's a similar fold for the case with 2 non-constant operands just above the code diff in this patch. This was discussed in D79718, and the extra shuffle in the test (llvm/test/CodeGen/X86/vector-fshl-128.ll::sink_splatvar) where it was noticed disappears because demanded elements analysis is no longer blocked. The large majority of the test diffs seem to be benign code scheduling changes, but I do see another type of win: moving the splat later allows binop narrowing in some cases. 
Regressions were avoided on x86 and ARM with the INSERT_VECTOR_ELT restriction. Differential Revision: https://reviews.llvm.org/D79886 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 30 +- llvm/test/CodeGen/X86/vector-fshl-128.ll | 394 +++++++++-------- llvm/test/CodeGen/X86/vector-fshl-256.ll | 408 ++++++++---------- llvm/test/CodeGen/X86/vector-fshl-512.ll | 218 +++++----- llvm/test/CodeGen/X86/vector-fshl-rot-128.ll | 110 ++--- llvm/test/CodeGen/X86/vector-fshr-128.ll | 344 ++++++++------- llvm/test/CodeGen/X86/vector-fshr-256.ll | 408 ++++++++---------- llvm/test/CodeGen/X86/vector-fshr-512.ll | 216 +++++----- llvm/test/CodeGen/X86/vector-fshr-rot-128.ll | 96 ++--- llvm/test/CodeGen/X86/vector-fshr-rot-256.ll | 66 ++- llvm/test/CodeGen/X86/vector-rotate-128.ll | 110 ++--- .../test/CodeGen/X86/vector-shift-ashr-128.ll | 8 +- .../test/CodeGen/X86/vector-shift-lshr-128.ll | 4 +- 13 files changed, 1099 insertions(+), 1313 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 40ceb5b34ad39..7e41b2fffeda1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -20474,6 +20474,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { SDValue Ops[] = {LHS, RHS}; EVT VT = N->getValueType(0); unsigned Opcode = N->getOpcode(); + SDNodeFlags Flags = N->getFlags(); // See if we can constant fold the vector operation. if (SDValue Fold = DAG.FoldConstantVectorArithmetic( @@ -20497,10 +20498,37 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) { SDLoc DL(N); SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0), - RHS.getOperand(0), N->getFlags()); + RHS.getOperand(0), Flags); SDValue UndefV = LHS.getOperand(1); return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask()); } + + // Try to sink a splat shuffle after a binop with a uniform constant. 
+ // This is limited to cases where neither the shuffle nor the constant have + // undefined elements because that could be poison-unsafe or inhibit + // demanded elements analysis. It is further limited to not change a splat + // of an inserted scalar because that may be optimized better by + // load-folding or other target-specific behaviors. + if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) && + Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() && + Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) { + // binop (splat X), (splat C) --> splat (binop X, C) + SDLoc DL(N); + SDValue X = Shuf0->getOperand(0); + SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags); + return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT), + Shuf0->getMask()); + } + if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) && + Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() && + Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) { + // binop (splat C), (splat X) --> splat (binop C, X) + SDLoc DL(N); + SDValue X = Shuf1->getOperand(0); + SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags); + return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT), + Shuf1->getMask()); + } } // The following pattern is likely to emerge with vector reduction ops. 
Moving diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll index 144a0457e63e5..0c5e19f24dbe5 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll @@ -1182,7 +1182,6 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %amt) define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v2i64: ; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] ; SSE2-NEXT: pand {{.*}}(%rip), %xmm2 ; SSE2-NEXT: movdqa %xmm0, %xmm3 ; SSE2-NEXT: psllq %xmm2, %xmm3 @@ -1190,6 +1189,7 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % ; SSE2-NEXT: psubq %xmm2, %xmm4 ; SSE2-NEXT: psrlq %xmm4, %xmm1 ; SSE2-NEXT: por %xmm3, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] ; SSE2-NEXT: pxor %xmm3, %xmm3 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm3 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2] @@ -1202,29 +1202,29 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % ; SSE41-LABEL: splatvar_funnnel_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa %xmm0, %xmm3 -; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,1,0,1] -; SSE41-NEXT: pand {{.*}}(%rip), %xmm4 -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: psllq %xmm4, %xmm2 +; SSE41-NEXT: pand {{.*}}(%rip), %xmm2 +; SSE41-NEXT: movdqa %xmm0, %xmm4 +; SSE41-NEXT: psllq %xmm2, %xmm4 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [64,64] -; SSE41-NEXT: psubq %xmm4, %xmm0 +; SSE41-NEXT: psubq %xmm2, %xmm0 ; SSE41-NEXT: psrlq %xmm0, %xmm1 -; SSE41-NEXT: por %xmm1, %xmm2 +; SSE41-NEXT: por %xmm1, %xmm4 +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,1,0,1] ; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: pcmpeqq %xmm4, %xmm0 -; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2 -; SSE41-NEXT: movapd %xmm2, %xmm0 +; SSE41-NEXT: pcmpeqq %xmm1, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm4 +; SSE41-NEXT: movapd %xmm4, %xmm0 
; SSE41-NEXT: retq ; ; AVX1-LABEL: splatvar_funnnel_v2i64: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm3 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX1-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 ; AVX1-NEXT: vpor %xmm1, %xmm3, %xmm1 +; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 @@ -1232,13 +1232,13 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % ; ; AVX2-LABEL: splatvar_funnnel_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX2-NEXT: vpsllq %xmm2, %xmm0, %xmm3 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX2-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX2-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 ; AVX2-NEXT: vpor %xmm1, %xmm3, %xmm1 +; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX2-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2 ; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 @@ -1247,13 +1247,13 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % ; AVX512F-LABEL: splatvar_funnnel_v2i64: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512F-NEXT: vpsllq %xmm2, %xmm0, %xmm3 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX512F-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512F-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 ; AVX512F-NEXT: vpor %xmm1, %xmm3, %xmm1 +; AVX512F-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX512F-NEXT: vptestnmq %zmm2, %zmm2, %k1 ; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} ; AVX512F-NEXT: vmovdqa %xmm1, %xmm0 @@ -1262,13 +1262,13 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % ; ; 
AVX512VL-LABEL: splatvar_funnnel_v2i64: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VL-NEXT: vpsllq %xmm2, %xmm0, %xmm3 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX512VL-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512VL-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 ; AVX512VL-NEXT: vpor %xmm1, %xmm3, %xmm1 +; AVX512VL-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX512VL-NEXT: vptestnmq %xmm2, %xmm2, %k1 ; AVX512VL-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} ; AVX512VL-NEXT: vmovdqa %xmm1, %xmm0 @@ -1277,13 +1277,13 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % ; AVX512BW-LABEL: splatvar_funnnel_v2i64: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512BW-NEXT: vpsllq %xmm2, %xmm0, %xmm3 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX512BW-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512BW-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 ; AVX512BW-NEXT: vpor %xmm1, %xmm3, %xmm1 +; AVX512BW-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX512BW-NEXT: vptestnmq %zmm2, %zmm2, %k1 ; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} ; AVX512BW-NEXT: vmovdqa %xmm1, %xmm0 @@ -1293,13 +1293,13 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % ; AVX512VBMI2-LABEL: splatvar_funnnel_v2i64: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpsllq %xmm2, %xmm0, %xmm3 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX512VBMI2-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512VBMI2-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 ; AVX512VBMI2-NEXT: vpor %xmm1, %xmm3, %xmm1 +; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vptestnmq %zmm2, %zmm2, %k1 ; AVX512VBMI2-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} ; 
AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0 @@ -1308,13 +1308,13 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % ; ; AVX512VLBW-LABEL: splatvar_funnnel_v2i64: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpsllq %xmm2, %xmm0, %xmm3 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX512VLBW-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512VLBW-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpor %xmm1, %xmm3, %xmm1 +; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX512VLBW-NEXT: vptestnmq %xmm2, %xmm2, %k1 ; AVX512VLBW-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} ; AVX512VLBW-NEXT: vmovdqa %xmm1, %xmm0 @@ -1328,13 +1328,13 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % ; ; XOPAVX1-LABEL: splatvar_funnnel_v2i64: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] ; XOPAVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm3 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; XOPAVX1-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; XOPAVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 ; XOPAVX1-NEXT: vpor %xmm1, %xmm3, %xmm1 +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] ; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; XOPAVX1-NEXT: vpcomeqq %xmm3, %xmm2, %xmm2 ; XOPAVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 @@ -1342,13 +1342,13 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % ; ; XOPAVX2-LABEL: splatvar_funnnel_v2i64: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastq %xmm2, %xmm2 ; XOPAVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; XOPAVX2-NEXT: vpsllq %xmm2, %xmm0, %xmm3 ; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; XOPAVX2-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; XOPAVX2-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 ; XOPAVX2-NEXT: vpor %xmm1, %xmm3, %xmm1 +; XOPAVX2-NEXT: vpbroadcastq %xmm2, %xmm2 ; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; XOPAVX2-NEXT: vpcomeqq 
%xmm3, %xmm2, %xmm2 ; XOPAVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 @@ -1356,29 +1356,25 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % ; ; X32-SSE-LABEL: splatvar_funnnel_v2i64: ; X32-SSE: # %bb.0: -; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm2 +; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,1,0,1] +; X32-SSE-NEXT: pxor %xmm4, %xmm4 +; X32-SSE-NEXT: pcmpeqd %xmm3, %xmm4 +; X32-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,0,3,2] +; X32-SSE-NEXT: pand %xmm4, %xmm5 +; X32-SSE-NEXT: movdqa {{.*#+}} xmm4 = [64,0,64,0] +; X32-SSE-NEXT: psubq %xmm3, %xmm4 +; X32-SSE-NEXT: movdqa %xmm1, %xmm3 +; X32-SSE-NEXT: psrlq %xmm4, %xmm3 +; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,0,1] +; X32-SSE-NEXT: psrlq %xmm4, %xmm1 +; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm3[0],xmm1[1] ; X32-SSE-NEXT: movdqa %xmm0, %xmm3 ; X32-SSE-NEXT: psllq %xmm2, %xmm3 -; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,0,1] -; X32-SSE-NEXT: movdqa %xmm0, %xmm5 -; X32-SSE-NEXT: psllq %xmm4, %xmm5 -; X32-SSE-NEXT: movsd {{.*#+}} xmm5 = xmm3[0],xmm5[1] -; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [64,0,64,0] -; X32-SSE-NEXT: psubq %xmm2, %xmm3 -; X32-SSE-NEXT: movdqa %xmm1, %xmm4 -; X32-SSE-NEXT: psrlq %xmm3, %xmm4 -; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1] -; X32-SSE-NEXT: psrlq %xmm3, %xmm1 -; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm4[0],xmm1[1] -; X32-SSE-NEXT: orpd %xmm5, %xmm1 -; X32-SSE-NEXT: pxor %xmm3, %xmm3 -; X32-SSE-NEXT: pcmpeqd %xmm2, %xmm3 -; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2] -; X32-SSE-NEXT: pand %xmm3, %xmm2 -; X32-SSE-NEXT: pand %xmm2, %xmm0 -; X32-SSE-NEXT: pandn %xmm1, %xmm2 -; X32-SSE-NEXT: por %xmm2, %xmm0 +; X32-SSE-NEXT: por %xmm1, %xmm3 +; X32-SSE-NEXT: pand %xmm5, %xmm0 +; X32-SSE-NEXT: pandn %xmm3, %xmm5 +; X32-SSE-NEXT: por %xmm5, %xmm0 ; X32-SSE-NEXT: retl %splat = shufflevector <2 x i64> %amt, <2 x i64> undef, <2 x i32> zeroinitializer %res = call <2 x i64> 
@llvm.fshl.v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %splat) @@ -1388,8 +1384,7 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v4i32: ; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] -; SSE2-NEXT: pand {{.*}}(%rip), %xmm2 +; SSE2-NEXT: andps {{.*}}(%rip), %xmm2 ; SSE2-NEXT: pxor %xmm3, %xmm3 ; SSE2-NEXT: xorps %xmm4, %xmm4 ; SSE2-NEXT: movss {{.*#+}} xmm4 = xmm2[0],xmm4[1,2,3] @@ -1401,6 +1396,7 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; SSE2-NEXT: movd %ecx, %xmm4 ; SSE2-NEXT: psrld %xmm4, %xmm1 ; SSE2-NEXT: por %xmm5, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE2-NEXT: pcmpeqd %xmm3, %xmm2 ; SSE2-NEXT: pand %xmm2, %xmm0 ; SSE2-NEXT: pandn %xmm1, %xmm2 @@ -1410,25 +1406,24 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; SSE41-LABEL: splatvar_funnnel_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa %xmm0, %xmm3 -; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,0,0] -; SSE41-NEXT: pand {{.*}}(%rip), %xmm4 -; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm4[0],zero,xmm4[1],zero -; SSE41-NEXT: movdqa %xmm3, %xmm2 -; SSE41-NEXT: pslld %xmm0, %xmm2 +; SSE41-NEXT: pand {{.*}}(%rip), %xmm2 +; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero +; SSE41-NEXT: movdqa %xmm3, %xmm4 +; SSE41-NEXT: pslld %xmm0, %xmm4 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [32,32,32,32] -; SSE41-NEXT: psubd %xmm4, %xmm0 +; SSE41-NEXT: psubd %xmm2, %xmm0 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; SSE41-NEXT: psrld %xmm0, %xmm1 -; SSE41-NEXT: por %xmm1, %xmm2 +; SSE41-NEXT: por %xmm1, %xmm4 +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0,0,0] ; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: pcmpeqd %xmm4, %xmm0 -; SSE41-NEXT: blendvps %xmm0, %xmm3, %xmm2 -; SSE41-NEXT: movaps %xmm2, %xmm0 +; 
SSE41-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE41-NEXT: blendvps %xmm0, %xmm3, %xmm4 +; SSE41-NEXT: movaps %xmm4, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: splatvar_funnnel_v4i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; AVX1-NEXT: vpslld %xmm3, %xmm0, %xmm3 @@ -1437,6 +1432,7 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX1-NEXT: vpsrld %xmm4, %xmm1, %xmm1 ; AVX1-NEXT: vpor %xmm1, %xmm3, %xmm1 +; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 @@ -1444,7 +1440,6 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; ; AVX2-LABEL: splatvar_funnnel_v4i32: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31] ; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero @@ -1454,6 +1449,7 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX2-NEXT: vpsrld %xmm4, %xmm1, %xmm1 ; AVX2-NEXT: vpor %xmm1, %xmm3, %xmm1 +; AVX2-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX2-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2 ; AVX2-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 @@ -1462,7 +1458,6 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; AVX512F-LABEL: splatvar_funnnel_v4i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31] ; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX512F-NEXT: vpmovzxdq {{.*#+}} 
xmm3 = xmm2[0],zero,xmm2[1],zero @@ -1472,6 +1467,7 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX512F-NEXT: vpsrld %xmm4, %xmm1, %xmm1 ; AVX512F-NEXT: vpor %xmm1, %xmm3, %xmm1 +; AVX512F-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX512F-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} ; AVX512F-NEXT: vmovdqa %xmm1, %xmm0 @@ -1480,7 +1476,6 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; ; AVX512VL-LABEL: splatvar_funnnel_v4i32: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm2, %xmm2 ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; AVX512VL-NEXT: vpslld %xmm3, %xmm0, %xmm3 @@ -1489,6 +1484,7 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX512VL-NEXT: vpsrld %xmm4, %xmm1, %xmm1 ; AVX512VL-NEXT: vpor %xmm1, %xmm3, %xmm1 +; AVX512VL-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX512VL-NEXT: vptestnmd %xmm2, %xmm2, %k1 ; AVX512VL-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} ; AVX512VL-NEXT: vmovdqa %xmm1, %xmm0 @@ -1497,7 +1493,6 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; AVX512BW-LABEL: splatvar_funnnel_v4i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31] ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero @@ -1507,6 +1502,7 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX512BW-NEXT: vpsrld %xmm4, %xmm1, %xmm1 ; AVX512BW-NEXT: vpor %xmm1, %xmm3, %xmm1 +; AVX512BW-NEXT: vpbroadcastd 
%xmm2, %xmm2 ; AVX512BW-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; AVX512BW-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} ; AVX512BW-NEXT: vmovdqa %xmm1, %xmm0 @@ -1516,7 +1512,6 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; AVX512VBMI2-LABEL: splatvar_funnnel_v4i32: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31] ; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero @@ -1526,6 +1521,7 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; AVX512VBMI2-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX512VBMI2-NEXT: vpsrld %xmm4, %xmm1, %xmm1 ; AVX512VBMI2-NEXT: vpor %xmm1, %xmm3, %xmm1 +; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; AVX512VBMI2-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} ; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0 @@ -1534,7 +1530,6 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; ; AVX512VLBW-LABEL: splatvar_funnnel_v4i32: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; AVX512VLBW-NEXT: vpslld %xmm3, %xmm0, %xmm3 @@ -1543,6 +1538,7 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX512VLBW-NEXT: vpsrld %xmm4, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpor %xmm1, %xmm3, %xmm1 +; AVX512VLBW-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX512VLBW-NEXT: vptestnmd %xmm2, %xmm2, %k1 ; AVX512VLBW-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} ; AVX512VLBW-NEXT: vmovdqa %xmm1, %xmm0 @@ -1556,7 +1552,6 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; 
; XOPAVX1-LABEL: splatvar_funnnel_v4i32: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; XOPAVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; XOPAVX1-NEXT: vpslld %xmm3, %xmm0, %xmm3 @@ -1565,6 +1560,7 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; XOPAVX1-NEXT: vpsrld %xmm4, %xmm1, %xmm1 ; XOPAVX1-NEXT: vpor %xmm1, %xmm3, %xmm1 +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; XOPAVX1-NEXT: vpcomeqd %xmm3, %xmm2, %xmm2 ; XOPAVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 @@ -1572,7 +1568,6 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; ; XOPAVX2-LABEL: splatvar_funnnel_v4i32: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastd %xmm2, %xmm2 ; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31] ; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2 ; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero @@ -1582,6 +1577,7 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; XOPAVX2-NEXT: vpsrld %xmm4, %xmm1, %xmm1 ; XOPAVX2-NEXT: vpor %xmm1, %xmm3, %xmm1 +; XOPAVX2-NEXT: vpbroadcastd %xmm2, %xmm2 ; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; XOPAVX2-NEXT: vpcomeqd %xmm3, %xmm2, %xmm2 ; XOPAVX2-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 @@ -1589,8 +1585,7 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; ; X32-SSE-LABEL: splatvar_funnnel_v4i32: ; X32-SSE: # %bb.0: -; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] -; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm2 +; X32-SSE-NEXT: andps {{\.LCPI.*}}, %xmm2 ; X32-SSE-NEXT: pxor %xmm3, %xmm3 ; X32-SSE-NEXT: xorps %xmm4, %xmm4 ; X32-SSE-NEXT: movss {{.*#+}} xmm4 = xmm2[0],xmm4[1,2,3] @@ -1602,6 +1597,7 @@ 
define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; X32-SSE-NEXT: movd %ecx, %xmm4 ; X32-SSE-NEXT: psrld %xmm4, %xmm1 ; X32-SSE-NEXT: por %xmm5, %xmm1 +; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; X32-SSE-NEXT: pcmpeqd %xmm3, %xmm2 ; X32-SSE-NEXT: pand %xmm2, %xmm0 ; X32-SSE-NEXT: pandn %xmm1, %xmm2 @@ -1615,13 +1611,10 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v8i16: ; SSE2: # %bb.0: -; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,0,2,3,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE2-NEXT: pand {{.*}}(%rip), %xmm2 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; SSE2-NEXT: psubw %xmm2, %xmm3 -; SSE2-NEXT: pxor %xmm4, %xmm4 -; SSE2-NEXT: pcmpeqw %xmm2, %xmm4 +; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm2[0,0,2,3,4,5,6,7] ; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1] ; SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; SSE2-NEXT: movdqa %xmm0, %xmm5 @@ -1630,35 +1623,36 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; SSE2-NEXT: psrldq {{.*#+}} xmm3 = xmm3[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; SSE2-NEXT: psrlw %xmm3, %xmm1 ; SSE2-NEXT: por %xmm5, %xmm1 -; SSE2-NEXT: pand %xmm4, %xmm0 -; SSE2-NEXT: pandn %xmm1, %xmm4 -; SSE2-NEXT: por %xmm4, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[0,0,0,0] +; SSE2-NEXT: pxor %xmm3, %xmm3 +; SSE2-NEXT: pcmpeqw %xmm2, %xmm3 +; SSE2-NEXT: pand %xmm3, %xmm0 +; SSE2-NEXT: pandn %xmm1, %xmm3 +; SSE2-NEXT: por %xmm3, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: splatvar_funnnel_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa %xmm0, %xmm3 -; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = 
xmm2[0,0,2,3,4,5,6,7] -; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,0,0,0] -; SSE41-NEXT: pand {{.*}}(%rip), %xmm4 -; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero -; SSE41-NEXT: movdqa %xmm3, %xmm2 -; SSE41-NEXT: psllw %xmm0, %xmm2 +; SSE41-NEXT: pand {{.*}}(%rip), %xmm2 +; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero +; SSE41-NEXT: movdqa %xmm3, %xmm4 +; SSE41-NEXT: psllw %xmm0, %xmm4 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [16,16,16,16,16,16,16,16] -; SSE41-NEXT: psubw %xmm4, %xmm0 +; SSE41-NEXT: psubw %xmm2, %xmm0 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero ; SSE41-NEXT: psrlw %xmm0, %xmm1 -; SSE41-NEXT: por %xmm1, %xmm2 +; SSE41-NEXT: por %xmm1, %xmm4 +; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,0,2,3,4,5,6,7] +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0] ; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: pcmpeqw %xmm4, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pcmpeqw %xmm1, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm4 +; SSE41-NEXT: movdqa %xmm4, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: splatvar_funnnel_v8i16: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,3,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX1-NEXT: vpsllw %xmm3, %xmm0, %xmm3 @@ -1667,6 +1661,8 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 ; AVX1-NEXT: vpor %xmm1, %xmm3, %xmm1 +; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,3,4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vpblendvb 
%xmm2, %xmm0, %xmm1, %xmm0 @@ -1674,7 +1670,6 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; ; AVX2-LABEL: splatvar_funnnel_v8i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX2-NEXT: vpsllw %xmm3, %xmm0, %xmm3 @@ -1683,6 +1678,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX2-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 ; AVX2-NEXT: vpor %xmm1, %xmm3, %xmm1 +; AVX2-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX2-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2 ; AVX2-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 @@ -1690,7 +1686,6 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; ; AVX512F-LABEL: splatvar_funnnel_v8i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512F-NEXT: vpsllw %xmm3, %xmm0, %xmm3 @@ -1699,6 +1694,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512F-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 ; AVX512F-NEXT: vpor %xmm1, %xmm3, %xmm1 +; AVX512F-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX512F-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2 ; AVX512F-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 @@ -1706,7 +1702,6 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; ; AVX512VL-LABEL: splatvar_funnnel_v8i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = 
xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512VL-NEXT: vpsllw %xmm3, %xmm0, %xmm3 @@ -1715,6 +1710,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512VL-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 ; AVX512VL-NEXT: vpor %xmm1, %xmm3, %xmm1 +; AVX512VL-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX512VL-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2 ; AVX512VL-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 @@ -1723,7 +1719,6 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; AVX512BW-LABEL: splatvar_funnnel_v8i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512BW-NEXT: vpsllw %xmm3, %xmm0, %xmm3 @@ -1732,6 +1727,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512BW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 ; AVX512BW-NEXT: vpor %xmm1, %xmm3, %xmm1 +; AVX512BW-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512BW-NEXT: vptestnmw %zmm2, %zmm2, %k1 ; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} ; AVX512BW-NEXT: vmovdqa %xmm1, %xmm0 @@ -1741,7 +1737,6 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512VBMI2-NEXT: vpsllw %xmm3, %xmm0, %xmm3 @@ -1750,6 +1745,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x 
i16> %x, <8 x i16> %y, <8 x i16> % ; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512VBMI2-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 ; AVX512VBMI2-NEXT: vpor %xmm1, %xmm3, %xmm1 +; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vptestnmw %zmm2, %zmm2, %k1 ; AVX512VBMI2-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} ; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0 @@ -1758,7 +1754,6 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; ; AVX512VLBW-LABEL: splatvar_funnnel_v8i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512VLBW-NEXT: vpsllw %xmm3, %xmm0, %xmm3 @@ -1767,6 +1762,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512VLBW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpor %xmm1, %xmm3, %xmm1 +; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512VLBW-NEXT: vptestnmw %xmm2, %xmm2, %k1 ; AVX512VLBW-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1} ; AVX512VLBW-NEXT: vmovdqa %xmm1, %xmm0 @@ -1780,8 +1776,6 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; ; XOPAVX1-LABEL: splatvar_funnnel_v8i16: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,3,4,5,6,7] -; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; XOPAVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; XOPAVX1-NEXT: vpsllw %xmm3, %xmm0, %xmm3 @@ -1790,6 +1784,8 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; XOPAVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 ; XOPAVX1-NEXT: vpor 
%xmm1, %xmm3, %xmm1 +; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,3,4,5,6,7] +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; XOPAVX1-NEXT: vpcomeqw %xmm3, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 @@ -1797,7 +1793,6 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; ; XOPAVX2-LABEL: splatvar_funnnel_v8i16: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastw %xmm2, %xmm2 ; XOPAVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; XOPAVX2-NEXT: vpsllw %xmm3, %xmm0, %xmm3 @@ -1806,6 +1801,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; XOPAVX2-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 ; XOPAVX2-NEXT: vpor %xmm1, %xmm3, %xmm1 +; XOPAVX2-NEXT: vpbroadcastw %xmm2, %xmm2 ; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; XOPAVX2-NEXT: vpcomeqw %xmm3, %xmm2, %xmm2 ; XOPAVX2-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 @@ -1813,13 +1809,10 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; ; X32-SSE-LABEL: splatvar_funnnel_v8i16: ; X32-SSE: # %bb.0: -; X32-SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,0,2,3,4,5,6,7] -; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm2 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; X32-SSE-NEXT: psubw %xmm2, %xmm3 -; X32-SSE-NEXT: pxor %xmm4, %xmm4 -; X32-SSE-NEXT: pcmpeqw %xmm2, %xmm4 +; X32-SSE-NEXT: pshuflw {{.*#+}} xmm4 = xmm2[0,0,2,3,4,5,6,7] ; X32-SSE-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1] ; X32-SSE-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X32-SSE-NEXT: movdqa %xmm0, %xmm5 @@ -1828,9 +1821,12 @@ define <8 x 
i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; X32-SSE-NEXT: psrldq {{.*#+}} xmm3 = xmm3[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X32-SSE-NEXT: psrlw %xmm3, %xmm1 ; X32-SSE-NEXT: por %xmm5, %xmm1 -; X32-SSE-NEXT: pand %xmm4, %xmm0 -; X32-SSE-NEXT: pandn %xmm1, %xmm4 -; X32-SSE-NEXT: por %xmm4, %xmm0 +; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[0,0,0,0] +; X32-SSE-NEXT: pxor %xmm3, %xmm3 +; X32-SSE-NEXT: pcmpeqw %xmm2, %xmm3 +; X32-SSE-NEXT: pand %xmm3, %xmm0 +; X32-SSE-NEXT: pandn %xmm1, %xmm3 +; X32-SSE-NEXT: por %xmm3, %xmm0 ; X32-SSE-NEXT: retl %splat = shufflevector <8 x i16> %amt, <8 x i16> undef, <8 x i32> zeroinitializer %res = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %splat) @@ -1840,62 +1836,63 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v16i8: ; SSE2: # %bb.0: -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,0,2,3,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE2-NEXT: pand {{.*}}(%rip), %xmm2 -; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; SSE2-NEXT: psubb %xmm2, %xmm3 -; SSE2-NEXT: pxor %xmm4, %xmm4 -; SSE2-NEXT: pcmpeqb %xmm2, %xmm4 -; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0] -; SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; SSE2-NEXT: movdqa %xmm0, %xmm5 -; SSE2-NEXT: psllw %xmm2, %xmm5 -; SSE2-NEXT: pcmpeqd %xmm6, %xmm6 -; SSE2-NEXT: psllw %xmm2, %xmm6 -; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: pshuflw {{.*#+}} xmm6 = xmm6[0,0,2,3,4,5,6,7] -; SSE2-NEXT: 
pshufd {{.*#+}} xmm6 = xmm6[0,0,0,0] -; SSE2-NEXT: pand %xmm5, %xmm6 +; SSE2-NEXT: movdqa %xmm2, %xmm3 ; SSE2-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0] ; SSE2-NEXT: psrldq {{.*#+}} xmm3 = xmm3[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; SSE2-NEXT: psrlw %xmm3, %xmm1 -; SSE2-NEXT: psrlw %xmm3, %xmm2 -; SSE2-NEXT: psrlw $8, %xmm2 +; SSE2-NEXT: movdqa %xmm0, %xmm4 +; SSE2-NEXT: psllw %xmm3, %xmm4 +; SSE2-NEXT: pcmpeqd %xmm5, %xmm5 +; SSE2-NEXT: pcmpeqd %xmm6, %xmm6 +; SSE2-NEXT: psllw %xmm3, %xmm6 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm6[0,0,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] +; SSE2-NEXT: pand %xmm4, %xmm3 +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; SSE2-NEXT: psubb %xmm2, %xmm4 +; SSE2-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0] +; SSE2-NEXT: psrldq {{.*#+}} xmm4 = xmm4[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; SSE2-NEXT: psrlw %xmm4, %xmm1 +; SSE2-NEXT: psrlw %xmm4, %xmm5 +; SSE2-NEXT: psrlw $8, %xmm5 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm5[0,0,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,0,0,0] +; SSE2-NEXT: pand %xmm1, %xmm4 +; SSE2-NEXT: por %xmm3, %xmm4 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,0,2,3,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] -; SSE2-NEXT: pand %xmm1, %xmm2 -; SSE2-NEXT: por %xmm6, %xmm2 -; SSE2-NEXT: pand %xmm4, %xmm0 -; SSE2-NEXT: pandn %xmm2, %xmm4 -; SSE2-NEXT: por %xmm4, %xmm0 +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] +; SSE2-NEXT: pxor 
%xmm2, %xmm2 +; SSE2-NEXT: pcmpeqb %xmm1, %xmm2 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: pandn %xmm4, %xmm2 +; SSE2-NEXT: por %xmm2, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: splatvar_funnnel_v16i8: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa %xmm0, %xmm3 -; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: pshufb %xmm0, %xmm2 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm2 -; SSE41-NEXT: pmovzxbq {{.*#+}} xmm5 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: psllw %xmm5, %xmm4 +; SSE41-NEXT: psllw %xmm0, %xmm4 +; SSE41-NEXT: pcmpeqd %xmm5, %xmm5 ; SSE41-NEXT: pcmpeqd %xmm6, %xmm6 -; SSE41-NEXT: pcmpeqd %xmm7, %xmm7 -; SSE41-NEXT: psllw %xmm5, %xmm7 -; SSE41-NEXT: pshufb %xmm0, %xmm7 -; SSE41-NEXT: pand %xmm7, %xmm4 -; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; SSE41-NEXT: psubb %xmm2, %xmm5 -; SSE41-NEXT: pmovzxbq {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,zero,zero,zero,zero,xmm5[1],zero,zero,zero,zero,zero,zero,zero -; SSE41-NEXT: psrlw %xmm5, %xmm1 -; SSE41-NEXT: psrlw %xmm5, %xmm6 -; SSE41-NEXT: pshufb {{.*#+}} xmm6 = xmm6[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] -; SSE41-NEXT: pand %xmm1, %xmm6 -; SSE41-NEXT: por %xmm6, %xmm4 +; SSE41-NEXT: psllw %xmm0, %xmm6 +; SSE41-NEXT: pxor %xmm0, %xmm0 +; SSE41-NEXT: pshufb %xmm0, %xmm6 +; SSE41-NEXT: pand %xmm6, %xmm4 +; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; SSE41-NEXT: psubb %xmm2, %xmm6 +; SSE41-NEXT: pmovzxbq {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,zero,zero,zero,zero,xmm6[1],zero,zero,zero,zero,zero,zero,zero +; SSE41-NEXT: psrlw %xmm6, %xmm1 +; SSE41-NEXT: psrlw %xmm6, %xmm5 +; SSE41-NEXT: pshufb {{.*#+}} xmm5 = xmm5[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; SSE41-NEXT: pand %xmm1, %xmm5 +; SSE41-NEXT: por %xmm5, %xmm4 +; SSE41-NEXT: pshufb %xmm0, %xmm2 ; SSE41-NEXT: pcmpeqb 
%xmm2, %xmm0 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm4 ; SSE41-NEXT: movdqa %xmm4, %xmm0 @@ -1903,30 +1900,29 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; ; AVX1-LABEL: splatvar_funnnel_v16i8: ; AVX1: # %bb.0: -; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 -; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero -; AVX1-NEXT: vpsllw %xmm4, %xmm0, %xmm5 -; AVX1-NEXT: vpcmpeqd %xmm6, %xmm6, %xmm6 -; AVX1-NEXT: vpsllw %xmm4, %xmm6, %xmm4 -; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm4 -; AVX1-NEXT: vpand %xmm4, %xmm5, %xmm4 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX1-NEXT: vpsubb %xmm2, %xmm5, %xmm5 -; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,zero,zero,zero,zero,xmm5[1],zero,zero,zero,zero,zero,zero,zero -; AVX1-NEXT: vpsrlw %xmm5, %xmm1, %xmm1 -; AVX1-NEXT: vpsrlw %xmm5, %xmm6, %xmm5 -; AVX1-NEXT: vpshufb {{.*#+}} xmm5 = xmm5[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] -; AVX1-NEXT: vpand %xmm5, %xmm1, %xmm1 -; AVX1-NEXT: vpor %xmm1, %xmm4, %xmm1 -; AVX1-NEXT: vpcmpeqb %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX1-NEXT: vpsllw %xmm3, %xmm0, %xmm4 +; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX1-NEXT: vpsllw %xmm3, %xmm5, %xmm3 +; AVX1-NEXT: vpxor %xmm6, %xmm6, %xmm6 +; AVX1-NEXT: vpshufb %xmm6, %xmm3, %xmm3 +; AVX1-NEXT: vpand %xmm3, %xmm4, %xmm3 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX1-NEXT: vpsubb %xmm2, %xmm4, %xmm4 +; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero +; AVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm4 +; AVX1-NEXT: vpshufb {{.*#+}} xmm4 = 
xmm4[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vpor %xmm1, %xmm3, %xmm1 +; AVX1-NEXT: vpshufb %xmm6, %xmm2, %xmm2 +; AVX1-NEXT: vpcmpeqb %xmm6, %xmm2, %xmm2 ; AVX1-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: splatvar_funnnel_v16i8: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastb %xmm2, %xmm2 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX2-NEXT: vpsllw %xmm3, %xmm0, %xmm4 @@ -1943,6 +1939,7 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; AVX2-NEXT: vpbroadcastb %xmm4, %xmm4 ; AVX2-NEXT: vpand %xmm4, %xmm1, %xmm1 ; AVX2-NEXT: vpor %xmm1, %xmm3, %xmm1 +; AVX2-NEXT: vpbroadcastb %xmm2, %xmm2 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX2-NEXT: vpcmpeqb %xmm3, %xmm2, %xmm2 ; AVX2-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 @@ -1950,9 +1947,9 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; ; AVX512F-LABEL: splatvar_funnnel_v16i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastb %xmm2, %xmm2 ; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero ; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512F-NEXT: vpbroadcastb %xmm2, %xmm2 ; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm4 = 
xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero ; AVX512F-NEXT: vpsllvd %zmm4, %zmm3, %zmm3 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] @@ -1970,9 +1967,9 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; ; AVX512VL-LABEL: splatvar_funnnel_v16i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastb %xmm2, %xmm2 ; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512VL-NEXT: vpbroadcastb %xmm2, %xmm2 ; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm4 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero ; AVX512VL-NEXT: vpsllvd %zmm4, %zmm3, %zmm3 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] @@ -1991,9 +1988,9 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; AVX512BW-LABEL: splatvar_funnnel_v16i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: 
vpbroadcastb %xmm2, %xmm2 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512BW-NEXT: vpbroadcastb %xmm2, %xmm2 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero ; AVX512BW-NEXT: vpsllvw %zmm4, %zmm3, %zmm3 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero @@ -2012,9 +2009,9 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; AVX512VBMI2-LABEL: splatvar_funnnel_v16i8: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VBMI2-NEXT: vpbroadcastb %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512VBMI2-NEXT: vpbroadcastb %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero ; AVX512VBMI2-NEXT: vpsllvw %zmm4, %zmm3, %zmm3 ; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} ymm1 = 
xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero @@ -2032,9 +2029,9 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; ; AVX512VLBW-LABEL: splatvar_funnnel_v16i8: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero ; AVX512VLBW-NEXT: vpsllvw %ymm4, %ymm3, %ymm3 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] @@ -2052,9 +2049,9 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; ; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i8: ; AVX512VLVBMI2: # %bb.0: -; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm2, %xmm2 ; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm2, %xmm2 ; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} ymm4 = 
xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero ; AVX512VLVBMI2-NEXT: vpsllvw %ymm4, %ymm3, %ymm3 ; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] @@ -2072,9 +2069,9 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; ; XOPAVX1-LABEL: splatvar_funnnel_v16i8: ; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; XOPAVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2 -; XOPAVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; XOPAVX1-NEXT: vpshlb %xmm2, %xmm0, %xmm4 ; XOPAVX1-NEXT: vpsubb {{.*}}(%rip), %xmm2, %xmm5 ; XOPAVX1-NEXT: vpshlb %xmm5, %xmm1, %xmm1 @@ -2085,8 +2082,8 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; ; XOPAVX2-LABEL: splatvar_funnnel_v16i8: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastb %xmm2, %xmm2 ; XOPAVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; XOPAVX2-NEXT: vpbroadcastb %xmm2, %xmm2 ; XOPAVX2-NEXT: vpshlb %xmm2, %xmm0, %xmm3 ; XOPAVX2-NEXT: vpsubb {{.*}}(%rip), %xmm2, %xmm4 ; XOPAVX2-NEXT: vpshlb %xmm4, %xmm1, %xmm1 @@ -2098,38 +2095,39 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; ; X32-SSE-LABEL: splatvar_funnnel_v16i8: ; X32-SSE: # %bb.0: -; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X32-SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,0,2,3,4,5,6,7] -; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm2 -; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; X32-SSE-NEXT: psubb %xmm2, %xmm3 -; X32-SSE-NEXT: pxor %xmm4, %xmm4 -; X32-SSE-NEXT: pcmpeqb %xmm2, %xmm4 -; X32-SSE-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0] -; X32-SSE-NEXT: 
psrldq {{.*#+}} xmm2 = xmm2[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; X32-SSE-NEXT: movdqa %xmm0, %xmm5 -; X32-SSE-NEXT: psllw %xmm2, %xmm5 -; X32-SSE-NEXT: pcmpeqd %xmm6, %xmm6 -; X32-SSE-NEXT: psllw %xmm2, %xmm6 -; X32-SSE-NEXT: pcmpeqd %xmm2, %xmm2 -; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X32-SSE-NEXT: pshuflw {{.*#+}} xmm6 = xmm6[0,0,2,3,4,5,6,7] -; X32-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,0,0,0] -; X32-SSE-NEXT: pand %xmm5, %xmm6 +; X32-SSE-NEXT: movdqa %xmm2, %xmm3 ; X32-SSE-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0] ; X32-SSE-NEXT: psrldq {{.*#+}} xmm3 = xmm3[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; X32-SSE-NEXT: psrlw %xmm3, %xmm1 -; X32-SSE-NEXT: psrlw %xmm3, %xmm2 -; X32-SSE-NEXT: psrlw $8, %xmm2 +; X32-SSE-NEXT: movdqa %xmm0, %xmm4 +; X32-SSE-NEXT: psllw %xmm3, %xmm4 +; X32-SSE-NEXT: pcmpeqd %xmm5, %xmm5 +; X32-SSE-NEXT: pcmpeqd %xmm6, %xmm6 +; X32-SSE-NEXT: psllw %xmm3, %xmm6 +; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; X32-SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm6[0,0,2,3,4,5,6,7] +; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] +; X32-SSE-NEXT: pand %xmm4, %xmm3 +; X32-SSE-NEXT: movdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; X32-SSE-NEXT: psubb %xmm2, %xmm4 +; X32-SSE-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0] +; X32-SSE-NEXT: psrldq {{.*#+}} xmm4 = xmm4[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X32-SSE-NEXT: psrlw %xmm4, %xmm1 +; X32-SSE-NEXT: psrlw %xmm4, %xmm5 +; X32-SSE-NEXT: psrlw $8, %xmm5 +; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; X32-SSE-NEXT: pshuflw {{.*#+}} xmm4 = xmm5[0,0,2,3,4,5,6,7] +; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,0,0,0] +; 
X32-SSE-NEXT: pand %xmm1, %xmm4 +; X32-SSE-NEXT: por %xmm3, %xmm4 ; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X32-SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,0,2,3,4,5,6,7] -; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] -; X32-SSE-NEXT: pand %xmm1, %xmm2 -; X32-SSE-NEXT: por %xmm6, %xmm2 -; X32-SSE-NEXT: pand %xmm4, %xmm0 -; X32-SSE-NEXT: pandn %xmm2, %xmm4 -; X32-SSE-NEXT: por %xmm4, %xmm0 +; X32-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,2,3,4,5,6,7] +; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] +; X32-SSE-NEXT: pxor %xmm2, %xmm2 +; X32-SSE-NEXT: pcmpeqb %xmm1, %xmm2 +; X32-SSE-NEXT: pand %xmm2, %xmm0 +; X32-SSE-NEXT: pandn %xmm4, %xmm2 +; X32-SSE-NEXT: por %xmm2, %xmm0 ; X32-SSE-NEXT: retl %splat = shufflevector <16 x i8> %amt, <16 x i8> undef, <16 x i32> zeroinitializer %res = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %splat) @@ -2166,9 +2164,8 @@ define void @sink_splatvar(i32* %p, i32 %shift_amt) { ; ; SSE41-LABEL: sink_splatvar: ; SSE41: # %bb.0: # %entry -; SSE41-NEXT: movd %esi, %xmm0 +; SSE41-NEXT: movd %esi, %xmm1 ; SSE41-NEXT: movq $-1024, %rax # imm = 0xFC00 -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0] ; SSE41-NEXT: pand {{.*}}(%rip), %xmm1 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [32,32,32,32] ; SSE41-NEXT: psubd %xmm1, %xmm0 @@ -2192,7 +2189,6 @@ define void @sink_splatvar(i32* %p, i32 %shift_amt) { ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: vmovd %esi, %xmm0 ; AVX1-NEXT: movq $-1024, %rax # imm = 0xFC00 -; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm1 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [32,32,32,32] ; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll index 4903104054479..860c2d576c728 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll @@ -906,39 +906,35 @@ define <32 x i8> 
@var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt) define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt) nounwind { ; AVX1-LABEL: splatvar_funnnel_v4i64: ; AVX1: # %bb.0: -; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,1,0,1] -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 -; AVX1-NEXT: vandps {{.*}}(%rip), %ymm2, %ymm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX1-NEXT: vpsllq %xmm2, %xmm3, %xmm3 ; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm4 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [64,64] -; AVX1-NEXT: vpsubq %xmm4, %xmm5, %xmm6 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm7 -; AVX1-NEXT: vpsrlq %xmm6, %xmm7, %xmm6 -; AVX1-NEXT: vpsubq %xmm2, %xmm5, %xmm5 -; AVX1-NEXT: vpsrlq %xmm5, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm1, %ymm1 +; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] +; AVX1-NEXT: vpsubq %xmm2, %xmm4, %xmm4 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 +; AVX1-NEXT: vpsrlq %xmm4, %xmm5, %xmm5 +; AVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm1, %ymm1 ; AVX1-NEXT: vorps %ymm1, %ymm3, %ymm1 ; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpcmpeqq %xmm3, %xmm4, %xmm4 ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: splatvar_funnnel_v4i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63] -; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] +; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX2-NEXT: vpsllq %xmm2, %ymm0, %ymm3 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX2-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; 
AVX2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 ; AVX2-NEXT: vpor %ymm1, %ymm3, %ymm1 +; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX2-NEXT: vpcmpeqq %ymm3, %ymm2, %ymm2 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 @@ -947,14 +943,14 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> % ; AVX512F-LABEL: splatvar_funnnel_v4i64: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512F-NEXT: vpbroadcastq %xmm2, %ymm2 -; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63] -; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2 +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] +; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX512F-NEXT: vpsllq %xmm2, %ymm0, %ymm3 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX512F-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512F-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 ; AVX512F-NEXT: vpor %ymm1, %ymm3, %ymm1 +; AVX512F-NEXT: vpbroadcastq %xmm2, %ymm2 ; AVX512F-NEXT: vptestnmq %zmm2, %zmm2, %k1 ; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} ; AVX512F-NEXT: vmovdqa %ymm1, %ymm0 @@ -962,13 +958,13 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> % ; ; AVX512VL-LABEL: splatvar_funnnel_v4i64: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastq %xmm2, %ymm2 -; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to4}, %ymm2, %ymm2 +; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to2}, %xmm2, %xmm2 ; AVX512VL-NEXT: vpsllq %xmm2, %ymm0, %ymm3 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX512VL-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512VL-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 ; AVX512VL-NEXT: vpor %ymm1, %ymm3, %ymm1 +; AVX512VL-NEXT: vpbroadcastq %xmm2, %ymm2 ; AVX512VL-NEXT: vptestnmq %ymm2, %ymm2, %k1 ; AVX512VL-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} ; AVX512VL-NEXT: vmovdqa %ymm1, %ymm0 @@ -977,14 +973,14 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> % ; AVX512BW-LABEL: splatvar_funnnel_v4i64: ; AVX512BW: # %bb.0: ; 
AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vpbroadcastq %xmm2, %ymm2 -; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63] -; AVX512BW-NEXT: vpand %ymm3, %ymm2, %ymm2 +; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] +; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX512BW-NEXT: vpsllq %xmm2, %ymm0, %ymm3 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX512BW-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512BW-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 ; AVX512BW-NEXT: vpor %ymm1, %ymm3, %ymm1 +; AVX512BW-NEXT: vpbroadcastq %xmm2, %ymm2 ; AVX512BW-NEXT: vptestnmq %zmm2, %zmm2, %k1 ; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} ; AVX512BW-NEXT: vmovdqa %ymm1, %ymm0 @@ -993,14 +989,14 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> % ; AVX512VBMI2-LABEL: splatvar_funnnel_v4i64: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %ymm2 -; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63] -; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm2 +; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] +; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpsllq %xmm2, %ymm0, %ymm3 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX512VBMI2-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512VBMI2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 ; AVX512VBMI2-NEXT: vpor %ymm1, %ymm3, %ymm1 +; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %ymm2 ; AVX512VBMI2-NEXT: vptestnmq %zmm2, %zmm2, %k1 ; AVX512VBMI2-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} ; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0 @@ -1008,13 +1004,13 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> % ; ; AVX512VLBW-LABEL: splatvar_funnnel_v4i64: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %ymm2 -; AVX512VLBW-NEXT: vpandq {{.*}}(%rip){1to4}, %ymm2, %ymm2 +; AVX512VLBW-NEXT: vpandq {{.*}}(%rip){1to2}, %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpsllq %xmm2, %ymm0, 
%ymm3 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX512VLBW-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512VLBW-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 ; AVX512VLBW-NEXT: vpor %ymm1, %ymm3, %ymm1 +; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %ymm2 ; AVX512VLBW-NEXT: vptestnmq %ymm2, %ymm2, %k1 ; AVX512VLBW-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} ; AVX512VLBW-NEXT: vmovdqa %ymm1, %ymm0 @@ -1028,39 +1024,35 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> % ; ; XOPAVX1-LABEL: splatvar_funnnel_v4i64: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,1,0,1] -; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 -; XOPAVX1-NEXT: vandps {{.*}}(%rip), %ymm2, %ymm2 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; XOPAVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsllq %xmm2, %xmm3, %xmm3 ; XOPAVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm4 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3 -; XOPAVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 -; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [64,64] -; XOPAVX1-NEXT: vpsubq %xmm4, %xmm5, %xmm6 -; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm7 -; XOPAVX1-NEXT: vpsrlq %xmm6, %xmm7, %xmm6 -; XOPAVX1-NEXT: vpsubq %xmm2, %xmm5, %xmm5 -; XOPAVX1-NEXT: vpsrlq %xmm5, %xmm1, %xmm1 -; XOPAVX1-NEXT: vinsertf128 $1, %xmm6, %ymm1, %ymm1 +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] +; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] +; XOPAVX1-NEXT: vpsubq %xmm2, %xmm4, %xmm4 +; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 +; XOPAVX1-NEXT: vpsrlq %xmm4, %xmm5, %xmm5 +; XOPAVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 +; XOPAVX1-NEXT: vinsertf128 $1, %xmm5, %ymm1, %ymm1 ; XOPAVX1-NEXT: vorps %ymm1, %ymm3, %ymm1 ; XOPAVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; XOPAVX1-NEXT: vpcomeqq %xmm3, %xmm4, %xmm4 ; XOPAVX1-NEXT: vpcomeqq %xmm3, %xmm2, %xmm2 -; XOPAVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 +; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 ; XOPAVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 ; XOPAVX1-NEXT: 
retq ; ; XOPAVX2-LABEL: splatvar_funnnel_v4i64: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastq %xmm2, %ymm2 -; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63] -; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm2 +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] +; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2 ; XOPAVX2-NEXT: vpsllq %xmm2, %ymm0, %ymm3 ; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; XOPAVX2-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; XOPAVX2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 ; XOPAVX2-NEXT: vpor %ymm1, %ymm3, %ymm1 +; XOPAVX2-NEXT: vpbroadcastq %xmm2, %ymm2 ; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; XOPAVX2-NEXT: vpcmpeqq %ymm3, %ymm2, %ymm2 ; XOPAVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 @@ -1073,37 +1065,31 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> % define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %amt) nounwind { ; AVX1-LABEL: splatvar_funnnel_v8i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,0,0,0] -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 -; AVX1-NEXT: vandps {{.*}}(%rip), %ymm2, %ymm2 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 -; AVX1-NEXT: vpslld %xmm3, %xmm4, %xmm4 -; AVX1-NEXT: vpslld %xmm3, %xmm0, %xmm3 -; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [32,32,32,32] -; AVX1-NEXT: vpsubd %xmm4, %xmm5, %xmm6 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm6 = xmm6[0],zero,xmm6[1],zero -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm7 -; AVX1-NEXT: vpsrld %xmm6, %xmm7, %xmm6 -; AVX1-NEXT: vpsubd %xmm2, %xmm5, %xmm5 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm5 = xmm5[0],zero,xmm5[1],zero -; AVX1-NEXT: vpsrld %xmm5, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm1, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = 
xmm2[0],zero,xmm2[1],zero +; AVX1-NEXT: vpslld %xmm4, %xmm3, %xmm3 +; AVX1-NEXT: vpslld %xmm4, %xmm0, %xmm4 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3 +; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [32,32,32,32] +; AVX1-NEXT: vpsubd %xmm2, %xmm4, %xmm4 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 +; AVX1-NEXT: vpsrld %xmm4, %xmm5, %xmm5 +; AVX1-NEXT: vpsrld %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm1, %ymm1 ; AVX1-NEXT: vorps %ymm1, %ymm3, %ymm1 ; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpcmpeqd %xmm3, %xmm4, %xmm4 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 ; AVX1-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: splatvar_funnnel_v8i32: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastd %xmm2, %ymm2 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31] -; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31] +; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; AVX2-NEXT: vpslld %xmm3, %ymm0, %ymm3 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32] @@ -1111,6 +1097,7 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX2-NEXT: vpsrld %xmm4, %ymm1, %ymm1 ; AVX2-NEXT: vpor %ymm1, %ymm3, %ymm1 +; AVX2-NEXT: vpbroadcastd %xmm2, %ymm2 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2 ; AVX2-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 @@ -1119,9 +1106,8 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; AVX512F-LABEL: splatvar_funnnel_v8i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512F-NEXT: 
vpbroadcastd %xmm2, %ymm2 -; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31] -; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2 +; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31] +; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; AVX512F-NEXT: vpslld %xmm3, %ymm0, %ymm3 ; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32] @@ -1129,6 +1115,7 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX512F-NEXT: vpsrld %xmm4, %ymm1, %ymm1 ; AVX512F-NEXT: vpor %ymm1, %ymm3, %ymm1 +; AVX512F-NEXT: vpbroadcastd %xmm2, %ymm2 ; AVX512F-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} ; AVX512F-NEXT: vmovdqa %ymm1, %ymm0 @@ -1136,8 +1123,7 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; ; AVX512VL-LABEL: splatvar_funnnel_v8i32: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastd %xmm2, %ymm2 -; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm2, %ymm2 +; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm2, %xmm2 ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; AVX512VL-NEXT: vpslld %xmm3, %ymm0, %ymm3 ; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32] @@ -1145,6 +1131,7 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX512VL-NEXT: vpsrld %xmm4, %ymm1, %ymm1 ; AVX512VL-NEXT: vpor %ymm1, %ymm3, %ymm1 +; AVX512VL-NEXT: vpbroadcastd %xmm2, %ymm2 ; AVX512VL-NEXT: vptestnmd %ymm2, %ymm2, %k1 ; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} ; AVX512VL-NEXT: vmovdqa %ymm1, %ymm0 @@ -1153,9 +1140,8 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; AVX512BW-LABEL: splatvar_funnnel_v8i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; 
AVX512BW-NEXT: vpbroadcastd %xmm2, %ymm2 -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31] -; AVX512BW-NEXT: vpand %ymm3, %ymm2, %ymm2 +; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31] +; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; AVX512BW-NEXT: vpslld %xmm3, %ymm0, %ymm3 ; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32] @@ -1163,6 +1149,7 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX512BW-NEXT: vpsrld %xmm4, %ymm1, %ymm1 ; AVX512BW-NEXT: vpor %ymm1, %ymm3, %ymm1 +; AVX512BW-NEXT: vpbroadcastd %xmm2, %ymm2 ; AVX512BW-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; AVX512BW-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} ; AVX512BW-NEXT: vmovdqa %ymm1, %ymm0 @@ -1171,9 +1158,8 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; AVX512VBMI2-LABEL: splatvar_funnnel_v8i32: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %ymm2 -; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31] -; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm2 +; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31] +; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; AVX512VBMI2-NEXT: vpslld %xmm3, %ymm0, %ymm3 ; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32] @@ -1181,6 +1167,7 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; AVX512VBMI2-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX512VBMI2-NEXT: vpsrld %xmm4, %ymm1, %ymm1 ; AVX512VBMI2-NEXT: vpor %ymm1, %ymm3, %ymm1 +; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %ymm2 ; AVX512VBMI2-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; AVX512VBMI2-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} ; AVX512VBMI2-NEXT: 
vmovdqa %ymm1, %ymm0 @@ -1188,8 +1175,7 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; ; AVX512VLBW-LABEL: splatvar_funnnel_v8i32: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastd %xmm2, %ymm2 -; AVX512VLBW-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm2, %ymm2 +; AVX512VLBW-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; AVX512VLBW-NEXT: vpslld %xmm3, %ymm0, %ymm3 ; AVX512VLBW-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32] @@ -1197,6 +1183,7 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX512VLBW-NEXT: vpsrld %xmm4, %ymm1, %ymm1 ; AVX512VLBW-NEXT: vpor %ymm1, %ymm3, %ymm1 +; AVX512VLBW-NEXT: vpbroadcastd %xmm2, %ymm2 ; AVX512VLBW-NEXT: vptestnmd %ymm2, %ymm2, %k1 ; AVX512VLBW-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} ; AVX512VLBW-NEXT: vmovdqa %ymm1, %ymm0 @@ -1210,37 +1197,31 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; ; XOPAVX1-LABEL: splatvar_funnnel_v8i32: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,0,0,0] -; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 -; XOPAVX1-NEXT: vandps {{.*}}(%rip), %ymm2, %ymm2 -; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero -; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 -; XOPAVX1-NEXT: vpslld %xmm3, %xmm4, %xmm4 -; XOPAVX1-NEXT: vpslld %xmm3, %xmm0, %xmm3 -; XOPAVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3 -; XOPAVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 -; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [32,32,32,32] -; XOPAVX1-NEXT: vpsubd %xmm4, %xmm5, %xmm6 -; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm6 = xmm6[0],zero,xmm6[1],zero -; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm7 -; XOPAVX1-NEXT: vpsrld %xmm6, %xmm7, %xmm6 -; XOPAVX1-NEXT: vpsubd %xmm2, %xmm5, %xmm5 -; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm5 = xmm5[0],zero,xmm5[1],zero -; 
XOPAVX1-NEXT: vpsrld %xmm5, %xmm1, %xmm1 -; XOPAVX1-NEXT: vinsertf128 $1, %xmm6, %ymm1, %ymm1 +; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; XOPAVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero +; XOPAVX1-NEXT: vpslld %xmm4, %xmm3, %xmm3 +; XOPAVX1-NEXT: vpslld %xmm4, %xmm0, %xmm4 +; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3 +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] +; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [32,32,32,32] +; XOPAVX1-NEXT: vpsubd %xmm2, %xmm4, %xmm4 +; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero +; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 +; XOPAVX1-NEXT: vpsrld %xmm4, %xmm5, %xmm5 +; XOPAVX1-NEXT: vpsrld %xmm4, %xmm1, %xmm1 +; XOPAVX1-NEXT: vinsertf128 $1, %xmm5, %ymm1, %ymm1 ; XOPAVX1-NEXT: vorps %ymm1, %ymm3, %ymm1 ; XOPAVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; XOPAVX1-NEXT: vpcomeqd %xmm3, %xmm4, %xmm4 ; XOPAVX1-NEXT: vpcomeqd %xmm3, %xmm2, %xmm2 -; XOPAVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 +; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 ; XOPAVX1-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; XOPAVX1-NEXT: retq ; ; XOPAVX2-LABEL: splatvar_funnnel_v8i32: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastd %xmm2, %ymm2 -; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31] -; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm2 +; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31] +; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2 ; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; XOPAVX2-NEXT: vpslld %xmm3, %ymm0, %ymm3 ; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32] @@ -1248,6 +1229,7 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; XOPAVX2-NEXT: vpsrld %xmm4, %ymm1, %ymm1 ; XOPAVX2-NEXT: vpor %ymm1, %ymm3, %ymm1 +; XOPAVX2-NEXT: vpbroadcastd %xmm2, %ymm2 ; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, 
%xmm3 ; XOPAVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2 ; XOPAVX2-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 @@ -1260,30 +1242,25 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %amt) nounwind { ; AVX1-LABEL: splatvar_funnnel_v16i16: ; AVX1: # %bb.0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero +; AVX1-NEXT: vpsllw %xmm4, %xmm3, %xmm3 +; AVX1-NEXT: vpsllw %xmm4, %xmm0, %xmm4 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,3,4,5,6,7] ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 -; AVX1-NEXT: vandps {{.*}}(%rip), %ymm2, %ymm2 -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 -; AVX1-NEXT: vpsllw %xmm3, %xmm4, %xmm4 -; AVX1-NEXT: vpsllw %xmm3, %xmm0, %xmm3 -; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16] -; AVX1-NEXT: vpsubw %xmm4, %xmm5, %xmm6 -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm7 -; AVX1-NEXT: vpsrlw %xmm6, %xmm7, %xmm6 -; AVX1-NEXT: vpsubw %xmm2, %xmm5, %xmm5 -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero -; AVX1-NEXT: vpsrlw %xmm5, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm1, %ymm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] +; AVX1-NEXT: vpsubw %xmm2, %xmm4, %xmm4 +; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 +; AVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm5 +; AVX1-NEXT: vpsrlw %xmm4, %xmm1, 
%xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm1, %ymm1 ; AVX1-NEXT: vorps %ymm1, %ymm3, %ymm1 ; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpcmpeqw %xmm3, %xmm4, %xmm4 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 ; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 @@ -1291,8 +1268,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; ; AVX2-LABEL: splatvar_funnnel_v16i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastw %xmm2, %ymm2 -; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; AVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX2-NEXT: vpsllw %xmm3, %ymm0, %ymm3 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] @@ -1300,6 +1276,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX2-NEXT: vpor %ymm1, %ymm3, %ymm1 +; AVX2-NEXT: vpbroadcastw %xmm2, %ymm2 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX2-NEXT: vpcmpeqw %ymm3, %ymm2, %ymm2 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 @@ -1307,8 +1284,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; ; AVX512F-LABEL: splatvar_funnnel_v16i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastw %xmm2, %ymm2 -; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm3 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] @@ -1316,6 +1292,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; AVX512F-NEXT: vpmovzxwq 
{{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX512F-NEXT: vpor %ymm1, %ymm3, %ymm1 +; AVX512F-NEXT: vpbroadcastw %xmm2, %ymm2 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm2, %ymm2 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 @@ -1323,8 +1300,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; ; AVX512VL-LABEL: splatvar_funnnel_v16i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastw %xmm2, %ymm2 -; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm3 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] @@ -1332,6 +1308,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX512VL-NEXT: vpor %ymm1, %ymm3, %ymm1 +; AVX512VL-NEXT: vpbroadcastw %xmm2, %ymm2 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX512VL-NEXT: vpcmpeqw %ymm3, %ymm2, %ymm2 ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 @@ -1340,8 +1317,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; AVX512BW-LABEL: splatvar_funnnel_v16i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vpbroadcastw %xmm2, %ymm2 -; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512BW-NEXT: vpsllw %xmm3, %ymm0, %ymm3 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] @@ -1349,6 +1325,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; AVX512BW-NEXT: 
vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512BW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX512BW-NEXT: vpor %ymm1, %ymm3, %ymm1 +; AVX512BW-NEXT: vpbroadcastw %xmm2, %ymm2 ; AVX512BW-NEXT: vptestnmw %zmm2, %zmm2, %k1 ; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} ; AVX512BW-NEXT: vmovdqa %ymm1, %ymm0 @@ -1357,8 +1334,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; AVX512VBMI2-LABEL: splatvar_funnnel_v16i16: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %ymm2 -; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512VBMI2-NEXT: vpsllw %xmm3, %ymm0, %ymm3 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] @@ -1366,6 +1342,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512VBMI2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX512VBMI2-NEXT: vpor %ymm1, %ymm3, %ymm1 +; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %ymm2 ; AVX512VBMI2-NEXT: vptestnmw %zmm2, %zmm2, %k1 ; AVX512VBMI2-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} ; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0 @@ -1373,8 +1350,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; ; AVX512VLBW-LABEL: splatvar_funnnel_v16i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %ymm2 -; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512VLBW-NEXT: vpsllw %xmm3, %ymm0, %ymm3 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] @@ -1382,6 +1358,7 @@ define <16 x i16> 
@splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512VLBW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX512VLBW-NEXT: vpor %ymm1, %ymm3, %ymm1 +; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %ymm2 ; AVX512VLBW-NEXT: vptestnmw %ymm2, %ymm2, %k1 ; AVX512VLBW-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1} ; AVX512VLBW-NEXT: vmovdqa %ymm1, %ymm0 @@ -1395,37 +1372,31 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; ; XOPAVX1-LABEL: splatvar_funnnel_v16i16: ; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; XOPAVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero +; XOPAVX1-NEXT: vpsllw %xmm4, %xmm3, %xmm3 +; XOPAVX1-NEXT: vpsllw %xmm4, %xmm0, %xmm4 +; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,3,4,5,6,7] ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] -; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 -; XOPAVX1-NEXT: vandps {{.*}}(%rip), %ymm2, %ymm2 -; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 -; XOPAVX1-NEXT: vpsllw %xmm3, %xmm4, %xmm4 -; XOPAVX1-NEXT: vpsllw %xmm3, %xmm0, %xmm3 -; XOPAVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3 -; XOPAVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 -; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16] -; XOPAVX1-NEXT: vpsubw %xmm4, %xmm5, %xmm6 -; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero -; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm7 -; XOPAVX1-NEXT: vpsrlw %xmm6, %xmm7, %xmm6 -; XOPAVX1-NEXT: vpsubw %xmm2, %xmm5, %xmm5 -; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero -; XOPAVX1-NEXT: vpsrlw %xmm5, %xmm1, %xmm1 -; XOPAVX1-NEXT: vinsertf128 $1, %xmm6, %ymm1, %ymm1 +; 
XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] +; XOPAVX1-NEXT: vpsubw %xmm2, %xmm4, %xmm4 +; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero +; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 +; XOPAVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm5 +; XOPAVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 +; XOPAVX1-NEXT: vinsertf128 $1, %xmm5, %ymm1, %ymm1 ; XOPAVX1-NEXT: vorps %ymm1, %ymm3, %ymm1 ; XOPAVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; XOPAVX1-NEXT: vpcomeqw %xmm3, %xmm4, %xmm4 ; XOPAVX1-NEXT: vpcomeqw %xmm3, %xmm2, %xmm2 -; XOPAVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 +; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 ; XOPAVX1-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0 ; XOPAVX1-NEXT: retq ; ; XOPAVX2-LABEL: splatvar_funnnel_v16i16: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastw %xmm2, %ymm2 -; XOPAVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; XOPAVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; XOPAVX2-NEXT: vpsllw %xmm3, %ymm0, %ymm3 ; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] @@ -1433,6 +1404,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; XOPAVX2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; XOPAVX2-NEXT: vpor %ymm1, %ymm3, %ymm1 +; XOPAVX2-NEXT: vpbroadcastw %xmm2, %ymm2 ; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; XOPAVX2-NEXT: vpcmpeqw %ymm3, %ymm2, %ymm2 ; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 @@ -1445,41 +1417,33 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt) nounwind { ; AVX1-LABEL: splatvar_funnnel_v32i8: ; AVX1: # %bb.0: -; AVX1-NEXT: vpxor %xmm8, %xmm8, %xmm8 -; AVX1-NEXT: vpshufb %xmm8, %xmm2, %xmm2 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 -; 
AVX1-NEXT: vandps {{.*}}(%rip), %ymm2, %ymm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5 -; AVX1-NEXT: vpsllw %xmm4, %xmm5, %xmm5 -; AVX1-NEXT: vpcmpeqd %xmm9, %xmm9, %xmm9 -; AVX1-NEXT: vpsllw %xmm4, %xmm9, %xmm7 -; AVX1-NEXT: vpshufb %xmm8, %xmm7, %xmm7 -; AVX1-NEXT: vpand %xmm7, %xmm5, %xmm5 +; AVX1-NEXT: vpsllw %xmm4, %xmm3, %xmm3 +; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX1-NEXT: vpsllw %xmm4, %xmm5, %xmm6 +; AVX1-NEXT: vpxor %xmm7, %xmm7, %xmm7 +; AVX1-NEXT: vpshufb %xmm7, %xmm6, %xmm6 +; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3 ; AVX1-NEXT: vpsllw %xmm4, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm7, %xmm4, %xmm4 -; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm4, %ymm4 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm10 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX1-NEXT: vpsubb %xmm5, %xmm10, %xmm3 -; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero +; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm4 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3 +; AVX1-NEXT: vpshufb %xmm7, %xmm2, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX1-NEXT: vpsubb %xmm2, %xmm4, %xmm4 +; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6 -; AVX1-NEXT: vpsrlw %xmm3, %xmm6, %xmm6 -; AVX1-NEXT: vpsrlw %xmm3, %xmm9, %xmm3 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] -; AVX1-NEXT: vpshufb %xmm7, %xmm3, %xmm3 -; AVX1-NEXT: vpand %xmm3, %xmm6, %xmm3 -; AVX1-NEXT: vpsubb %xmm2, %xmm10, %xmm6 -; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,zero,zero,zero,zero,xmm6[1],zero,zero,zero,zero,zero,zero,zero -; 
AVX1-NEXT: vpsrlw %xmm6, %xmm1, %xmm1 -; AVX1-NEXT: vpsrlw %xmm6, %xmm9, %xmm6 -; AVX1-NEXT: vpshufb %xmm7, %xmm6, %xmm6 -; AVX1-NEXT: vpand %xmm6, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 -; AVX1-NEXT: vorps %ymm1, %ymm4, %ymm1 -; AVX1-NEXT: vpcmpeqb %xmm5, %xmm8, %xmm3 -; AVX1-NEXT: vpcmpeqb %xmm2, %xmm8, %xmm2 -; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 +; AVX1-NEXT: vpsrlw %xmm4, %xmm6, %xmm6 +; AVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm5 +; AVX1-NEXT: vpshufb {{.*#+}} xmm5 = xmm5[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; AVX1-NEXT: vpand %xmm5, %xmm6, %xmm6 +; AVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vpand %xmm5, %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm1, %ymm1 +; AVX1-NEXT: vorps %ymm1, %ymm3, %ymm1 +; AVX1-NEXT: vpcmpeqb %xmm7, %xmm2, %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 ; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 @@ -1487,8 +1451,7 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; ; AVX2-LABEL: splatvar_funnnel_v32i8: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2 -; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; AVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX2-NEXT: vpsllw %xmm3, %ymm0, %ymm4 ; AVX2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 @@ -1504,6 +1467,7 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; AVX2-NEXT: vpbroadcastb %xmm4, %ymm4 ; AVX2-NEXT: vpand %ymm4, %ymm1, %ymm1 ; AVX2-NEXT: vpor %ymm1, %ymm3, %ymm1 +; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX2-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 @@ -1511,8 +1475,7 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; ; AVX512F-LABEL: 
splatvar_funnnel_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2 -; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm4 ; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 @@ -1528,6 +1491,7 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4 ; AVX512F-NEXT: vpand %ymm4, %ymm1, %ymm1 ; AVX512F-NEXT: vpor %ymm1, %ymm3, %ymm1 +; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 @@ -1535,8 +1499,7 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; ; AVX512VL-LABEL: splatvar_funnnel_v32i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2 -; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm4 ; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 @@ -1552,6 +1515,7 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4 ; AVX512VL-NEXT: vpand %ymm4, %ymm1, %ymm1 ; AVX512VL-NEXT: vpor %ymm1, %ymm3, %ymm1 +; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2 ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 @@ -1560,9 +1524,9 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; AVX512BW-LABEL: splatvar_funnnel_v32i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; 
AVX512BW-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512BW-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero,ymm2[16],zero,ymm2[17],zero,ymm2[18],zero,ymm2[19],zero,ymm2[20],zero,ymm2[21],zero,ymm2[22],zero,ymm2[23],zero,ymm2[24],zero,ymm2[25],zero,ymm2[26],zero,ymm2[27],zero,ymm2[28],zero,ymm2[29],zero,ymm2[30],zero,ymm2[31],zero ; AVX512BW-NEXT: vpsllvw %zmm4, %zmm3, %zmm3 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] @@ -1580,9 +1544,9 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; AVX512VBMI2-LABEL: splatvar_funnnel_v32i8: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VBMI2-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm3 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512VBMI2-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero,ymm2[16],zero,ymm2[17],zero,ymm2[18],zero,ymm2[19],zero,ymm2[20],zero,ymm2[21],zero,ymm2[22],zero,ymm2[23],zero,ymm2[24],zero,ymm2[25],zero,ymm2[26],zero,ymm2[27],zero,ymm2[28],zero,ymm2[29],zero,ymm2[30],zero,ymm2[31],zero ; AVX512VBMI2-NEXT: vpsllvw %zmm4, %zmm3, %zmm3 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] @@ -1599,9 +1563,9 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; ; AVX512VLBW-LABEL: splatvar_funnnel_v32i8: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; 
AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero,ymm2[16],zero,ymm2[17],zero,ymm2[18],zero,ymm2[19],zero,ymm2[20],zero,ymm2[21],zero,ymm2[22],zero,ymm2[23],zero,ymm2[24],zero,ymm2[25],zero,ymm2[26],zero,ymm2[27],zero,ymm2[28],zero,ymm2[29],zero,ymm2[30],zero,ymm2[31],zero ; AVX512VLBW-NEXT: vpsllvw %zmm4, %zmm3, %zmm3 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] @@ -1618,9 +1582,9 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; ; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i8: ; AVX512VLVBMI2: # %bb.0: -; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm4 = 
ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero,ymm2[16],zero,ymm2[17],zero,ymm2[18],zero,ymm2[19],zero,ymm2[20],zero,ymm2[21],zero,ymm2[22],zero,ymm2[23],zero,ymm2[24],zero,ymm2[25],zero,ymm2[26],zero,ymm2[27],zero,ymm2[28],zero,ymm2[29],zero,ymm2[30],zero,ymm2[31],zero ; AVX512VLVBMI2-NEXT: vpsllvw %zmm4, %zmm3, %zmm3 ; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] @@ -1637,36 +1601,30 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; ; XOPAVX1-LABEL: splatvar_funnnel_v32i8: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpxor %xmm8, %xmm8, %xmm8 -; XOPAVX1-NEXT: vpshufb %xmm8, %xmm2, %xmm2 +; XOPAVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; XOPAVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 +; XOPAVX1-NEXT: vpshlb %xmm2, %xmm4, %xmm4 +; XOPAVX1-NEXT: vpshlb %xmm2, %xmm0, %xmm5 +; XOPAVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm4 +; XOPAVX1-NEXT: vpsubb {{.*}}(%rip), %xmm2, %xmm5 +; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm6 +; XOPAVX1-NEXT: vpshlb %xmm5, %xmm6, %xmm6 +; XOPAVX1-NEXT: vpshlb %xmm5, %xmm1, %xmm1 +; XOPAVX1-NEXT: vinsertf128 $1, %xmm6, %ymm1, %ymm1 +; XOPAVX1-NEXT: vorps %ymm1, %ymm4, %ymm1 +; XOPAVX1-NEXT: vpcomeqb %xmm3, %xmm2, %xmm2 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 -; XOPAVX1-NEXT: vandps {{.*}}(%rip), %ymm2, %ymm2 -; XOPAVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 -; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm5 -; XOPAVX1-NEXT: vpshlb %xmm4, %xmm5, %xmm5 -; XOPAVX1-NEXT: vpshlb %xmm2, %xmm0, %xmm6 -; XOPAVX1-NEXT: vinsertf128 $1, %xmm5, %ymm6, %ymm5 -; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; XOPAVX1-NEXT: vpsubb %xmm6, %xmm4, %xmm7 -; XOPAVX1-NEXT: vextractf128 
$1, %ymm1, %xmm3 -; XOPAVX1-NEXT: vpshlb %xmm7, %xmm3, %xmm3 -; XOPAVX1-NEXT: vpsubb %xmm6, %xmm2, %xmm6 -; XOPAVX1-NEXT: vpshlb %xmm6, %xmm1, %xmm1 -; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 -; XOPAVX1-NEXT: vorps %ymm1, %ymm5, %ymm1 -; XOPAVX1-NEXT: vpcomeqb %xmm8, %xmm4, %xmm3 -; XOPAVX1-NEXT: vpcomeqb %xmm8, %xmm2, %xmm2 -; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 ; XOPAVX1-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0 ; XOPAVX1-NEXT: retq ; ; XOPAVX2-LABEL: splatvar_funnnel_v32i8: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastb %xmm2, %ymm2 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3 -; XOPAVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 -; XOPAVX2-NEXT: vextracti128 $1, %ymm2, %xmm4 -; XOPAVX2-NEXT: vpshlb %xmm4, %xmm3, %xmm3 +; XOPAVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; XOPAVX2-NEXT: vpbroadcastb %xmm2, %ymm2 +; XOPAVX2-NEXT: vpshlb %xmm2, %xmm3, %xmm3 ; XOPAVX2-NEXT: vpshlb %xmm2, %xmm0, %xmm4 ; XOPAVX2-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3 ; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] diff --git a/llvm/test/CodeGen/X86/vector-fshl-512.ll b/llvm/test/CodeGen/X86/vector-fshl-512.ll index c65d3e7a49480..20cfba41a1c46 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-512.ll @@ -564,13 +564,14 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt) define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %amt) nounwind { ; AVX512F-LABEL: splatvar_funnnel_v8i64: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastq %xmm2, %zmm2 -; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2 +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] +; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX512F-NEXT: vpsllq %xmm2, %zmm0, %zmm3 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX512F-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512F-NEXT: vpsrlq %xmm4, %zmm1, %zmm1 ; AVX512F-NEXT: vporq %zmm1, %zmm3, %zmm1 
+; AVX512F-NEXT: vpbroadcastq %xmm2, %zmm2 ; AVX512F-NEXT: vptestnmq %zmm2, %zmm2, %k1 ; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} ; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0 @@ -578,13 +579,13 @@ define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> % ; ; AVX512VL-LABEL: splatvar_funnnel_v8i64: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastq %xmm2, %zmm2 -; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2 +; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to2}, %xmm2, %xmm2 ; AVX512VL-NEXT: vpsllq %xmm2, %zmm0, %zmm3 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX512VL-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512VL-NEXT: vpsrlq %xmm4, %zmm1, %zmm1 ; AVX512VL-NEXT: vporq %zmm1, %zmm3, %zmm1 +; AVX512VL-NEXT: vpbroadcastq %xmm2, %zmm2 ; AVX512VL-NEXT: vptestnmq %zmm2, %zmm2, %k1 ; AVX512VL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} ; AVX512VL-NEXT: vmovdqa64 %zmm1, %zmm0 @@ -592,13 +593,14 @@ define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> % ; ; AVX512BW-LABEL: splatvar_funnnel_v8i64: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastq %xmm2, %zmm2 -; AVX512BW-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2 +; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] +; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX512BW-NEXT: vpsllq %xmm2, %zmm0, %zmm3 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX512BW-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512BW-NEXT: vpsrlq %xmm4, %zmm1, %zmm1 ; AVX512BW-NEXT: vporq %zmm1, %zmm3, %zmm1 +; AVX512BW-NEXT: vpbroadcastq %xmm2, %zmm2 ; AVX512BW-NEXT: vptestnmq %zmm2, %zmm2, %k1 ; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 @@ -612,13 +614,13 @@ define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> % ; ; AVX512VLBW-LABEL: splatvar_funnnel_v8i64: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %zmm2 -; AVX512VLBW-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2 +; AVX512VLBW-NEXT: vpandq {{.*}}(%rip){1to2}, %xmm2, 
%xmm2 ; AVX512VLBW-NEXT: vpsllq %xmm2, %zmm0, %zmm3 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX512VLBW-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512VLBW-NEXT: vpsrlq %xmm4, %zmm1, %zmm1 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm3, %zmm1 +; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %zmm2 ; AVX512VLBW-NEXT: vptestnmq %zmm2, %zmm2, %k1 ; AVX512VLBW-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} ; AVX512VLBW-NEXT: vmovdqa64 %zmm1, %zmm0 @@ -637,8 +639,8 @@ define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> % define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i32> %amt) nounwind { ; AVX512F-LABEL: splatvar_funnnel_v16i32: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastd %xmm2, %zmm2 -; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2 +; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31] +; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; AVX512F-NEXT: vpslld %xmm3, %zmm0, %zmm3 ; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32] @@ -646,6 +648,7 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i ; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX512F-NEXT: vpsrld %xmm4, %zmm1, %zmm1 ; AVX512F-NEXT: vpord %zmm1, %zmm3, %zmm1 +; AVX512F-NEXT: vpbroadcastd %xmm2, %zmm2 ; AVX512F-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} ; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0 @@ -653,8 +656,7 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i ; ; AVX512VL-LABEL: splatvar_funnnel_v16i32: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastd %xmm2, %zmm2 -; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2 +; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm2, %xmm2 ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; AVX512VL-NEXT: vpslld %xmm3, %zmm0, %zmm3 ; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32] @@ -662,6 
+664,7 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX512VL-NEXT: vpsrld %xmm4, %zmm1, %zmm1 ; AVX512VL-NEXT: vpord %zmm1, %zmm3, %zmm1 +; AVX512VL-NEXT: vpbroadcastd %xmm2, %zmm2 ; AVX512VL-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; AVX512VL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} ; AVX512VL-NEXT: vmovdqa64 %zmm1, %zmm0 @@ -669,8 +672,8 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i ; ; AVX512BW-LABEL: splatvar_funnnel_v16i32: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastd %xmm2, %zmm2 -; AVX512BW-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2 +; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31] +; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; AVX512BW-NEXT: vpslld %xmm3, %zmm0, %zmm3 ; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32] @@ -678,6 +681,7 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i ; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX512BW-NEXT: vpsrld %xmm4, %zmm1, %zmm1 ; AVX512BW-NEXT: vpord %zmm1, %zmm3, %zmm1 +; AVX512BW-NEXT: vpbroadcastd %xmm2, %zmm2 ; AVX512BW-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; AVX512BW-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 @@ -691,8 +695,7 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i ; ; AVX512VLBW-LABEL: splatvar_funnnel_v16i32: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastd %xmm2, %zmm2 -; AVX512VLBW-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2 +; AVX512VLBW-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; AVX512VLBW-NEXT: vpslld %xmm3, %zmm0, %zmm3 ; AVX512VLBW-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32] @@ -700,6 +703,7 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, 
<16 x i32> %y, <16 x i ; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX512VLBW-NEXT: vpsrld %xmm4, %zmm1, %zmm1 ; AVX512VLBW-NEXT: vpord %zmm1, %zmm3, %zmm1 +; AVX512VLBW-NEXT: vpbroadcastd %xmm2, %zmm2 ; AVX512VLBW-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; AVX512VLBW-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} ; AVX512VLBW-NEXT: vmovdqa64 %zmm1, %zmm0 @@ -718,64 +722,53 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %amt) nounwind { ; AVX512F-LABEL: splatvar_funnnel_v32i16: ; AVX512F: # %bb.0: +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 +; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero +; AVX512F-NEXT: vpsllw %xmm4, %ymm3, %ymm3 +; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm4 +; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3 ; AVX512F-NEXT: vpbroadcastw %xmm2, %ymm2 -; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2 -; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4 -; AVX512F-NEXT: vpsllw %xmm3, %ymm4, %ymm4 -; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm3 -; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3 -; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm4 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16] -; AVX512F-NEXT: vpsubw %xmm4, %xmm5, %xmm6 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero -; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm7 -; AVX512F-NEXT: vpsrlw %xmm6, %ymm7, %ymm6 -; AVX512F-NEXT: vpsubw %xmm2, %xmm5, %xmm5 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero -; AVX512F-NEXT: vpsrlw %xmm5, %ymm1, %ymm1 -; AVX512F-NEXT: vinserti64x4 $1, %ymm6, %zmm1, %zmm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 
= [16,16,16,16,16,16,16,16] +; AVX512F-NEXT: vpsubw %xmm2, %xmm4, %xmm4 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero +; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm5 +; AVX512F-NEXT: vpsrlw %xmm4, %ymm5, %ymm5 +; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 +; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1 ; AVX512F-NEXT: vporq %zmm1, %zmm3, %zmm1 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm4, %ymm4 ; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm2, %ymm2 -; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2 ; AVX512F-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_funnnel_v32i16: ; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3 +; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero +; AVX512VL-NEXT: vpsllw %xmm4, %ymm3, %ymm3 +; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm4 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3 ; AVX512VL-NEXT: vpbroadcastw %xmm2, %ymm2 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2 -; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm4 -; AVX512VL-NEXT: vpsllw %xmm3, %ymm4, %ymm4 -; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm3 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3 -; AVX512VL-NEXT: vextracti64x4 $1, %zmm2, %ymm4 -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16] -; AVX512VL-NEXT: vpsubw %xmm4, %xmm5, %xmm6 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero -; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm7 -; AVX512VL-NEXT: vpsrlw %xmm6, %ymm7, %ymm6 -; AVX512VL-NEXT: vpsubw %xmm2, %xmm5, %xmm5 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm5 = 
xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero -; AVX512VL-NEXT: vpsrlw %xmm5, %ymm1, %ymm1 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm6, %zmm1, %zmm1 +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] +; AVX512VL-NEXT: vpsubw %xmm2, %xmm4, %xmm4 +; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero +; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm5 +; AVX512VL-NEXT: vpsrlw %xmm4, %ymm5, %ymm5 +; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1 ; AVX512VL-NEXT: vporq %zmm1, %zmm3, %zmm1 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512VL-NEXT: vpcmpeqw %ymm3, %ymm4, %ymm4 ; AVX512VL-NEXT: vpcmpeqw %ymm3, %ymm2, %ymm2 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2 ; AVX512VL-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastw %xmm2, %zmm2 -; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512BW-NEXT: vpsllw %xmm3, %zmm0, %zmm3 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] @@ -783,6 +776,7 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1 ; AVX512BW-NEXT: vporq %zmm1, %zmm3, %zmm1 +; AVX512BW-NEXT: vpbroadcastw %xmm2, %zmm2 ; AVX512BW-NEXT: vptestnmw %zmm2, %zmm2, %k1 ; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 @@ -796,8 +790,7 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i ; ; AVX512VLBW-LABEL: splatvar_funnnel_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %zmm2 -; 
AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 +; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512VLBW-NEXT: vpsllw %xmm3, %zmm0, %zmm3 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] @@ -805,6 +798,7 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm3, %zmm1 +; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %zmm2 ; AVX512VLBW-NEXT: vptestnmw %zmm2, %zmm2, %k1 ; AVX512VLBW-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} ; AVX512VLBW-NEXT: vmovdqa64 %zmm1, %zmm0 @@ -823,90 +817,73 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt) nounwind { ; AVX512F-LABEL: splatvar_funnnel_v64i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2 -; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2 -; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 -; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4 -; AVX512F-NEXT: vpsllw %xmm3, %ymm4, %ymm4 +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 +; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512F-NEXT: vpsllw %xmm4, %ymm3, %ymm3 ; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 -; AVX512F-NEXT: vpsllw %xmm3, %xmm5, %xmm6 +; AVX512F-NEXT: vpsllw %xmm4, %xmm5, %xmm6 ; AVX512F-NEXT: vpbroadcastb %xmm6, %ymm6 -; AVX512F-NEXT: vpand %ymm6, %ymm4, %ymm4 -; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm3 ; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3 -; AVX512F-NEXT: 
vinserti64x4 $1, %ymm4, %zmm3, %zmm3 -; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm4 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512F-NEXT: vpsubb %xmm4, %xmm6, %xmm7 -; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,zero,zero,zero,zero,xmm7[1],zero,zero,zero,zero,zero,zero,zero -; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm8 -; AVX512F-NEXT: vpsrlw %xmm7, %ymm8, %ymm8 -; AVX512F-NEXT: vpsrlw %xmm7, %xmm5, %xmm7 -; AVX512F-NEXT: vpsrlw $8, %xmm7, %xmm7 -; AVX512F-NEXT: vpbroadcastb %xmm7, %ymm7 -; AVX512F-NEXT: vpand %ymm7, %ymm8, %ymm7 -; AVX512F-NEXT: vpsubb %xmm2, %xmm6, %xmm6 -; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,zero,zero,zero,zero,xmm6[1],zero,zero,zero,zero,zero,zero,zero -; AVX512F-NEXT: vpsrlw %xmm6, %ymm1, %ymm1 -; AVX512F-NEXT: vpsrlw %xmm6, %xmm5, %xmm5 +; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm4 +; AVX512F-NEXT: vpand %ymm6, %ymm4, %ymm4 +; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3 +; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512F-NEXT: vpsubb %xmm2, %xmm4, %xmm4 +; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero +; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm6 +; AVX512F-NEXT: vpsrlw %xmm4, %ymm6, %ymm6 +; AVX512F-NEXT: vpsrlw %xmm4, %xmm5, %xmm5 ; AVX512F-NEXT: vpsrlw $8, %xmm5, %xmm5 ; AVX512F-NEXT: vpbroadcastb %xmm5, %ymm5 +; AVX512F-NEXT: vpand %ymm5, %ymm6, %ymm6 +; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX512F-NEXT: vpand %ymm5, %ymm1, %ymm1 -; AVX512F-NEXT: vinserti64x4 $1, %ymm7, %zmm1, %zmm1 +; AVX512F-NEXT: vinserti64x4 $1, %ymm6, %zmm1, %zmm1 ; AVX512F-NEXT: vporq %zmm1, %zmm3, %zmm1 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm4, %ymm4 ; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2 -; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2 +; AVX512F-NEXT: 
vinserti64x4 $1, %ymm2, %zmm2, %zmm2 ; AVX512F-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_funnnel_v64i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2 -; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 -; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm4 -; AVX512VL-NEXT: vpsllw %xmm3, %ymm4, %ymm4 +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3 +; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vpsllw %xmm4, %ymm3, %ymm3 ; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 -; AVX512VL-NEXT: vpsllw %xmm3, %xmm5, %xmm6 +; AVX512VL-NEXT: vpsllw %xmm4, %xmm5, %xmm6 ; AVX512VL-NEXT: vpbroadcastb %xmm6, %ymm6 -; AVX512VL-NEXT: vpand %ymm6, %ymm4, %ymm4 -; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm3 ; AVX512VL-NEXT: vpand %ymm6, %ymm3, %ymm3 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3 -; AVX512VL-NEXT: vextracti64x4 $1, %zmm2, %ymm4 -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VL-NEXT: vpsubb %xmm4, %xmm6, %xmm7 -; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,zero,zero,zero,zero,xmm7[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm8 -; AVX512VL-NEXT: vpsrlw %xmm7, %ymm8, %ymm8 -; AVX512VL-NEXT: vpsrlw %xmm7, %xmm5, %xmm7 -; AVX512VL-NEXT: vpsrlw $8, %xmm7, %xmm7 -; AVX512VL-NEXT: vpbroadcastb %xmm7, %ymm7 -; AVX512VL-NEXT: vpand %ymm7, %ymm8, %ymm7 -; AVX512VL-NEXT: vpsubb %xmm2, %xmm6, %xmm6 -; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,zero,zero,zero,zero,xmm6[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VL-NEXT: vpsrlw %xmm6, %ymm1, %ymm1 -; AVX512VL-NEXT: 
vpsrlw %xmm6, %xmm5, %xmm5 +; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm4 +; AVX512VL-NEXT: vpand %ymm6, %ymm4, %ymm4 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3 +; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2 +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VL-NEXT: vpsubb %xmm2, %xmm4, %xmm4 +; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm6 +; AVX512VL-NEXT: vpsrlw %xmm4, %ymm6, %ymm6 +; AVX512VL-NEXT: vpsrlw %xmm4, %xmm5, %xmm5 ; AVX512VL-NEXT: vpsrlw $8, %xmm5, %xmm5 ; AVX512VL-NEXT: vpbroadcastb %xmm5, %ymm5 +; AVX512VL-NEXT: vpand %ymm5, %ymm6, %ymm6 +; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX512VL-NEXT: vpand %ymm5, %ymm1, %ymm1 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm7, %zmm1, %zmm1 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm6, %zmm1, %zmm1 ; AVX512VL-NEXT: vporq %zmm1, %zmm3, %zmm1 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm4, %ymm4 ; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2 ; AVX512VL-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v64i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastb %xmm2, %zmm2 -; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpsllw %xmm3, %zmm0, %zmm4 ; AVX512BW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 @@ -922,6 +899,7 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512BW-NEXT: vpbroadcastb %xmm4, %zmm4 ; AVX512BW-NEXT: vpandq %zmm4, %zmm1, %zmm1 ; AVX512BW-NEXT: vporq %zmm1, %zmm3, %zmm1 +; AVX512BW-NEXT: vpbroadcastb %xmm2, 
%zmm2 ; AVX512BW-NEXT: vptestnmb %zmm2, %zmm2, %k1 ; AVX512BW-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1} ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 @@ -929,8 +907,7 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; ; AVX512VBMI2-LABEL: splatvar_funnnel_v64i8: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpbroadcastb %xmm2, %zmm2 -; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 +; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm4 ; AVX512VBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 @@ -946,6 +923,7 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512VBMI2-NEXT: vpbroadcastb %xmm4, %zmm4 ; AVX512VBMI2-NEXT: vpandq %zmm4, %zmm1, %zmm1 ; AVX512VBMI2-NEXT: vporq %zmm1, %zmm3, %zmm1 +; AVX512VBMI2-NEXT: vpbroadcastb %xmm2, %zmm2 ; AVX512VBMI2-NEXT: vptestnmb %zmm2, %zmm2, %k1 ; AVX512VBMI2-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1} ; AVX512VBMI2-NEXT: vmovdqa64 %zmm1, %zmm0 @@ -953,8 +931,7 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; ; AVX512VLBW-LABEL: splatvar_funnnel_v64i8: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %zmm2 -; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 +; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLBW-NEXT: vpsllw %xmm3, %zmm0, %zmm4 ; AVX512VLBW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 @@ -970,6 +947,7 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512VLBW-NEXT: vpbroadcastb %xmm4, %zmm4 ; AVX512VLBW-NEXT: vpandq %zmm4, %zmm1, %zmm1 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm3, %zmm1 +; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %zmm2 ; AVX512VLBW-NEXT: vptestnmb %zmm2, %zmm2, %k1 ; 
AVX512VLBW-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1} ; AVX512VLBW-NEXT: vmovdqa64 %zmm1, %zmm0 @@ -977,8 +955,7 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; ; AVX512VLVBMI2-LABEL: splatvar_funnnel_v64i8: ; AVX512VLVBMI2: # %bb.0: -; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm2, %zmm2 -; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 +; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLVBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm4 ; AVX512VLVBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 @@ -994,6 +971,7 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm4, %zmm4 ; AVX512VLVBMI2-NEXT: vpandq %zmm4, %zmm1, %zmm1 ; AVX512VLVBMI2-NEXT: vporq %zmm1, %zmm3, %zmm1 +; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm2, %zmm2 ; AVX512VLVBMI2-NEXT: vptestnmb %zmm2, %zmm2, %k1 ; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1} ; AVX512VLVBMI2-NEXT: vmovdqa64 %zmm1, %zmm0 diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll index 4d5b148b362e8..fb667a1106222 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll @@ -779,7 +779,6 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind ; ; SSE41-LABEL: splatvar_funnnel_v4i32: ; SSE41: # %bb.0: -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; SSE41-NEXT: pand {{.*}}(%rip), %xmm1 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero ; SSE41-NEXT: movdqa %xmm0, %xmm3 @@ -793,7 +792,6 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind ; ; AVX1-LABEL: splatvar_funnnel_v4i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = 
xmm1[0],zero,xmm1[1],zero ; AVX1-NEXT: vpslld %xmm2, %xmm0, %xmm2 @@ -806,7 +804,6 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind ; ; AVX2-LABEL: splatvar_funnnel_v4i32: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31] ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero @@ -881,15 +878,13 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v8i16: ; SSE2: # %bb.0: -; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,0,0,0] +; SSE2-NEXT: pand %xmm1, %xmm2 +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: psllw %xmm2, %xmm3 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16] ; SSE2-NEXT: psubw %xmm1, %xmm2 -; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] -; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; SSE2-NEXT: movdqa %xmm0, %xmm3 -; SSE2-NEXT: psllw %xmm1, %xmm3 ; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1] ; SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; SSE2-NEXT: psrlw %xmm2, %xmm0 @@ -898,8 +893,6 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind ; ; SSE41-LABEL: splatvar_funnnel_v8i16: ; SSE41: # %bb.0: -; SSE41-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; SSE41-NEXT: pand {{.*}}(%rip), %xmm1 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = 
xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; SSE41-NEXT: movdqa %xmm0, %xmm3 @@ -911,36 +904,20 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind ; SSE41-NEXT: por %xmm3, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: splatvar_funnnel_v8i16: -; AVX1: # %bb.0: -; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] -; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX1-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpor %xmm0, %xmm2, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: splatvar_funnnel_v8i16: -; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1 -; AVX2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX2-NEXT: vpsllw %xmm2, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0 -; AVX2-NEXT: retq +; AVX-LABEL: splatvar_funnnel_v8i16: +; AVX: # %bb.0: +; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm2 +; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX-NEXT: retq ; ; AVX512-LABEL: splatvar_funnnel_v8i16: ; AVX512: # 
%bb.0: -; AVX512-NEXT: vpbroadcastw %xmm1, %xmm1 ; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 ; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512-NEXT: vpsllw %xmm2, %xmm0, %xmm2 @@ -966,15 +943,13 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind ; ; X32-SSE-LABEL: splatvar_funnnel_v8i16: ; X32-SSE: # %bb.0: -; X32-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] -; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm1 +; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [65535,0,0,0] +; X32-SSE-NEXT: pand %xmm1, %xmm2 +; X32-SSE-NEXT: movdqa %xmm0, %xmm3 +; X32-SSE-NEXT: psllw %xmm2, %xmm3 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16] ; X32-SSE-NEXT: psubw %xmm1, %xmm2 -; X32-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] -; X32-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; X32-SSE-NEXT: movdqa %xmm0, %xmm3 -; X32-SSE-NEXT: psllw %xmm1, %xmm3 ; X32-SSE-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1] ; X32-SSE-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X32-SSE-NEXT: psrlw %xmm2, %xmm0 @@ -988,9 +963,6 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v16i8: ; SSE2: # %bb.0: -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; SSE2-NEXT: psubb %xmm1, %xmm2 @@ -1021,44 +993,42 @@ define 
<16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind ; ; SSE41-LABEL: splatvar_funnnel_v16i8: ; SSE41: # %bb.0: -; SSE41-NEXT: pxor %xmm3, %xmm3 -; SSE41-NEXT: pshufb %xmm3, %xmm1 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE41-NEXT: pmovzxbq {{.*#+}} xmm4 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero +; SSE41-NEXT: pmovzxbq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: psllw %xmm4, %xmm2 +; SSE41-NEXT: psllw %xmm3, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm4 ; SSE41-NEXT: pcmpeqd %xmm5, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm6, %xmm6 -; SSE41-NEXT: psllw %xmm4, %xmm6 -; SSE41-NEXT: pshufb %xmm3, %xmm6 -; SSE41-NEXT: pand %xmm6, %xmm2 +; SSE41-NEXT: psllw %xmm3, %xmm5 +; SSE41-NEXT: pxor %xmm3, %xmm3 +; SSE41-NEXT: pshufb %xmm3, %xmm5 +; SSE41-NEXT: pand %xmm5, %xmm2 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; SSE41-NEXT: psubb %xmm1, %xmm3 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero ; SSE41-NEXT: psrlw %xmm1, %xmm0 -; SSE41-NEXT: psrlw %xmm1, %xmm5 -; SSE41-NEXT: pshufb {{.*#+}} xmm5 = xmm5[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] -; SSE41-NEXT: pand %xmm0, %xmm5 -; SSE41-NEXT: por %xmm5, %xmm2 +; SSE41-NEXT: psrlw %xmm1, %xmm4 +; SSE41-NEXT: pshufb {{.*#+}} xmm4 = xmm4[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; SSE41-NEXT: pand %xmm0, %xmm4 +; SSE41-NEXT: por %xmm4, %xmm2 ; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: splatvar_funnnel_v16i8: ; AVX1: # %bb.0: -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero -; AVX1-NEXT: vpsllw %xmm3, %xmm0, %xmm4 -; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 -; 
AVX1-NEXT: vpsllw %xmm3, %xmm5, %xmm3 -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpand %xmm2, %xmm4, %xmm2 +; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero +; AVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm3 +; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpsllw %xmm2, %xmm4, %xmm2 +; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5 +; AVX1-NEXT: vpshufb %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX1-NEXT: vpsubb %xmm1, %xmm3, %xmm1 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrlw %xmm1, %xmm5, %xmm1 +; AVX1-NEXT: vpsrlw %xmm1, %xmm4, %xmm1 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpor %xmm0, %xmm2, %xmm0 @@ -1066,7 +1036,6 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind ; ; AVX2-LABEL: splatvar_funnnel_v16i8: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX2-NEXT: vpsllw %xmm2, %xmm0, %xmm3 @@ -1173,9 +1142,6 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind ; ; X32-SSE-LABEL: splatvar_funnnel_v16i8: ; X32-SSE: # %bb.0: -; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X32-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] -; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm1 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; X32-SSE-NEXT: psubb %xmm1, %xmm2 diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll 
b/llvm/test/CodeGen/X86/vector-fshr-128.ll index ca67324439f07..f09ded8d95f74 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll @@ -1196,7 +1196,6 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %amt) define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v2i64: ; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] ; SSE2-NEXT: pand {{.*}}(%rip), %xmm2 ; SSE2-NEXT: movdqa %xmm1, %xmm3 ; SSE2-NEXT: psrlq %xmm2, %xmm3 @@ -1204,6 +1203,7 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % ; SSE2-NEXT: psubq %xmm2, %xmm4 ; SSE2-NEXT: psllq %xmm4, %xmm0 ; SSE2-NEXT: por %xmm3, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] ; SSE2-NEXT: pxor %xmm3, %xmm3 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm3 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2] @@ -1217,7 +1217,6 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % ; SSE41-LABEL: splatvar_funnnel_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa %xmm0, %xmm3 -; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] ; SSE41-NEXT: pand {{.*}}(%rip), %xmm2 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: psrlq %xmm2, %xmm0 @@ -1225,6 +1224,7 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % ; SSE41-NEXT: psubq %xmm2, %xmm4 ; SSE41-NEXT: psllq %xmm4, %xmm3 ; SSE41-NEXT: por %xmm0, %xmm3 +; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: pcmpeqq %xmm2, %xmm0 ; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3 @@ -1233,13 +1233,13 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % ; ; AVX1-LABEL: splatvar_funnnel_v2i64: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX1-NEXT: vpsrlq %xmm2, %xmm1, %xmm3 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = 
[64,64] ; AVX1-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX1-NEXT: vpsllq %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 @@ -1247,13 +1247,13 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % ; ; AVX2-LABEL: splatvar_funnnel_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX2-NEXT: vpsrlq %xmm2, %xmm1, %xmm3 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX2-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX2-NEXT: vpsllq %xmm4, %xmm0, %xmm0 ; AVX2-NEXT: vpor %xmm3, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX2-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2 ; AVX2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 @@ -1262,13 +1262,13 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % ; AVX512F-LABEL: splatvar_funnnel_v2i64: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 -; AVX512F-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512F-NEXT: vpsrlq %xmm2, %xmm1, %xmm3 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX512F-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512F-NEXT: vpsllq %xmm4, %xmm0, %xmm0 ; AVX512F-NEXT: vpor %xmm3, %xmm0, %xmm0 +; AVX512F-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX512F-NEXT: vptestnmq %zmm2, %zmm2, %k1 ; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 @@ -1277,13 +1277,13 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % ; ; AVX512VL-LABEL: splatvar_funnnel_v2i64: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VL-NEXT: vpsrlq %xmm2, %xmm1, %xmm3 ; AVX512VL-NEXT: vmovdqa {{.*#+}} 
xmm4 = [64,64] ; AVX512VL-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512VL-NEXT: vpsllq %xmm4, %xmm0, %xmm0 ; AVX512VL-NEXT: vpor %xmm3, %xmm0, %xmm0 +; AVX512VL-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX512VL-NEXT: vptestnmq %xmm2, %xmm2, %k1 ; AVX512VL-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} ; AVX512VL-NEXT: retq @@ -1291,13 +1291,13 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % ; AVX512BW-LABEL: splatvar_funnnel_v2i64: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 -; AVX512BW-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512BW-NEXT: vpsrlq %xmm2, %xmm1, %xmm3 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX512BW-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512BW-NEXT: vpsllq %xmm4, %xmm0, %xmm0 ; AVX512BW-NEXT: vpor %xmm3, %xmm0, %xmm0 +; AVX512BW-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX512BW-NEXT: vptestnmq %zmm2, %zmm2, %k1 ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 @@ -1307,13 +1307,13 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % ; AVX512VBMI2-LABEL: splatvar_funnnel_v2i64: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 -; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpsrlq %xmm2, %xmm1, %xmm3 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX512VBMI2-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512VBMI2-NEXT: vpsllq %xmm4, %xmm0, %xmm0 ; AVX512VBMI2-NEXT: vpor %xmm3, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vptestnmq %zmm2, %zmm2, %k1 ; AVX512VBMI2-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 @@ -1322,13 +1322,13 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % ; ; AVX512VLBW-LABEL: splatvar_funnnel_v2i64: ; AVX512VLBW: # %bb.0: -; 
AVX512VLBW-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpsrlq %xmm2, %xmm1, %xmm3 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX512VLBW-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512VLBW-NEXT: vpsllq %xmm4, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpor %xmm3, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX512VLBW-NEXT: vptestnmq %xmm2, %xmm2, %k1 ; AVX512VLBW-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} ; AVX512VLBW-NEXT: retq @@ -1342,13 +1342,13 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % ; ; XOPAVX1-LABEL: splatvar_funnnel_v2i64: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] ; XOPAVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsrlq %xmm2, %xmm1, %xmm3 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; XOPAVX1-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; XOPAVX1-NEXT: vpsllq %xmm4, %xmm0, %xmm0 ; XOPAVX1-NEXT: vpor %xmm3, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] ; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; XOPAVX1-NEXT: vpcomeqq %xmm3, %xmm2, %xmm2 ; XOPAVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 @@ -1356,13 +1356,13 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % ; ; XOPAVX2-LABEL: splatvar_funnnel_v2i64: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastq %xmm2, %xmm2 ; XOPAVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; XOPAVX2-NEXT: vpsrlq %xmm2, %xmm1, %xmm3 ; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; XOPAVX2-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; XOPAVX2-NEXT: vpsllq %xmm4, %xmm0, %xmm0 ; XOPAVX2-NEXT: vpor %xmm3, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpbroadcastq %xmm2, %xmm2 ; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; XOPAVX2-NEXT: vpcomeqq %xmm3, %xmm2, %xmm2 ; XOPAVX2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 @@ -1370,30 +1370,26 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % ; ; X32-SSE-LABEL: splatvar_funnnel_v2i64: ; X32-SSE: # 
%bb.0: -; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm2 -; X32-SSE-NEXT: movdqa %xmm1, %xmm3 -; X32-SSE-NEXT: psrlq %xmm2, %xmm3 -; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,0,1] -; X32-SSE-NEXT: movdqa %xmm1, %xmm5 -; X32-SSE-NEXT: psrlq %xmm4, %xmm5 -; X32-SSE-NEXT: movsd {{.*#+}} xmm5 = xmm3[0],xmm5[1] -; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [64,0,64,0] -; X32-SSE-NEXT: psubq %xmm2, %xmm3 +; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,1,0,1] +; X32-SSE-NEXT: pxor %xmm5, %xmm5 +; X32-SSE-NEXT: pcmpeqd %xmm4, %xmm5 +; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,0,3,2] +; X32-SSE-NEXT: pand %xmm5, %xmm3 +; X32-SSE-NEXT: movdqa {{.*#+}} xmm5 = [64,0,64,0] +; X32-SSE-NEXT: psubq %xmm4, %xmm5 ; X32-SSE-NEXT: movdqa %xmm0, %xmm4 -; X32-SSE-NEXT: psllq %xmm3, %xmm4 -; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1] -; X32-SSE-NEXT: psllq %xmm3, %xmm0 +; X32-SSE-NEXT: psllq %xmm5, %xmm4 +; X32-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,0,1] +; X32-SSE-NEXT: psllq %xmm5, %xmm0 ; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1] -; X32-SSE-NEXT: orpd %xmm5, %xmm0 -; X32-SSE-NEXT: pxor %xmm3, %xmm3 -; X32-SSE-NEXT: pcmpeqd %xmm2, %xmm3 -; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2] -; X32-SSE-NEXT: pand %xmm3, %xmm2 -; X32-SSE-NEXT: pand %xmm2, %xmm1 -; X32-SSE-NEXT: pandn %xmm0, %xmm2 -; X32-SSE-NEXT: por %xmm1, %xmm2 -; X32-SSE-NEXT: movdqa %xmm2, %xmm0 +; X32-SSE-NEXT: movdqa %xmm1, %xmm4 +; X32-SSE-NEXT: psrlq %xmm2, %xmm4 +; X32-SSE-NEXT: por %xmm0, %xmm4 +; X32-SSE-NEXT: pand %xmm3, %xmm1 +; X32-SSE-NEXT: pandn %xmm4, %xmm3 +; X32-SSE-NEXT: por %xmm1, %xmm3 +; X32-SSE-NEXT: movdqa %xmm3, %xmm0 ; X32-SSE-NEXT: retl %splat = shufflevector <2 x i64> %amt, <2 x i64> undef, <2 x i32> zeroinitializer %res = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %splat) @@ -1403,8 +1399,7 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % define <4 x i32> 
@splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v4i32: ; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] -; SSE2-NEXT: pand {{.*}}(%rip), %xmm2 +; SSE2-NEXT: andps {{.*}}(%rip), %xmm2 ; SSE2-NEXT: pxor %xmm3, %xmm3 ; SSE2-NEXT: xorps %xmm4, %xmm4 ; SSE2-NEXT: movss {{.*#+}} xmm4 = xmm2[0],xmm4[1,2,3] @@ -1416,6 +1411,7 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; SSE2-NEXT: movd %ecx, %xmm4 ; SSE2-NEXT: pslld %xmm4, %xmm0 ; SSE2-NEXT: por %xmm5, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE2-NEXT: pcmpeqd %xmm3, %xmm2 ; SSE2-NEXT: pand %xmm2, %xmm1 ; SSE2-NEXT: pandn %xmm0, %xmm2 @@ -1426,7 +1422,6 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; SSE41-LABEL: splatvar_funnnel_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa %xmm0, %xmm3 -; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE41-NEXT: pand {{.*}}(%rip), %xmm2 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero ; SSE41-NEXT: movdqa %xmm1, %xmm4 @@ -1436,6 +1431,7 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; SSE41-NEXT: pslld %xmm0, %xmm3 ; SSE41-NEXT: por %xmm4, %xmm3 +; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: pcmpeqd %xmm2, %xmm0 ; SSE41-NEXT: blendvps %xmm0, %xmm1, %xmm3 @@ -1444,7 +1440,6 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; ; AVX1-LABEL: splatvar_funnnel_v4i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; AVX1-NEXT: vpsrld %xmm3, %xmm1, %xmm3 @@ -1453,6 +1448,7 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; AVX1-NEXT: vpmovzxdq 
{{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX1-NEXT: vpslld %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 @@ -1460,7 +1456,6 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; ; AVX2-LABEL: splatvar_funnnel_v4i32: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31] ; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero @@ -1470,6 +1465,7 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX2-NEXT: vpslld %xmm4, %xmm0, %xmm0 ; AVX2-NEXT: vpor %xmm3, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX2-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2 ; AVX2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 @@ -1478,7 +1474,6 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; AVX512F-LABEL: splatvar_funnnel_v4i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 -; AVX512F-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31] ; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero @@ -1488,6 +1483,7 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX512F-NEXT: vpslld %xmm4, %xmm0, %xmm0 ; AVX512F-NEXT: vpor %xmm3, %xmm0, %xmm0 +; AVX512F-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX512F-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 @@ -1496,7 +1492,6 @@ define <4 x i32> 
@splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; ; AVX512VL-LABEL: splatvar_funnnel_v4i32: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm2, %xmm2 ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; AVX512VL-NEXT: vpsrld %xmm3, %xmm1, %xmm3 @@ -1505,6 +1500,7 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX512VL-NEXT: vpslld %xmm4, %xmm0, %xmm0 ; AVX512VL-NEXT: vpor %xmm3, %xmm0, %xmm0 +; AVX512VL-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX512VL-NEXT: vptestnmd %xmm2, %xmm2, %k1 ; AVX512VL-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} ; AVX512VL-NEXT: retq @@ -1512,7 +1508,6 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; AVX512BW-LABEL: splatvar_funnnel_v4i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 -; AVX512BW-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31] ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero @@ -1522,6 +1517,7 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX512BW-NEXT: vpslld %xmm4, %xmm0, %xmm0 ; AVX512BW-NEXT: vpor %xmm3, %xmm0, %xmm0 +; AVX512BW-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX512BW-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; AVX512BW-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 @@ -1531,7 +1527,6 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; AVX512VBMI2-LABEL: splatvar_funnnel_v4i32: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 -; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31] ; 
AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero @@ -1541,6 +1536,7 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; AVX512VBMI2-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX512VBMI2-NEXT: vpslld %xmm4, %xmm0, %xmm0 ; AVX512VBMI2-NEXT: vpor %xmm3, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; AVX512VBMI2-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 @@ -1549,7 +1545,6 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; ; AVX512VLBW-LABEL: splatvar_funnnel_v4i32: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; AVX512VLBW-NEXT: vpsrld %xmm3, %xmm1, %xmm3 @@ -1558,6 +1553,7 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX512VLBW-NEXT: vpslld %xmm4, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpor %xmm3, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX512VLBW-NEXT: vptestnmd %xmm2, %xmm2, %k1 ; AVX512VLBW-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} ; AVX512VLBW-NEXT: retq @@ -1571,7 +1567,6 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; ; XOPAVX1-LABEL: splatvar_funnnel_v4i32: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; XOPAVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; XOPAVX1-NEXT: vpsrld %xmm3, %xmm1, %xmm3 @@ -1580,6 +1575,7 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; XOPAVX1-NEXT: vpslld %xmm4, 
%xmm0, %xmm0 ; XOPAVX1-NEXT: vpor %xmm3, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; XOPAVX1-NEXT: vpcomeqd %xmm3, %xmm2, %xmm2 ; XOPAVX1-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 @@ -1587,7 +1583,6 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; ; XOPAVX2-LABEL: splatvar_funnnel_v4i32: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastd %xmm2, %xmm2 ; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31] ; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2 ; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero @@ -1597,6 +1592,7 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; XOPAVX2-NEXT: vpslld %xmm4, %xmm0, %xmm0 ; XOPAVX2-NEXT: vpor %xmm3, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpbroadcastd %xmm2, %xmm2 ; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; XOPAVX2-NEXT: vpcomeqd %xmm3, %xmm2, %xmm2 ; XOPAVX2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 @@ -1604,8 +1600,7 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; ; X32-SSE-LABEL: splatvar_funnnel_v4i32: ; X32-SSE: # %bb.0: -; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] -; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm2 +; X32-SSE-NEXT: andps {{\.LCPI.*}}, %xmm2 ; X32-SSE-NEXT: pxor %xmm3, %xmm3 ; X32-SSE-NEXT: xorps %xmm4, %xmm4 ; X32-SSE-NEXT: movss {{.*#+}} xmm4 = xmm2[0],xmm4[1,2,3] @@ -1617,6 +1612,7 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; X32-SSE-NEXT: movd %ecx, %xmm4 ; X32-SSE-NEXT: pslld %xmm4, %xmm0 ; X32-SSE-NEXT: por %xmm5, %xmm0 +; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; X32-SSE-NEXT: pcmpeqd %xmm3, %xmm2 ; X32-SSE-NEXT: pand %xmm2, %xmm1 ; X32-SSE-NEXT: pandn %xmm0, %xmm2 @@ -1631,21 +1627,21 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % define <8 x i16> 
@splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v8i16: ; SSE2: # %bb.0: -; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,0,2,3,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0] -; SSE2-NEXT: pand {{.*}}(%rip), %xmm3 -; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] -; SSE2-NEXT: psubw %xmm3, %xmm4 -; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: pcmpeqw %xmm3, %xmm2 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm2 +; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; SSE2-NEXT: psubw %xmm2, %xmm3 +; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm2[0,0,2,3,4,5,6,7] +; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1] +; SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; SSE2-NEXT: movdqa %xmm1, %xmm5 +; SSE2-NEXT: psrlw %xmm2, %xmm5 ; SSE2-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1] ; SSE2-NEXT: psrldq {{.*#+}} xmm3 = xmm3[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; SSE2-NEXT: movdqa %xmm1, %xmm5 -; SSE2-NEXT: psrlw %xmm3, %xmm5 -; SSE2-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1] -; SSE2-NEXT: psrldq {{.*#+}} xmm4 = xmm4[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; SSE2-NEXT: psllw %xmm4, %xmm0 +; SSE2-NEXT: psllw %xmm3, %xmm0 ; SSE2-NEXT: por %xmm5, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,0,0,0] +; SSE2-NEXT: pxor %xmm2, %xmm2 +; SSE2-NEXT: pcmpeqw %xmm3, %xmm2 ; SSE2-NEXT: pand %xmm2, %xmm1 ; SSE2-NEXT: pandn %xmm0, %xmm2 ; SSE2-NEXT: por %xmm1, %xmm2 @@ -1655,8 +1651,6 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; SSE41-LABEL: splatvar_funnnel_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa %xmm0, %xmm3 -; SSE41-NEXT: 
pshuflw {{.*#+}} xmm0 = xmm2[0,0,2,3,4,5,6,7] -; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,0,0] ; SSE41-NEXT: pand {{.*}}(%rip), %xmm2 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; SSE41-NEXT: movdqa %xmm1, %xmm4 @@ -1666,6 +1660,8 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero ; SSE41-NEXT: psllw %xmm0, %xmm3 ; SSE41-NEXT: por %xmm4, %xmm3 +; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,0,2,3,4,5,6,7] +; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,0,0] ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: pcmpeqw %xmm2, %xmm0 ; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm3 @@ -1674,8 +1670,6 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; ; AVX1-LABEL: splatvar_funnnel_v8i16: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,3,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX1-NEXT: vpsrlw %xmm3, %xmm1, %xmm3 @@ -1684,6 +1678,8 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX1-NEXT: vpsllw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,3,4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 @@ -1691,7 +1687,6 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; ; AVX2-LABEL: splatvar_funnnel_v8i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm3 = 
xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX2-NEXT: vpsrlw %xmm3, %xmm1, %xmm3 @@ -1700,6 +1695,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX2-NEXT: vpsllw %xmm4, %xmm0, %xmm0 ; AVX2-NEXT: vpor %xmm3, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX2-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2 ; AVX2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 @@ -1707,7 +1703,6 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; ; AVX512F-LABEL: splatvar_funnnel_v8i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512F-NEXT: vpsrlw %xmm3, %xmm1, %xmm3 @@ -1716,6 +1711,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512F-NEXT: vpsllw %xmm4, %xmm0, %xmm0 ; AVX512F-NEXT: vpor %xmm3, %xmm0, %xmm0 +; AVX512F-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX512F-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2 ; AVX512F-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 @@ -1723,7 +1719,6 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; ; AVX512VL-LABEL: splatvar_funnnel_v8i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512VL-NEXT: vpsrlw %xmm3, %xmm1, %xmm3 @@ -1732,6 +1727,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512VL-NEXT: vpsllw %xmm4, %xmm0, 
%xmm0 ; AVX512VL-NEXT: vpor %xmm3, %xmm0, %xmm0 +; AVX512VL-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX512VL-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2 ; AVX512VL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 @@ -1740,7 +1736,6 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; AVX512BW-LABEL: splatvar_funnnel_v8i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 -; AVX512BW-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512BW-NEXT: vpsrlw %xmm3, %xmm1, %xmm3 @@ -1749,6 +1744,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512BW-NEXT: vpsllw %xmm4, %xmm0, %xmm0 ; AVX512BW-NEXT: vpor %xmm3, %xmm0, %xmm0 +; AVX512BW-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512BW-NEXT: vptestnmw %zmm2, %zmm2, %k1 ; AVX512BW-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 @@ -1758,7 +1754,6 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 -; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512VBMI2-NEXT: vpsrlw %xmm3, %xmm1, %xmm3 @@ -1767,6 +1762,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512VBMI2-NEXT: vpsllw %xmm4, %xmm0, %xmm0 ; AVX512VBMI2-NEXT: vpor %xmm3, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vptestnmw 
%zmm2, %zmm2, %k1 ; AVX512VBMI2-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 @@ -1775,7 +1771,6 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; ; AVX512VLBW-LABEL: splatvar_funnnel_v8i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm1, %xmm3 @@ -1784,6 +1779,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512VLBW-NEXT: vpsllw %xmm4, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpor %xmm3, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512VLBW-NEXT: vptestnmw %xmm2, %xmm2, %k1 ; AVX512VLBW-NEXT: vmovdqu16 %xmm1, %xmm0 {%k1} ; AVX512VLBW-NEXT: retq @@ -1797,8 +1793,6 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; ; XOPAVX1-LABEL: splatvar_funnnel_v8i16: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,3,4,5,6,7] -; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; XOPAVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; XOPAVX1-NEXT: vpsrlw %xmm3, %xmm1, %xmm3 @@ -1807,6 +1801,8 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; XOPAVX1-NEXT: vpsllw %xmm4, %xmm0, %xmm0 ; XOPAVX1-NEXT: vpor %xmm3, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,3,4,5,6,7] +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; XOPAVX1-NEXT: vpcomeqw %xmm3, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 @@ -1814,7 
+1810,6 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; ; XOPAVX2-LABEL: splatvar_funnnel_v8i16: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastw %xmm2, %xmm2 ; XOPAVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; XOPAVX2-NEXT: vpsrlw %xmm3, %xmm1, %xmm3 @@ -1823,6 +1818,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; XOPAVX2-NEXT: vpsllw %xmm4, %xmm0, %xmm0 ; XOPAVX2-NEXT: vpor %xmm3, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpbroadcastw %xmm2, %xmm2 ; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; XOPAVX2-NEXT: vpcomeqw %xmm3, %xmm2, %xmm2 ; XOPAVX2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 @@ -1830,21 +1826,21 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; ; X32-SSE-LABEL: splatvar_funnnel_v8i16: ; X32-SSE: # %bb.0: -; X32-SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,0,2,3,4,5,6,7] -; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0] -; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm3 -; X32-SSE-NEXT: movdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] -; X32-SSE-NEXT: psubw %xmm3, %xmm4 -; X32-SSE-NEXT: pxor %xmm2, %xmm2 -; X32-SSE-NEXT: pcmpeqw %xmm3, %xmm2 +; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm2 +; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; X32-SSE-NEXT: psubw %xmm2, %xmm3 +; X32-SSE-NEXT: pshuflw {{.*#+}} xmm4 = xmm2[0,0,2,3,4,5,6,7] +; X32-SSE-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1] +; X32-SSE-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X32-SSE-NEXT: movdqa %xmm1, %xmm5 +; X32-SSE-NEXT: psrlw %xmm2, %xmm5 ; X32-SSE-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1] ; X32-SSE-NEXT: 
psrldq {{.*#+}} xmm3 = xmm3[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; X32-SSE-NEXT: movdqa %xmm1, %xmm5 -; X32-SSE-NEXT: psrlw %xmm3, %xmm5 -; X32-SSE-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1] -; X32-SSE-NEXT: psrldq {{.*#+}} xmm4 = xmm4[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; X32-SSE-NEXT: psllw %xmm4, %xmm0 +; X32-SSE-NEXT: psllw %xmm3, %xmm0 ; X32-SSE-NEXT: por %xmm5, %xmm0 +; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,0,0,0] +; X32-SSE-NEXT: pxor %xmm2, %xmm2 +; X32-SSE-NEXT: pcmpeqw %xmm3, %xmm2 ; X32-SSE-NEXT: pand %xmm2, %xmm1 ; X32-SSE-NEXT: pandn %xmm0, %xmm2 ; X32-SSE-NEXT: por %xmm1, %xmm2 @@ -1858,63 +1854,63 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v16i8: ; SSE2: # %bb.0: -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,0,2,3,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0] -; SSE2-NEXT: pand {{.*}}(%rip), %xmm3 -; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; SSE2-NEXT: psubb %xmm3, %xmm4 -; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: pcmpeqb %xmm3, %xmm2 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm2 +; SSE2-NEXT: movdqa %xmm2, %xmm3 ; SSE2-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0] ; SSE2-NEXT: psrldq {{.*#+}} xmm3 = xmm3[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; SSE2-NEXT: movdqa %xmm1, %xmm5 -; SSE2-NEXT: psrlw %xmm3, %xmm5 +; SSE2-NEXT: movdqa %xmm1, %xmm4 +; SSE2-NEXT: psrlw %xmm3, %xmm4 +; SSE2-NEXT: pcmpeqd %xmm5, %xmm5 ; SSE2-NEXT: pcmpeqd %xmm6, %xmm6 ; SSE2-NEXT: psrlw %xmm3, %xmm6 -; SSE2-NEXT: pcmpeqd %xmm3, %xmm3 ; 
SSE2-NEXT: psrlw $8, %xmm6 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: pshuflw {{.*#+}} xmm6 = xmm6[0,0,2,3,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,0,0,0] -; SSE2-NEXT: pand %xmm5, %xmm6 +; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm6[0,0,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] +; SSE2-NEXT: pand %xmm4, %xmm3 +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; SSE2-NEXT: psubb %xmm2, %xmm4 ; SSE2-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0] ; SSE2-NEXT: psrldq {{.*#+}} xmm4 = xmm4[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; SSE2-NEXT: psllw %xmm4, %xmm0 -; SSE2-NEXT: psllw %xmm4, %xmm3 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,0,2,3,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] -; SSE2-NEXT: pand %xmm0, %xmm3 -; SSE2-NEXT: por %xmm6, %xmm3 -; SSE2-NEXT: pand %xmm2, %xmm1 -; SSE2-NEXT: pandn %xmm3, %xmm2 -; SSE2-NEXT: por %xmm1, %xmm2 -; SSE2-NEXT: movdqa %xmm2, %xmm0 +; SSE2-NEXT: psllw %xmm4, %xmm5 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm5[0,0,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,0,0,0] +; SSE2-NEXT: pand %xmm0, %xmm4 +; SSE2-NEXT: por %xmm3, %xmm4 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,0,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,0,0] +; SSE2-NEXT: pxor %xmm0, %xmm0 +; SSE2-NEXT: pcmpeqb %xmm2, %xmm0 +; SSE2-NEXT: pand %xmm0, %xmm1 +; SSE2-NEXT: pandn %xmm4, %xmm0 +; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: splatvar_funnnel_v16i8: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa %xmm0, %xmm3 -; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: pshufb %xmm0, %xmm2 ; 
SSE41-NEXT: pand {{.*}}(%rip), %xmm2 -; SSE41-NEXT: pmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero -; SSE41-NEXT: movdqa %xmm1, %xmm5 -; SSE41-NEXT: psrlw %xmm4, %xmm5 +; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; SSE41-NEXT: movdqa %xmm1, %xmm4 +; SSE41-NEXT: psrlw %xmm0, %xmm4 +; SSE41-NEXT: pcmpeqd %xmm5, %xmm5 ; SSE41-NEXT: pcmpeqd %xmm6, %xmm6 -; SSE41-NEXT: pcmpeqd %xmm7, %xmm7 -; SSE41-NEXT: psrlw %xmm4, %xmm7 -; SSE41-NEXT: pshufb {{.*#+}} xmm7 = xmm7[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] -; SSE41-NEXT: pand %xmm5, %xmm7 -; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; SSE41-NEXT: psubb %xmm2, %xmm4 -; SSE41-NEXT: pmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero -; SSE41-NEXT: psllw %xmm4, %xmm3 -; SSE41-NEXT: psllw %xmm4, %xmm6 -; SSE41-NEXT: pshufb %xmm0, %xmm6 -; SSE41-NEXT: pand %xmm6, %xmm3 -; SSE41-NEXT: por %xmm7, %xmm3 +; SSE41-NEXT: psrlw %xmm0, %xmm6 +; SSE41-NEXT: pshufb {{.*#+}} xmm6 = xmm6[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; SSE41-NEXT: pand %xmm4, %xmm6 +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; SSE41-NEXT: psubb %xmm2, %xmm0 +; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero +; SSE41-NEXT: psllw %xmm0, %xmm3 +; SSE41-NEXT: psllw %xmm0, %xmm5 +; SSE41-NEXT: pxor %xmm0, %xmm0 +; SSE41-NEXT: pshufb %xmm0, %xmm5 +; SSE41-NEXT: pand %xmm5, %xmm3 +; SSE41-NEXT: por %xmm6, %xmm3 +; SSE41-NEXT: pshufb %xmm0, %xmm2 ; SSE41-NEXT: pcmpeqb %xmm2, %xmm0 ; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 @@ -1922,30 +1918,29 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; ; AVX1-LABEL: splatvar_funnnel_v16i8: ; AVX1: # %bb.0: -; AVX1-NEXT: vpxor %xmm3, %xmm3, 
%xmm3 -; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 -; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero -; AVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm5 -; AVX1-NEXT: vpcmpeqd %xmm6, %xmm6, %xmm6 -; AVX1-NEXT: vpsrlw %xmm4, %xmm6, %xmm4 -; AVX1-NEXT: vpshufb {{.*#+}} xmm4 = xmm4[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] -; AVX1-NEXT: vpand %xmm4, %xmm5, %xmm4 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX1-NEXT: vpsubb %xmm2, %xmm5, %xmm5 -; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,zero,zero,zero,zero,xmm5[1],zero,zero,zero,zero,zero,zero,zero -; AVX1-NEXT: vpsllw %xmm5, %xmm0, %xmm0 -; AVX1-NEXT: vpsllw %xmm5, %xmm6, %xmm5 -; AVX1-NEXT: vpshufb %xmm3, %xmm5, %xmm5 -; AVX1-NEXT: vpand %xmm5, %xmm0, %xmm0 -; AVX1-NEXT: vpor %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqb %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX1-NEXT: vpsrlw %xmm3, %xmm1, %xmm4 +; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX1-NEXT: vpsrlw %xmm3, %xmm5, %xmm3 +; AVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; AVX1-NEXT: vpand %xmm3, %xmm4, %xmm3 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX1-NEXT: vpsubb %xmm2, %xmm4, %xmm4 +; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero +; AVX1-NEXT: vpsllw %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vpsllw %xmm4, %xmm5, %xmm4 +; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5 +; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpshufb %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vpcmpeqb %xmm5, %xmm2, %xmm2 ; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: splatvar_funnnel_v16i8: ; AVX2: # 
%bb.0: -; AVX2-NEXT: vpbroadcastb %xmm2, %xmm2 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX2-NEXT: vpsrlw %xmm3, %xmm1, %xmm4 @@ -1962,6 +1957,7 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; AVX2-NEXT: vpbroadcastb %xmm4, %xmm4 ; AVX2-NEXT: vpand %xmm4, %xmm0, %xmm0 ; AVX2-NEXT: vpor %xmm3, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastb %xmm2, %xmm2 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX2-NEXT: vpcmpeqb %xmm3, %xmm2, %xmm2 ; AVX2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 @@ -1969,9 +1965,9 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; ; AVX512F-LABEL: splatvar_funnnel_v16i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastb %xmm2, %xmm2 ; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero ; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512F-NEXT: vpbroadcastb %xmm2, %xmm2 ; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm4 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero ; AVX512F-NEXT: vpsrlvd %zmm4, %zmm3, %zmm3 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] @@ -1989,9 +1985,9 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> 
% ; ; AVX512VL-LABEL: splatvar_funnnel_v16i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastb %xmm2, %xmm2 ; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512VL-NEXT: vpbroadcastb %xmm2, %xmm2 ; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm4 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero ; AVX512VL-NEXT: vpsrlvd %zmm4, %zmm3, %zmm3 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] @@ -2010,9 +2006,9 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; AVX512BW-LABEL: splatvar_funnnel_v16i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 -; AVX512BW-NEXT: vpbroadcastb %xmm2, %xmm2 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero ; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512BW-NEXT: vpbroadcastb %xmm2, %xmm2 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm4 = 
xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero ; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm3, %zmm3 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero @@ -2031,9 +2027,9 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; AVX512VBMI2-LABEL: splatvar_funnnel_v16i8: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 -; AVX512VBMI2-NEXT: vpbroadcastb %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero ; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512VBMI2-NEXT: vpbroadcastb %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero ; AVX512VBMI2-NEXT: vpsrlvw %zmm4, %zmm3, %zmm3 ; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero @@ -2051,9 +2047,9 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; ; AVX512VLBW-LABEL: splatvar_funnnel_v16i8: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = 
xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero ; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero ; AVX512VLBW-NEXT: vpsrlvw %ymm4, %ymm3, %ymm3 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] @@ -2070,9 +2066,9 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; ; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i8: ; AVX512VLVBMI2: # %bb.0: -; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm2, %xmm2 ; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero ; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm2, %xmm2 ; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero ; AVX512VLVBMI2-NEXT: vpsrlvw %ymm4, %ymm3, %ymm3 ; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] @@ -2089,9 +2085,9 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; ; XOPAVX1-LABEL: splatvar_funnnel_v16i8: ; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; XOPAVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2 -; XOPAVX1-NEXT: 
vpand {{.*}}(%rip), %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsubb %xmm2, %xmm3, %xmm4 ; XOPAVX1-NEXT: vpshlb %xmm4, %xmm1, %xmm4 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] @@ -2104,8 +2100,8 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; ; XOPAVX2-LABEL: splatvar_funnnel_v16i8: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastb %xmm2, %xmm2 ; XOPAVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; XOPAVX2-NEXT: vpbroadcastb %xmm2, %xmm2 ; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; XOPAVX2-NEXT: vpsubb %xmm2, %xmm3, %xmm4 ; XOPAVX2-NEXT: vpshlb %xmm4, %xmm1, %xmm4 @@ -2119,39 +2115,39 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; ; X32-SSE-LABEL: splatvar_funnnel_v16i8: ; X32-SSE: # %bb.0: -; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X32-SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,0,2,3,4,5,6,7] -; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0] -; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm3 -; X32-SSE-NEXT: movdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; X32-SSE-NEXT: psubb %xmm3, %xmm4 -; X32-SSE-NEXT: pxor %xmm2, %xmm2 -; X32-SSE-NEXT: pcmpeqb %xmm3, %xmm2 +; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm2 +; X32-SSE-NEXT: movdqa %xmm2, %xmm3 ; X32-SSE-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0] ; X32-SSE-NEXT: psrldq {{.*#+}} xmm3 = xmm3[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; X32-SSE-NEXT: movdqa %xmm1, %xmm5 -; X32-SSE-NEXT: psrlw %xmm3, %xmm5 +; X32-SSE-NEXT: movdqa %xmm1, %xmm4 +; X32-SSE-NEXT: psrlw %xmm3, %xmm4 +; X32-SSE-NEXT: pcmpeqd %xmm5, %xmm5 ; X32-SSE-NEXT: pcmpeqd %xmm6, %xmm6 ; X32-SSE-NEXT: psrlw %xmm3, %xmm6 -; X32-SSE-NEXT: pcmpeqd %xmm3, %xmm3 ; X32-SSE-NEXT: psrlw $8, %xmm6 ; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X32-SSE-NEXT: pshuflw {{.*#+}} xmm6 = 
xmm6[0,0,2,3,4,5,6,7] -; X32-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,0,0,0] -; X32-SSE-NEXT: pand %xmm5, %xmm6 +; X32-SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm6[0,0,2,3,4,5,6,7] +; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] +; X32-SSE-NEXT: pand %xmm4, %xmm3 +; X32-SSE-NEXT: movdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; X32-SSE-NEXT: psubb %xmm2, %xmm4 ; X32-SSE-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0] ; X32-SSE-NEXT: psrldq {{.*#+}} xmm4 = xmm4[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X32-SSE-NEXT: psllw %xmm4, %xmm0 -; X32-SSE-NEXT: psllw %xmm4, %xmm3 -; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X32-SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,0,2,3,4,5,6,7] -; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] -; X32-SSE-NEXT: pand %xmm0, %xmm3 -; X32-SSE-NEXT: por %xmm6, %xmm3 -; X32-SSE-NEXT: pand %xmm2, %xmm1 -; X32-SSE-NEXT: pandn %xmm3, %xmm2 -; X32-SSE-NEXT: por %xmm1, %xmm2 -; X32-SSE-NEXT: movdqa %xmm2, %xmm0 +; X32-SSE-NEXT: psllw %xmm4, %xmm5 +; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; X32-SSE-NEXT: pshuflw {{.*#+}} xmm4 = xmm5[0,0,2,3,4,5,6,7] +; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,0,0,0] +; X32-SSE-NEXT: pand %xmm0, %xmm4 +; X32-SSE-NEXT: por %xmm3, %xmm4 +; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; X32-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,0,2,3,4,5,6,7] +; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,0,0] +; X32-SSE-NEXT: pxor %xmm0, %xmm0 +; X32-SSE-NEXT: pcmpeqb %xmm2, %xmm0 +; X32-SSE-NEXT: pand %xmm0, %xmm1 +; X32-SSE-NEXT: pandn %xmm4, %xmm0 +; X32-SSE-NEXT: por %xmm1, %xmm0 ; X32-SSE-NEXT: retl %splat = shufflevector <16 x i8> %amt, <16 x i8> undef, <16 x i32> zeroinitializer %res = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %splat) diff --git 
a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll index c57b9699861b7..e40e3cdfbd65a 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll @@ -910,39 +910,35 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt) define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt) nounwind { ; AVX1-LABEL: splatvar_funnnel_v4i64: ; AVX1: # %bb.0: -; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,1,0,1] -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 -; AVX1-NEXT: vandps {{.*}}(%rip), %ymm2, %ymm2 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX1-NEXT: vpsrlq %xmm2, %xmm3, %xmm3 ; AVX1-NEXT: vpsrlq %xmm2, %xmm1, %xmm4 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [64,64] -; AVX1-NEXT: vpsubq %xmm4, %xmm5, %xmm6 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7 -; AVX1-NEXT: vpsllq %xmm6, %xmm7, %xmm6 -; AVX1-NEXT: vpsubq %xmm2, %xmm5, %xmm5 -; AVX1-NEXT: vpsllq %xmm5, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm0, %ymm0 +; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] +; AVX1-NEXT: vpsubq %xmm2, %xmm4, %xmm4 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5 +; AVX1-NEXT: vpsllq %xmm4, %xmm5, %xmm5 +; AVX1-NEXT: vpsllq %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0 ; AVX1-NEXT: vorps %ymm3, %ymm0, %ymm0 ; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpcmpeqq %xmm3, %xmm4, %xmm4 ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 ; AVX1-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: splatvar_funnnel_v4i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63] -; 
AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] +; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX2-NEXT: vpsrlq %xmm2, %ymm1, %ymm3 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX2-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX2-NEXT: vpsllq %xmm4, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm3, %ymm0, %ymm0 +; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX2-NEXT: vpcmpeqq %ymm3, %ymm2, %ymm2 ; AVX2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 @@ -951,14 +947,14 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> % ; AVX512F-LABEL: splatvar_funnnel_v4i64: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 -; AVX512F-NEXT: vpbroadcastq %xmm2, %ymm2 -; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63] -; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2 +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] +; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX512F-NEXT: vpsrlq %xmm2, %ymm1, %ymm3 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX512F-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512F-NEXT: vpsllq %xmm4, %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0 +; AVX512F-NEXT: vpbroadcastq %xmm2, %ymm2 ; AVX512F-NEXT: vptestnmq %zmm2, %zmm2, %k1 ; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 @@ -966,13 +962,13 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> % ; ; AVX512VL-LABEL: splatvar_funnnel_v4i64: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastq %xmm2, %ymm2 -; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to4}, %ymm2, %ymm2 +; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to2}, %xmm2, %xmm2 ; AVX512VL-NEXT: vpsrlq %xmm2, %ymm1, %ymm3 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX512VL-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512VL-NEXT: vpsllq %xmm4, %ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0 +; AVX512VL-NEXT: vpbroadcastq %xmm2, %ymm2 ; AVX512VL-NEXT: vptestnmq 
%ymm2, %ymm2, %k1 ; AVX512VL-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} ; AVX512VL-NEXT: retq @@ -980,14 +976,14 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> % ; AVX512BW-LABEL: splatvar_funnnel_v4i64: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 -; AVX512BW-NEXT: vpbroadcastq %xmm2, %ymm2 -; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63] -; AVX512BW-NEXT: vpand %ymm3, %ymm2, %ymm2 +; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] +; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX512BW-NEXT: vpsrlq %xmm2, %ymm1, %ymm3 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX512BW-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512BW-NEXT: vpsllq %xmm4, %ymm0, %ymm0 ; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0 +; AVX512BW-NEXT: vpbroadcastq %xmm2, %ymm2 ; AVX512BW-NEXT: vptestnmq %zmm2, %zmm2, %k1 ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 @@ -996,14 +992,14 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> % ; AVX512VBMI2-LABEL: splatvar_funnnel_v4i64: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 -; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %ymm2 -; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63] -; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm2 +; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] +; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpsrlq %xmm2, %ymm1, %ymm3 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX512VBMI2-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512VBMI2-NEXT: vpsllq %xmm4, %ymm0, %ymm0 ; AVX512VBMI2-NEXT: vpor %ymm3, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %ymm2 ; AVX512VBMI2-NEXT: vptestnmq %zmm2, %zmm2, %k1 ; AVX512VBMI2-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 @@ -1011,13 +1007,13 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, 
<4 x i64> %y, <4 x i64> % ; ; AVX512VLBW-LABEL: splatvar_funnnel_v4i64: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %ymm2 -; AVX512VLBW-NEXT: vpandq {{.*}}(%rip){1to4}, %ymm2, %ymm2 +; AVX512VLBW-NEXT: vpandq {{.*}}(%rip){1to2}, %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpsrlq %xmm2, %ymm1, %ymm3 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX512VLBW-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512VLBW-NEXT: vpsllq %xmm4, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %ymm2 ; AVX512VLBW-NEXT: vptestnmq %ymm2, %ymm2, %k1 ; AVX512VLBW-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} ; AVX512VLBW-NEXT: retq @@ -1031,39 +1027,35 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> % ; ; XOPAVX1-LABEL: splatvar_funnnel_v4i64: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,1,0,1] -; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 -; XOPAVX1-NEXT: vandps {{.*}}(%rip), %ymm2, %ymm2 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; XOPAVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsrlq %xmm2, %xmm3, %xmm3 ; XOPAVX1-NEXT: vpsrlq %xmm2, %xmm1, %xmm4 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3 -; XOPAVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 -; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [64,64] -; XOPAVX1-NEXT: vpsubq %xmm4, %xmm5, %xmm6 -; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm7 -; XOPAVX1-NEXT: vpsllq %xmm6, %xmm7, %xmm6 -; XOPAVX1-NEXT: vpsubq %xmm2, %xmm5, %xmm5 -; XOPAVX1-NEXT: vpsllq %xmm5, %xmm0, %xmm0 -; XOPAVX1-NEXT: vinsertf128 $1, %xmm6, %ymm0, %ymm0 +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] +; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] +; XOPAVX1-NEXT: vpsubq %xmm2, %xmm4, %xmm4 +; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm5 +; XOPAVX1-NEXT: vpsllq %xmm4, %xmm5, %xmm5 +; XOPAVX1-NEXT: vpsllq %xmm4, %xmm0, %xmm0 +; XOPAVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0 ; XOPAVX1-NEXT: vorps %ymm3, %ymm0, %ymm0 ; XOPAVX1-NEXT: vxorps 
%xmm3, %xmm3, %xmm3 -; XOPAVX1-NEXT: vpcomeqq %xmm3, %xmm4, %xmm4 ; XOPAVX1-NEXT: vpcomeqq %xmm3, %xmm2, %xmm2 -; XOPAVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 +; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 ; XOPAVX1-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 ; XOPAVX1-NEXT: retq ; ; XOPAVX2-LABEL: splatvar_funnnel_v4i64: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastq %xmm2, %ymm2 -; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63] -; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm2 +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] +; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2 ; XOPAVX2-NEXT: vpsrlq %xmm2, %ymm1, %ymm3 ; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; XOPAVX2-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; XOPAVX2-NEXT: vpsllq %xmm4, %ymm0, %ymm0 ; XOPAVX2-NEXT: vpor %ymm3, %ymm0, %ymm0 +; XOPAVX2-NEXT: vpbroadcastq %xmm2, %ymm2 ; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; XOPAVX2-NEXT: vpcmpeqq %ymm3, %ymm2, %ymm2 ; XOPAVX2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 @@ -1076,37 +1068,31 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> % define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %amt) nounwind { ; AVX1-LABEL: splatvar_funnnel_v8i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,0,0,0] -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 -; AVX1-NEXT: vandps {{.*}}(%rip), %ymm2, %ymm2 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 -; AVX1-NEXT: vpsrld %xmm3, %xmm4, %xmm4 -; AVX1-NEXT: vpsrld %xmm3, %xmm1, %xmm3 -; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [32,32,32,32] -; AVX1-NEXT: vpsubd %xmm4, %xmm5, %xmm6 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm6 = xmm6[0],zero,xmm6[1],zero -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7 -; AVX1-NEXT: vpslld %xmm6, %xmm7, %xmm6 -; AVX1-NEXT: vpsubd %xmm2, %xmm5, %xmm5 -; AVX1-NEXT: 
vpmovzxdq {{.*#+}} xmm5 = xmm5[0],zero,xmm5[1],zero -; AVX1-NEXT: vpslld %xmm5, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero +; AVX1-NEXT: vpsrld %xmm4, %xmm3, %xmm3 +; AVX1-NEXT: vpsrld %xmm4, %xmm1, %xmm4 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3 +; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [32,32,32,32] +; AVX1-NEXT: vpsubd %xmm2, %xmm4, %xmm4 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5 +; AVX1-NEXT: vpslld %xmm4, %xmm5, %xmm5 +; AVX1-NEXT: vpslld %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0 ; AVX1-NEXT: vorps %ymm3, %ymm0, %ymm0 ; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpcmpeqd %xmm3, %xmm4, %xmm4 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 ; AVX1-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: splatvar_funnnel_v8i32: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastd %xmm2, %ymm2 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31] -; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31] +; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; AVX2-NEXT: vpsrld %xmm3, %ymm1, %ymm3 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32] @@ -1114,6 +1100,7 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX2-NEXT: vpslld %xmm4, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm3, %ymm0, %ymm0 +; AVX2-NEXT: vpbroadcastd %xmm2, %ymm2 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2 ; AVX2-NEXT: 
vblendvps %ymm2, %ymm1, %ymm0, %ymm0 @@ -1122,9 +1109,8 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; AVX512F-LABEL: splatvar_funnnel_v8i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 -; AVX512F-NEXT: vpbroadcastd %xmm2, %ymm2 -; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31] -; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2 +; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31] +; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; AVX512F-NEXT: vpsrld %xmm3, %ymm1, %ymm3 ; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32] @@ -1132,6 +1118,7 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX512F-NEXT: vpslld %xmm4, %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0 +; AVX512F-NEXT: vpbroadcastd %xmm2, %ymm2 ; AVX512F-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 @@ -1139,8 +1126,7 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; ; AVX512VL-LABEL: splatvar_funnnel_v8i32: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastd %xmm2, %ymm2 -; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm2, %ymm2 +; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm2, %xmm2 ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; AVX512VL-NEXT: vpsrld %xmm3, %ymm1, %ymm3 ; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32] @@ -1148,6 +1134,7 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX512VL-NEXT: vpslld %xmm4, %ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0 +; AVX512VL-NEXT: vpbroadcastd %xmm2, %ymm2 ; AVX512VL-NEXT: vptestnmd %ymm2, %ymm2, %k1 ; 
AVX512VL-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1} ; AVX512VL-NEXT: retq @@ -1155,9 +1142,8 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; AVX512BW-LABEL: splatvar_funnnel_v8i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 -; AVX512BW-NEXT: vpbroadcastd %xmm2, %ymm2 -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31] -; AVX512BW-NEXT: vpand %ymm3, %ymm2, %ymm2 +; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31] +; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; AVX512BW-NEXT: vpsrld %xmm3, %ymm1, %ymm3 ; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32] @@ -1165,6 +1151,7 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX512BW-NEXT: vpslld %xmm4, %ymm0, %ymm0 ; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0 +; AVX512BW-NEXT: vpbroadcastd %xmm2, %ymm2 ; AVX512BW-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; AVX512BW-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 @@ -1173,9 +1160,8 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; AVX512VBMI2-LABEL: splatvar_funnnel_v8i32: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 -; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %ymm2 -; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31] -; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm2 +; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31] +; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; AVX512VBMI2-NEXT: vpsrld %xmm3, %ymm1, %ymm3 ; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32] @@ -1183,6 +1169,7 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; 
AVX512VBMI2-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX512VBMI2-NEXT: vpslld %xmm4, %ymm0, %ymm0 ; AVX512VBMI2-NEXT: vpor %ymm3, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %ymm2 ; AVX512VBMI2-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; AVX512VBMI2-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 @@ -1190,8 +1177,7 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; ; AVX512VLBW-LABEL: splatvar_funnnel_v8i32: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastd %xmm2, %ymm2 -; AVX512VLBW-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm2, %ymm2 +; AVX512VLBW-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; AVX512VLBW-NEXT: vpsrld %xmm3, %ymm1, %ymm3 ; AVX512VLBW-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32] @@ -1199,6 +1185,7 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX512VLBW-NEXT: vpslld %xmm4, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpbroadcastd %xmm2, %ymm2 ; AVX512VLBW-NEXT: vptestnmd %ymm2, %ymm2, %k1 ; AVX512VLBW-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1} ; AVX512VLBW-NEXT: retq @@ -1212,37 +1199,31 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; ; XOPAVX1-LABEL: splatvar_funnnel_v8i32: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,0,0,0] -; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 -; XOPAVX1-NEXT: vandps {{.*}}(%rip), %ymm2, %ymm2 -; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero -; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 -; XOPAVX1-NEXT: vpsrld %xmm3, %xmm4, %xmm4 -; XOPAVX1-NEXT: vpsrld %xmm3, %xmm1, %xmm3 -; XOPAVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3 -; XOPAVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 -; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm5 = 
[32,32,32,32] -; XOPAVX1-NEXT: vpsubd %xmm4, %xmm5, %xmm6 -; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm6 = xmm6[0],zero,xmm6[1],zero -; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm7 -; XOPAVX1-NEXT: vpslld %xmm6, %xmm7, %xmm6 -; XOPAVX1-NEXT: vpsubd %xmm2, %xmm5, %xmm5 -; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm5 = xmm5[0],zero,xmm5[1],zero -; XOPAVX1-NEXT: vpslld %xmm5, %xmm0, %xmm0 -; XOPAVX1-NEXT: vinsertf128 $1, %xmm6, %ymm0, %ymm0 +; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; XOPAVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero +; XOPAVX1-NEXT: vpsrld %xmm4, %xmm3, %xmm3 +; XOPAVX1-NEXT: vpsrld %xmm4, %xmm1, %xmm4 +; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3 +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] +; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [32,32,32,32] +; XOPAVX1-NEXT: vpsubd %xmm2, %xmm4, %xmm4 +; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero +; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm5 +; XOPAVX1-NEXT: vpslld %xmm4, %xmm5, %xmm5 +; XOPAVX1-NEXT: vpslld %xmm4, %xmm0, %xmm0 +; XOPAVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0 ; XOPAVX1-NEXT: vorps %ymm3, %ymm0, %ymm0 ; XOPAVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; XOPAVX1-NEXT: vpcomeqd %xmm3, %xmm4, %xmm4 ; XOPAVX1-NEXT: vpcomeqd %xmm3, %xmm2, %xmm2 -; XOPAVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 +; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 ; XOPAVX1-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 ; XOPAVX1-NEXT: retq ; ; XOPAVX2-LABEL: splatvar_funnnel_v8i32: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastd %xmm2, %ymm2 -; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31] -; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm2 +; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31] +; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2 ; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; XOPAVX2-NEXT: vpsrld %xmm3, %ymm1, %ymm3 ; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32] @@ 
-1250,6 +1231,7 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; XOPAVX2-NEXT: vpslld %xmm4, %ymm0, %ymm0 ; XOPAVX2-NEXT: vpor %ymm3, %ymm0, %ymm0 +; XOPAVX2-NEXT: vpbroadcastd %xmm2, %ymm2 ; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; XOPAVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2 ; XOPAVX2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 @@ -1262,30 +1244,25 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %amt) nounwind { ; AVX1-LABEL: splatvar_funnnel_v16i16: ; AVX1: # %bb.0: +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero +; AVX1-NEXT: vpsrlw %xmm4, %xmm3, %xmm3 +; AVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm4 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,3,4,5,6,7] ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 -; AVX1-NEXT: vandps {{.*}}(%rip), %ymm2, %ymm2 -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 -; AVX1-NEXT: vpsrlw %xmm3, %xmm4, %xmm4 -; AVX1-NEXT: vpsrlw %xmm3, %xmm1, %xmm3 -; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16] -; AVX1-NEXT: vpsubw %xmm4, %xmm5, %xmm6 -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7 -; AVX1-NEXT: vpsllw %xmm6, %xmm7, %xmm6 -; AVX1-NEXT: vpsubw %xmm2, %xmm5, %xmm5 -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero -; AVX1-NEXT: vpsllw %xmm5, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, 
%xmm6, %ymm0, %ymm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] +; AVX1-NEXT: vpsubw %xmm2, %xmm4, %xmm4 +; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5 +; AVX1-NEXT: vpsllw %xmm4, %xmm5, %xmm5 +; AVX1-NEXT: vpsllw %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0 ; AVX1-NEXT: vorps %ymm3, %ymm0, %ymm0 ; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpcmpeqw %xmm3, %xmm4, %xmm4 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 ; AVX1-NEXT: vandnps %ymm0, %ymm2, %ymm0 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 ; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0 @@ -1293,8 +1270,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; ; AVX2-LABEL: splatvar_funnnel_v16i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastw %xmm2, %ymm2 -; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; AVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX2-NEXT: vpsrlw %xmm3, %ymm1, %ymm3 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] @@ -1302,6 +1278,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX2-NEXT: vpsllw %xmm4, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm3, %ymm0, %ymm0 +; AVX2-NEXT: vpbroadcastw %xmm2, %ymm2 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX2-NEXT: vpcmpeqw %ymm3, %ymm2, %ymm2 ; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 @@ -1309,8 +1286,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; ; AVX512F-LABEL: splatvar_funnnel_v16i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastw %xmm2, %ymm2 -; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, 
%xmm2 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512F-NEXT: vpsrlw %xmm3, %ymm1, %ymm3 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] @@ -1318,6 +1294,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0 +; AVX512F-NEXT: vpbroadcastw %xmm2, %ymm2 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm2, %ymm2 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 @@ -1325,8 +1302,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; ; AVX512VL-LABEL: splatvar_funnnel_v16i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastw %xmm2, %ymm2 -; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512VL-NEXT: vpsrlw %xmm3, %ymm1, %ymm3 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] @@ -1334,6 +1310,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0 +; AVX512VL-NEXT: vpbroadcastw %xmm2, %ymm2 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX512VL-NEXT: vpcmpeqw %ymm3, %ymm2, %ymm2 ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 @@ -1342,8 +1319,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; AVX512BW-LABEL: splatvar_funnnel_v16i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 -; AVX512BW-NEXT: vpbroadcastw %xmm2, %ymm2 -; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; AVX512BW-NEXT: vpand {{.*}}(%rip), 
%xmm2, %xmm2 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512BW-NEXT: vpsrlw %xmm3, %ymm1, %ymm3 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] @@ -1351,6 +1327,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512BW-NEXT: vpsllw %xmm4, %ymm0, %ymm0 ; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0 +; AVX512BW-NEXT: vpbroadcastw %xmm2, %ymm2 ; AVX512BW-NEXT: vptestnmw %zmm2, %zmm2, %k1 ; AVX512BW-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 @@ -1359,8 +1336,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; AVX512VBMI2-LABEL: splatvar_funnnel_v16i16: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 -; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %ymm2 -; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512VBMI2-NEXT: vpsrlw %xmm3, %ymm1, %ymm3 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] @@ -1368,6 +1344,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512VBMI2-NEXT: vpsllw %xmm4, %ymm0, %ymm0 ; AVX512VBMI2-NEXT: vpor %ymm3, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %ymm2 ; AVX512VBMI2-NEXT: vptestnmw %zmm2, %zmm2, %k1 ; AVX512VBMI2-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 @@ -1375,8 +1352,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; ; AVX512VLBW-LABEL: splatvar_funnnel_v16i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %ymm2 
-; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm1, %ymm3 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] @@ -1384,6 +1360,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512VLBW-NEXT: vpsllw %xmm4, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %ymm2 ; AVX512VLBW-NEXT: vptestnmw %ymm2, %ymm2, %k1 ; AVX512VLBW-NEXT: vmovdqu16 %ymm1, %ymm0 {%k1} ; AVX512VLBW-NEXT: retq @@ -1397,37 +1374,31 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; ; XOPAVX1-LABEL: splatvar_funnnel_v16i16: ; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; XOPAVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero +; XOPAVX1-NEXT: vpsrlw %xmm4, %xmm3, %xmm3 +; XOPAVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm4 +; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,3,4,5,6,7] ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] -; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 -; XOPAVX1-NEXT: vandps {{.*}}(%rip), %ymm2, %ymm2 -; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 -; XOPAVX1-NEXT: vpsrlw %xmm3, %xmm4, %xmm4 -; XOPAVX1-NEXT: vpsrlw %xmm3, %xmm1, %xmm3 -; XOPAVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3 -; XOPAVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 -; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16] -; XOPAVX1-NEXT: vpsubw %xmm4, %xmm5, %xmm6 -; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm6 = 
xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero -; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm7 -; XOPAVX1-NEXT: vpsllw %xmm6, %xmm7, %xmm6 -; XOPAVX1-NEXT: vpsubw %xmm2, %xmm5, %xmm5 -; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero -; XOPAVX1-NEXT: vpsllw %xmm5, %xmm0, %xmm0 -; XOPAVX1-NEXT: vinsertf128 $1, %xmm6, %ymm0, %ymm0 +; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] +; XOPAVX1-NEXT: vpsubw %xmm2, %xmm4, %xmm4 +; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero +; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm5 +; XOPAVX1-NEXT: vpsllw %xmm4, %xmm5, %xmm5 +; XOPAVX1-NEXT: vpsllw %xmm4, %xmm0, %xmm0 +; XOPAVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0 ; XOPAVX1-NEXT: vorps %ymm3, %ymm0, %ymm0 ; XOPAVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; XOPAVX1-NEXT: vpcomeqw %xmm3, %xmm4, %xmm4 ; XOPAVX1-NEXT: vpcomeqw %xmm3, %xmm2, %xmm2 -; XOPAVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 +; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 ; XOPAVX1-NEXT: vpcmov %ymm2, %ymm0, %ymm1, %ymm0 ; XOPAVX1-NEXT: retq ; ; XOPAVX2-LABEL: splatvar_funnnel_v16i16: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastw %xmm2, %ymm2 -; XOPAVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; XOPAVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; XOPAVX2-NEXT: vpsrlw %xmm3, %ymm1, %ymm3 ; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] @@ -1435,6 +1406,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; XOPAVX2-NEXT: vpsllw %xmm4, %ymm0, %ymm0 ; XOPAVX2-NEXT: vpor %ymm3, %ymm0, %ymm0 +; XOPAVX2-NEXT: vpbroadcastw %xmm2, %ymm2 ; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; XOPAVX2-NEXT: vpcmpeqw %ymm3, %ymm2, %ymm2 ; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 @@ -1447,40 +1419,33 @@ 
define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt) nounwind { ; AVX1-LABEL: splatvar_funnnel_v32i8: ; AVX1: # %bb.0: -; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 -; AVX1-NEXT: vandps {{.*}}(%rip), %ymm2, %ymm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 -; AVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm5 -; AVX1-NEXT: vpcmpeqd %xmm8, %xmm8, %xmm8 -; AVX1-NEXT: vpsrlw %xmm4, %xmm8, %xmm7 -; AVX1-NEXT: vpshufb {{.*#+}} xmm7 = xmm7[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] -; AVX1-NEXT: vpand %xmm7, %xmm5, %xmm5 +; AVX1-NEXT: vpsrlw %xmm4, %xmm3, %xmm3 +; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm6 +; AVX1-NEXT: vpshufb {{.*#+}} xmm6 = xmm6[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3 ; AVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm4 -; AVX1-NEXT: vpand %xmm7, %xmm4, %xmm4 -; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm4, %ymm9 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX1-NEXT: vpsubb %xmm5, %xmm7, %xmm6 -; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,zero,zero,zero,zero,xmm6[1],zero,zero,zero,zero,zero,zero,zero -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 -; AVX1-NEXT: vpsllw %xmm6, %xmm4, %xmm4 -; AVX1-NEXT: vpsllw %xmm6, %xmm8, %xmm6 -; AVX1-NEXT: vpshufb %xmm3, %xmm6, %xmm6 ; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm4 -; AVX1-NEXT: vpsubb %xmm2, %xmm7, %xmm6 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3 +; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpshufb %xmm4, %xmm2, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = 
[8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX1-NEXT: vpsubb %xmm2, %xmm6, %xmm6 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,zero,zero,zero,zero,xmm6[1],zero,zero,zero,zero,zero,zero,zero +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7 +; AVX1-NEXT: vpsllw %xmm6, %xmm7, %xmm7 +; AVX1-NEXT: vpsllw %xmm6, %xmm5, %xmm5 +; AVX1-NEXT: vpshufb %xmm4, %xmm5, %xmm5 +; AVX1-NEXT: vpand %xmm5, %xmm7, %xmm7 ; AVX1-NEXT: vpsllw %xmm6, %xmm0, %xmm0 -; AVX1-NEXT: vpsllw %xmm6, %xmm8, %xmm6 -; AVX1-NEXT: vpshufb %xmm3, %xmm6, %xmm6 -; AVX1-NEXT: vpand %xmm6, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 -; AVX1-NEXT: vorps %ymm0, %ymm9, %ymm0 -; AVX1-NEXT: vpcmpeqb %xmm3, %xmm5, %xmm4 -; AVX1-NEXT: vpcmpeqb %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 +; AVX1-NEXT: vpand %xmm5, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm7, %ymm0, %ymm0 +; AVX1-NEXT: vorps %ymm3, %ymm0, %ymm0 +; AVX1-NEXT: vpcmpeqb %xmm4, %xmm2, %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 ; AVX1-NEXT: vandnps %ymm0, %ymm2, %ymm0 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 ; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0 @@ -1488,8 +1453,7 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; ; AVX2-LABEL: splatvar_funnnel_v32i8: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2 -; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; AVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX2-NEXT: vpsrlw %xmm3, %ymm1, %ymm4 ; AVX2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 @@ -1505,6 +1469,7 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; AVX2-NEXT: vpbroadcastb %xmm4, %ymm4 ; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm3, %ymm0, %ymm0 +; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX2-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2 ; AVX2-NEXT: 
vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 @@ -1512,8 +1477,7 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; ; AVX512F-LABEL: splatvar_funnnel_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2 -; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vpsrlw %xmm3, %ymm1, %ymm4 ; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 @@ -1529,6 +1493,7 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4 ; AVX512F-NEXT: vpand %ymm4, %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0 +; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 @@ -1536,8 +1501,7 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; ; AVX512VL-LABEL: splatvar_funnnel_v32i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2 -; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VL-NEXT: vpsrlw %xmm3, %ymm1, %ymm4 ; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 @@ -1553,6 +1517,7 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4 ; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0 +; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2 ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 @@ -1561,9 +1526,9 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> 
%x, <32 x i8> %y, <32 x i8> % ; AVX512BW-LABEL: splatvar_funnnel_v32i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 -; AVX512BW-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero -; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512BW-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero,ymm2[16],zero,ymm2[17],zero,ymm2[18],zero,ymm2[19],zero,ymm2[20],zero,ymm2[21],zero,ymm2[22],zero,ymm2[23],zero,ymm2[24],zero,ymm2[25],zero,ymm2[26],zero,ymm2[27],zero,ymm2[28],zero,ymm2[29],zero,ymm2[30],zero,ymm2[31],zero ; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm3, %zmm3 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] @@ -1581,9 +1546,9 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; AVX512VBMI2-LABEL: splatvar_funnnel_v32i8: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 -; AVX512VBMI2-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm3 = 
ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero -; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512VBMI2-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero,ymm2[16],zero,ymm2[17],zero,ymm2[18],zero,ymm2[19],zero,ymm2[20],zero,ymm2[21],zero,ymm2[22],zero,ymm2[23],zero,ymm2[24],zero,ymm2[25],zero,ymm2[26],zero,ymm2[27],zero,ymm2[28],zero,ymm2[29],zero,ymm2[30],zero,ymm2[31],zero ; AVX512VBMI2-NEXT: vpsrlvw %zmm4, %zmm3, %zmm3 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] @@ -1600,9 +1565,9 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; ; AVX512VLBW-LABEL: splatvar_funnnel_v32i8: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero -; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; 
AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero,ymm2[16],zero,ymm2[17],zero,ymm2[18],zero,ymm2[19],zero,ymm2[20],zero,ymm2[21],zero,ymm2[22],zero,ymm2[23],zero,ymm2[24],zero,ymm2[25],zero,ymm2[26],zero,ymm2[27],zero,ymm2[28],zero,ymm2[29],zero,ymm2[30],zero,ymm2[31],zero ; AVX512VLBW-NEXT: vpsrlvw %zmm4, %zmm3, %zmm3 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] @@ -1618,9 +1583,9 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; ; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i8: ; AVX512VLVBMI2: # %bb.0: -; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero -; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm4 = 
ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero,ymm2[16],zero,ymm2[17],zero,ymm2[18],zero,ymm2[19],zero,ymm2[20],zero,ymm2[21],zero,ymm2[22],zero,ymm2[23],zero,ymm2[24],zero,ymm2[25],zero,ymm2[26],zero,ymm2[27],zero,ymm2[28],zero,ymm2[29],zero,ymm2[30],zero,ymm2[31],zero ; AVX512VLVBMI2-NEXT: vpsrlvw %zmm4, %zmm3, %zmm3 ; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] @@ -1636,43 +1601,36 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; ; XOPAVX1-LABEL: splatvar_funnnel_v32i8: ; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; XOPAVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2 -; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 -; XOPAVX1-NEXT: vandps {{.*}}(%rip), %ymm2, %ymm2 -; XOPAVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 -; XOPAVX1-NEXT: vpsubb %xmm4, %xmm3, %xmm5 -; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm6 -; XOPAVX1-NEXT: vpshlb %xmm5, %xmm6, %xmm5 -; XOPAVX1-NEXT: vpsubb %xmm2, %xmm3, %xmm6 -; XOPAVX1-NEXT: vpshlb %xmm6, %xmm1, %xmm6 -; XOPAVX1-NEXT: vinsertf128 $1, %xmm5, %ymm6, %ymm5 -; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm8 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; XOPAVX1-NEXT: vpsubb %xmm4, %xmm8, %xmm7 +; XOPAVX1-NEXT: vpsubb %xmm2, %xmm3, %xmm4 +; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 +; XOPAVX1-NEXT: vpshlb %xmm4, %xmm5, %xmm5 +; XOPAVX1-NEXT: vpshlb %xmm4, %xmm1, %xmm4 +; XOPAVX1-NEXT: vinsertf128 $1, %xmm5, %ymm4, %ymm4 +; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; XOPAVX1-NEXT: vpsubb %xmm2, %xmm5, %xmm5 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm6 -; XOPAVX1-NEXT: vpshlb %xmm7, %xmm6, %xmm6 -; XOPAVX1-NEXT: vpsubb %xmm2, %xmm8, %xmm7 -; XOPAVX1-NEXT: vpshlb %xmm7, %xmm0, %xmm0 +; XOPAVX1-NEXT: 
vpshlb %xmm5, %xmm6, %xmm6 +; XOPAVX1-NEXT: vpshlb %xmm5, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm6, %ymm0, %ymm0 -; XOPAVX1-NEXT: vorps %ymm5, %ymm0, %ymm0 -; XOPAVX1-NEXT: vpcomeqb %xmm3, %xmm4, %xmm4 +; XOPAVX1-NEXT: vorps %ymm4, %ymm0, %ymm0 ; XOPAVX1-NEXT: vpcomeqb %xmm3, %xmm2, %xmm2 -; XOPAVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 +; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 ; XOPAVX1-NEXT: vpcmov %ymm2, %ymm0, %ymm1, %ymm0 ; XOPAVX1-NEXT: retq ; ; XOPAVX2-LABEL: splatvar_funnnel_v32i8: ; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; XOPAVX2-NEXT: vpbroadcastb %xmm2, %ymm2 -; XOPAVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 -; XOPAVX2-NEXT: vextracti128 $1, %ymm2, %xmm3 -; XOPAVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; XOPAVX2-NEXT: vpsubb %xmm3, %xmm4, %xmm3 -; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm5 -; XOPAVX2-NEXT: vpshlb %xmm3, %xmm5, %xmm3 -; XOPAVX2-NEXT: vpsubb %xmm2, %xmm4, %xmm4 -; XOPAVX2-NEXT: vpshlb %xmm4, %xmm1, %xmm4 -; XOPAVX2-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3 +; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; XOPAVX2-NEXT: vpsubb %xmm2, %xmm3, %xmm3 +; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm4 +; XOPAVX2-NEXT: vpshlb %xmm3, %xmm4, %xmm4 +; XOPAVX2-NEXT: vpshlb %xmm3, %xmm1, %xmm3 +; XOPAVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 ; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; XOPAVX2-NEXT: vpsubb %ymm2, %ymm4, %ymm4 ; XOPAVX2-NEXT: vextracti128 $1, %ymm4, %xmm5 diff --git a/llvm/test/CodeGen/X86/vector-fshr-512.ll b/llvm/test/CodeGen/X86/vector-fshr-512.ll index ef4097addc60d..fd59d40cefc88 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-512.ll @@ -556,39 +556,41 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt) define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %amt) nounwind { ; AVX512F-LABEL: splatvar_funnnel_v8i64: ; 
AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastq %xmm2, %zmm2 -; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2 +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] +; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX512F-NEXT: vpsrlq %xmm2, %zmm1, %zmm3 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX512F-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512F-NEXT: vpsllq %xmm4, %zmm0, %zmm0 ; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0 +; AVX512F-NEXT: vpbroadcastq %xmm2, %zmm2 ; AVX512F-NEXT: vptestnmq %zmm2, %zmm2, %k1 ; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_funnnel_v8i64: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastq %xmm2, %zmm2 -; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2 +; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to2}, %xmm2, %xmm2 ; AVX512VL-NEXT: vpsrlq %xmm2, %zmm1, %zmm3 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX512VL-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512VL-NEXT: vpsllq %xmm4, %zmm0, %zmm0 ; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0 +; AVX512VL-NEXT: vpbroadcastq %xmm2, %zmm2 ; AVX512VL-NEXT: vptestnmq %zmm2, %zmm2, %k1 ; AVX512VL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v8i64: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastq %xmm2, %zmm2 -; AVX512BW-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2 +; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] +; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX512BW-NEXT: vpsrlq %xmm2, %zmm1, %zmm3 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX512BW-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512BW-NEXT: vpsllq %xmm4, %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0 +; AVX512BW-NEXT: vpbroadcastq %xmm2, %zmm2 ; AVX512BW-NEXT: vptestnmq %zmm2, %zmm2, %k1 ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} ; AVX512BW-NEXT: retq @@ -602,13 +604,13 @@ define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> % ; ; AVX512VLBW-LABEL: splatvar_funnnel_v8i64: ; 
AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %zmm2 -; AVX512VLBW-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2 +; AVX512VLBW-NEXT: vpandq {{.*}}(%rip){1to2}, %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpsrlq %xmm2, %zmm1, %zmm3 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64] ; AVX512VLBW-NEXT: vpsubq %xmm2, %xmm4, %xmm4 ; AVX512VLBW-NEXT: vpsllq %xmm4, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %zmm2 ; AVX512VLBW-NEXT: vptestnmq %zmm2, %zmm2, %k1 ; AVX512VLBW-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} ; AVX512VLBW-NEXT: retq @@ -627,8 +629,8 @@ define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> % define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i32> %amt) nounwind { ; AVX512F-LABEL: splatvar_funnnel_v16i32: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastd %xmm2, %zmm2 -; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2 +; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31] +; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; AVX512F-NEXT: vpsrld %xmm3, %zmm1, %zmm3 ; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32] @@ -636,14 +638,14 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i ; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX512F-NEXT: vpslld %xmm4, %zmm0, %zmm0 ; AVX512F-NEXT: vpord %zmm3, %zmm0, %zmm0 +; AVX512F-NEXT: vpbroadcastd %xmm2, %zmm2 ; AVX512F-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_funnnel_v16i32: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastd %xmm2, %zmm2 -; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2 +; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm2, %xmm2 ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; AVX512VL-NEXT: vpsrld %xmm3, %zmm1, %zmm3 ; AVX512VL-NEXT: 
vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32] @@ -651,14 +653,15 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX512VL-NEXT: vpslld %xmm4, %zmm0, %zmm0 ; AVX512VL-NEXT: vpord %zmm3, %zmm0, %zmm0 +; AVX512VL-NEXT: vpbroadcastd %xmm2, %zmm2 ; AVX512VL-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; AVX512VL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v16i32: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastd %xmm2, %zmm2 -; AVX512BW-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2 +; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31] +; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; AVX512BW-NEXT: vpsrld %xmm3, %zmm1, %zmm3 ; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32] @@ -666,6 +669,7 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i ; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero ; AVX512BW-NEXT: vpslld %xmm4, %zmm0, %zmm0 ; AVX512BW-NEXT: vpord %zmm3, %zmm0, %zmm0 +; AVX512BW-NEXT: vpbroadcastd %xmm2, %zmm2 ; AVX512BW-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; AVX512BW-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} ; AVX512BW-NEXT: retq @@ -679,8 +683,7 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i ; ; AVX512VLBW-LABEL: splatvar_funnnel_v16i32: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastd %xmm2, %zmm2 -; AVX512VLBW-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2 +; AVX512VLBW-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero ; AVX512VLBW-NEXT: vpsrld %xmm3, %zmm1, %zmm3 ; AVX512VLBW-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32] @@ -688,6 +691,7 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i ; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm4 = 
xmm4[0],zero,xmm4[1],zero ; AVX512VLBW-NEXT: vpslld %xmm4, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpord %zmm3, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpbroadcastd %xmm2, %zmm2 ; AVX512VLBW-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; AVX512VLBW-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} ; AVX512VLBW-NEXT: retq @@ -706,64 +710,53 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %amt) nounwind { ; AVX512F-LABEL: splatvar_funnnel_v32i16: ; AVX512F: # %bb.0: +; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm3 +; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero +; AVX512F-NEXT: vpsrlw %xmm4, %ymm3, %ymm3 +; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm4 +; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3 ; AVX512F-NEXT: vpbroadcastw %xmm2, %ymm2 -; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2 -; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-NEXT: vpsrlw %xmm3, %ymm4, %ymm4 -; AVX512F-NEXT: vpsrlw %xmm3, %ymm1, %ymm3 -; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3 -; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm4 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16] -; AVX512F-NEXT: vpsubw %xmm4, %xmm5, %xmm6 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm7 -; AVX512F-NEXT: vpsllw %xmm6, %ymm7, %ymm6 -; AVX512F-NEXT: vpsubw %xmm2, %xmm5, %xmm5 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero -; AVX512F-NEXT: vpsllw %xmm5, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm6, %zmm0, %zmm0 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] +; AVX512F-NEXT: vpsubw %xmm2, %xmm4, %xmm4 +; 
AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm5 +; AVX512F-NEXT: vpsllw %xmm4, %ymm5, %ymm5 +; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0 ; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm3 ; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vpcmpeqw %ymm0, %ymm4, %ymm4 ; AVX512F-NEXT: vpcmpeqw %ymm0, %ymm2, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ; AVX512F-NEXT: vpternlogq $202, %zmm3, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_funnnel_v32i16: ; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm3 +; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero +; AVX512VL-NEXT: vpsrlw %xmm4, %ymm3, %ymm3 +; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm4 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3 ; AVX512VL-NEXT: vpbroadcastw %xmm2, %ymm2 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2 -; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512VL-NEXT: vpsrlw %xmm3, %ymm4, %ymm4 -; AVX512VL-NEXT: vpsrlw %xmm3, %ymm1, %ymm3 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3 -; AVX512VL-NEXT: vextracti64x4 $1, %zmm2, %ymm4 -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16] -; AVX512VL-NEXT: vpsubw %xmm4, %xmm5, %xmm6 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero -; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm7 -; AVX512VL-NEXT: vpsllw %xmm6, %ymm7, %ymm6 -; AVX512VL-NEXT: vpsubw %xmm2, %xmm5, %xmm5 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero -; AVX512VL-NEXT: vpsllw %xmm5, %ymm0, %ymm0 -; 
AVX512VL-NEXT: vinserti64x4 $1, %ymm6, %zmm0, %zmm0 +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] +; AVX512VL-NEXT: vpsubw %xmm2, %xmm4, %xmm4 +; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm5 +; AVX512VL-NEXT: vpsllw %xmm4, %ymm5, %ymm5 +; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm0 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0 ; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm3 ; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512VL-NEXT: vpcmpeqw %ymm0, %ymm4, %ymm4 ; AVX512VL-NEXT: vpcmpeqw %ymm0, %ymm2, %ymm0 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ; AVX512VL-NEXT: vpternlogq $202, %zmm3, %zmm1, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastw %xmm2, %zmm2 -; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512BW-NEXT: vpsrlw %xmm3, %zmm1, %zmm3 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] @@ -771,6 +764,7 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512BW-NEXT: vpsllw %xmm4, %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0 +; AVX512BW-NEXT: vpbroadcastw %xmm2, %zmm2 ; AVX512BW-NEXT: vptestnmw %zmm2, %zmm2, %k1 ; AVX512BW-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} ; AVX512BW-NEXT: retq @@ -784,8 +778,7 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i ; ; AVX512VLBW-LABEL: splatvar_funnnel_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %zmm2 -; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 +; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; 
AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm1, %zmm3 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] @@ -793,6 +786,7 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512VLBW-NEXT: vpsllw %xmm4, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %zmm2 ; AVX512VLBW-NEXT: vptestnmw %zmm2, %zmm2, %k1 ; AVX512VLBW-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} ; AVX512VLBW-NEXT: retq @@ -811,88 +805,73 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt) nounwind { ; AVX512F-LABEL: splatvar_funnnel_v64i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2 -; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2 -; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 -; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero -; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-NEXT: vpsrlw %xmm3, %ymm4, %ymm4 +; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm3 +; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512F-NEXT: vpsrlw %xmm4, %ymm3, %ymm3 ; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 -; AVX512F-NEXT: vpsrlw %xmm3, %xmm5, %xmm6 +; AVX512F-NEXT: vpsrlw %xmm4, %xmm5, %xmm6 ; AVX512F-NEXT: vpsrlw $8, %xmm6, %xmm6 ; AVX512F-NEXT: vpbroadcastb %xmm6, %ymm6 -; AVX512F-NEXT: vpand %ymm6, %ymm4, %ymm4 -; AVX512F-NEXT: vpsrlw %xmm3, %ymm1, %ymm3 ; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3 -; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3 -; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm4 
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512F-NEXT: vpsubb %xmm4, %xmm6, %xmm7 -; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,zero,zero,zero,zero,xmm7[1],zero,zero,zero,zero,zero,zero,zero -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm8 -; AVX512F-NEXT: vpsllw %xmm7, %ymm8, %ymm8 -; AVX512F-NEXT: vpsllw %xmm7, %xmm5, %xmm7 -; AVX512F-NEXT: vpbroadcastb %xmm7, %ymm7 -; AVX512F-NEXT: vpand %ymm7, %ymm8, %ymm7 -; AVX512F-NEXT: vpsubb %xmm2, %xmm6, %xmm6 -; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,zero,zero,zero,zero,xmm6[1],zero,zero,zero,zero,zero,zero,zero -; AVX512F-NEXT: vpsllw %xmm6, %ymm0, %ymm0 -; AVX512F-NEXT: vpsllw %xmm6, %xmm5, %xmm5 +; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm4 +; AVX512F-NEXT: vpand %ymm6, %ymm4, %ymm4 +; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3 +; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512F-NEXT: vpsubb %xmm2, %xmm4, %xmm4 +; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm6 +; AVX512F-NEXT: vpsllw %xmm4, %ymm6, %ymm6 +; AVX512F-NEXT: vpsllw %xmm4, %xmm5, %xmm5 ; AVX512F-NEXT: vpbroadcastb %xmm5, %ymm5 +; AVX512F-NEXT: vpand %ymm5, %ymm6, %ymm6 +; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm0 ; AVX512F-NEXT: vpand %ymm5, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm7, %zmm0, %zmm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm6, %zmm0, %zmm0 ; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm3 ; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vpcmpeqb %ymm0, %ymm4, %ymm4 ; AVX512F-NEXT: vpcmpeqb %ymm0, %ymm2, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ; AVX512F-NEXT: vpternlogq $202, %zmm3, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_funnnel_v64i8: ; AVX512VL: # %bb.0: 
-; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2 -; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 -; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512VL-NEXT: vpsrlw %xmm3, %ymm4, %ymm4 +; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm3 +; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vpsrlw %xmm4, %ymm3, %ymm3 ; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 -; AVX512VL-NEXT: vpsrlw %xmm3, %xmm5, %xmm6 +; AVX512VL-NEXT: vpsrlw %xmm4, %xmm5, %xmm6 ; AVX512VL-NEXT: vpsrlw $8, %xmm6, %xmm6 ; AVX512VL-NEXT: vpbroadcastb %xmm6, %ymm6 -; AVX512VL-NEXT: vpand %ymm6, %ymm4, %ymm4 -; AVX512VL-NEXT: vpsrlw %xmm3, %ymm1, %ymm3 ; AVX512VL-NEXT: vpand %ymm6, %ymm3, %ymm3 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3 -; AVX512VL-NEXT: vextracti64x4 $1, %zmm2, %ymm4 -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VL-NEXT: vpsubb %xmm4, %xmm6, %xmm7 -; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,zero,zero,zero,zero,xmm7[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm8 -; AVX512VL-NEXT: vpsllw %xmm7, %ymm8, %ymm8 -; AVX512VL-NEXT: vpsllw %xmm7, %xmm5, %xmm7 -; AVX512VL-NEXT: vpbroadcastb %xmm7, %ymm7 -; AVX512VL-NEXT: vpand %ymm7, %ymm8, %ymm7 -; AVX512VL-NEXT: vpsubb %xmm2, %xmm6, %xmm6 -; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,zero,zero,zero,zero,xmm6[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VL-NEXT: vpsllw %xmm6, %ymm0, %ymm0 -; AVX512VL-NEXT: vpsllw %xmm6, %xmm5, %xmm5 +; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm4 +; AVX512VL-NEXT: vpand %ymm6, %ymm4, %ymm4 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3 +; 
AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2 +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VL-NEXT: vpsubb %xmm2, %xmm4, %xmm4 +; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm6 +; AVX512VL-NEXT: vpsllw %xmm4, %ymm6, %ymm6 +; AVX512VL-NEXT: vpsllw %xmm4, %xmm5, %xmm5 ; AVX512VL-NEXT: vpbroadcastb %xmm5, %ymm5 +; AVX512VL-NEXT: vpand %ymm5, %ymm6, %ymm6 +; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm0 ; AVX512VL-NEXT: vpand %ymm5, %ymm0, %ymm0 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm7, %zmm0, %zmm0 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm6, %zmm0, %zmm0 ; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm3 ; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512VL-NEXT: vpcmpeqb %ymm0, %ymm4, %ymm4 ; AVX512VL-NEXT: vpcmpeqb %ymm0, %ymm2, %ymm0 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ; AVX512VL-NEXT: vpternlogq $202, %zmm3, %zmm1, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v64i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastb %xmm2, %zmm2 -; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpsrlw %xmm3, %zmm1, %zmm4 ; AVX512BW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 @@ -908,14 +887,14 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512BW-NEXT: vpbroadcastb %xmm4, %zmm4 ; AVX512BW-NEXT: vpandq %zmm4, %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0 +; AVX512BW-NEXT: vpbroadcastb %xmm2, %zmm2 ; AVX512BW-NEXT: vptestnmb %zmm2, %zmm2, %k1 ; AVX512BW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1} ; AVX512BW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatvar_funnnel_v64i8: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: 
vpbroadcastb %xmm2, %zmm2 -; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 +; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VBMI2-NEXT: vpsrlw %xmm3, %zmm1, %zmm4 ; AVX512VBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 @@ -931,14 +910,14 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512VBMI2-NEXT: vpbroadcastb %xmm4, %zmm4 ; AVX512VBMI2-NEXT: vpandq %zmm4, %zmm0, %zmm0 ; AVX512VBMI2-NEXT: vporq %zmm3, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: vpbroadcastb %xmm2, %zmm2 ; AVX512VBMI2-NEXT: vptestnmb %zmm2, %zmm2, %k1 ; AVX512VBMI2-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1} ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v64i8: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %zmm2 -; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 +; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm1, %zmm4 ; AVX512VLBW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 @@ -954,14 +933,14 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512VLBW-NEXT: vpbroadcastb %xmm4, %zmm4 ; AVX512VLBW-NEXT: vpandq %zmm4, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %zmm2 ; AVX512VLBW-NEXT: vptestnmb %zmm2, %zmm2, %k1 ; AVX512VLBW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1} ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatvar_funnnel_v64i8: ; AVX512VLVBMI2: # %bb.0: -; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm2, %zmm2 -; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 +; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; 
AVX512VLVBMI2-NEXT: vpsrlw %xmm3, %zmm1, %zmm4 ; AVX512VLVBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 @@ -977,6 +956,7 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm4, %zmm4 ; AVX512VLVBMI2-NEXT: vpandq %zmm4, %zmm0, %zmm0 ; AVX512VLVBMI2-NEXT: vporq %zmm3, %zmm0, %zmm0 +; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm2, %zmm2 ; AVX512VLVBMI2-NEXT: vptestnmb %zmm2, %zmm2, %k1 ; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1} ; AVX512VLVBMI2-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll index e923df1c01423..d0966b8fab6e0 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll @@ -766,17 +766,17 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind ; ; XOPAVX1-LABEL: splatvar_funnnel_v2i64: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm1 +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] ; XOPAVX1-NEXT: vprotq %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: retq ; ; XOPAVX2-LABEL: splatvar_funnnel_v2i64: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; XOPAVX2-NEXT: vpsubq %xmm1, %xmm2, %xmm1 +; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1 ; XOPAVX2-NEXT: vprotq %xmm1, %xmm0, %xmm0 ; XOPAVX2-NEXT: retq ; @@ -824,7 +824,6 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind ; ; SSE41-LABEL: splatvar_funnnel_v4i32: ; SSE41: # %bb.0: -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; SSE41-NEXT: pxor %xmm2, %xmm2 ; SSE41-NEXT: psubd %xmm1, %xmm2 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm2 @@ -840,7 +839,6 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind ; ; AVX1-LABEL: splatvar_funnnel_v4i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} 
xmm1 = xmm1[0,0,0,0] ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 @@ -855,7 +853,6 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind ; ; AVX2-LABEL: splatvar_funnnel_v4i32: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpsubd %xmm1, %xmm2, %xmm1 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31] @@ -901,17 +898,17 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind ; ; XOPAVX1-LABEL: splatvar_funnnel_v4i32: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1 +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: retq ; ; XOPAVX2-LABEL: splatvar_funnnel_v4i32: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; XOPAVX2-NEXT: vpsubd %xmm1, %xmm2, %xmm1 +; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1 ; XOPAVX2-NEXT: vprotd %xmm1, %xmm0, %xmm0 ; XOPAVX2-NEXT: retq ; @@ -937,17 +934,15 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v8i16: ; SSE2: # %bb.0: -; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: psubw %xmm1, %xmm2 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm2 +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,0,0,0] +; SSE2-NEXT: pand %xmm2, %xmm1 +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: psllw %xmm1, %xmm3 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16] ; SSE2-NEXT: psubw %xmm2, %xmm1 -; SSE2-NEXT: pslldq {{.*#+}} xmm2 = 
zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1] -; SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; SSE2-NEXT: movdqa %xmm0, %xmm3 -; SSE2-NEXT: psllw %xmm2, %xmm3 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; SSE2-NEXT: psrlw %xmm1, %xmm0 @@ -956,8 +951,6 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind ; ; SSE41-LABEL: splatvar_funnnel_v8i16: ; SSE41: # %bb.0: -; SSE41-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; SSE41-NEXT: pxor %xmm2, %xmm2 ; SSE41-NEXT: psubw %xmm1, %xmm2 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm2 @@ -971,40 +964,22 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind ; SSE41-NEXT: por %xmm3, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: splatvar_funnnel_v8i16: -; AVX1: # %bb.0: -; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpsubw %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX1-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpor %xmm0, %xmm2, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: splatvar_funnnel_v8i16: -; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1 -; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpsubw %xmm1, %xmm2, %xmm1 -; AVX2-NEXT: vpand {{.*}}(%rip), %xmm1, 
%xmm1 -; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX2-NEXT: vpsllw %xmm2, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0 -; AVX2-NEXT: retq +; AVX-LABEL: splatvar_funnnel_v8i16: +; AVX: # %bb.0: +; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm2 +; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX-NEXT: retq ; ; AVX512-LABEL: splatvar_funnnel_v8i16: ; AVX512: # %bb.0: -; AVX512-NEXT: vpbroadcastw %xmm1, %xmm1 ; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512-NEXT: vpsubw %xmm1, %xmm2, %xmm1 ; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 @@ -1019,34 +994,32 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind ; ; XOPAVX1-LABEL: splatvar_funnnel_v8i16: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] -; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; XOPAVX1-NEXT: vprotw %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: retq ; ; XOPAVX2-LABEL: splatvar_funnnel_v8i16: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastw %xmm1, %xmm1 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; XOPAVX2-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; 
XOPAVX2-NEXT: vpbroadcastw %xmm1, %xmm1 ; XOPAVX2-NEXT: vprotw %xmm1, %xmm0, %xmm0 ; XOPAVX2-NEXT: retq ; ; X32-SSE-LABEL: splatvar_funnnel_v8i16: ; X32-SSE: # %bb.0: -; X32-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] -; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; X32-SSE-NEXT: pxor %xmm2, %xmm2 ; X32-SSE-NEXT: psubw %xmm1, %xmm2 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm2 +; X32-SSE-NEXT: movdqa {{.*#+}} xmm1 = [65535,0,0,0] +; X32-SSE-NEXT: pand %xmm2, %xmm1 +; X32-SSE-NEXT: movdqa %xmm0, %xmm3 +; X32-SSE-NEXT: psllw %xmm1, %xmm3 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16] ; X32-SSE-NEXT: psubw %xmm2, %xmm1 -; X32-SSE-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1] -; X32-SSE-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; X32-SSE-NEXT: movdqa %xmm0, %xmm3 -; X32-SSE-NEXT: psllw %xmm2, %xmm3 ; X32-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] ; X32-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X32-SSE-NEXT: psrlw %xmm1, %xmm0 @@ -1060,9 +1033,6 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v16i8: ; SSE2: # %bb.0: -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: psubb %xmm1, %xmm2 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm2 @@ -1095,7 +1065,6 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind ; SSE41-LABEL: splatvar_funnnel_v16i8: ; SSE41: # %bb.0: ; SSE41-NEXT: pxor %xmm2, %xmm2 -; SSE41-NEXT: pshufb %xmm2, 
%xmm1 ; SSE41-NEXT: pxor %xmm3, %xmm3 ; SSE41-NEXT: psubb %xmm1, %xmm3 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm3 @@ -1121,7 +1090,6 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind ; AVX1-LABEL: splatvar_funnnel_v16i8: ; AVX1: # %bb.0: ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero @@ -1142,7 +1110,6 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind ; ; AVX2-LABEL: splatvar_funnnel_v16i8: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpsubb %xmm1, %xmm2, %xmm1 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 @@ -1239,24 +1206,21 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind ; XOPAVX1-LABEL: splatvar_funnnel_v16i8: ; XOPAVX1: # %bb.0: ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 ; XOPAVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1 +; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 ; XOPAVX1-NEXT: vprotb %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: retq ; ; XOPAVX2-LABEL: splatvar_funnnel_v16i8: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; XOPAVX2-NEXT: vpsubb %xmm1, %xmm2, %xmm1 +; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1 ; XOPAVX2-NEXT: vprotb %xmm1, %xmm0, %xmm0 ; XOPAVX2-NEXT: retq ; ; X32-SSE-LABEL: splatvar_funnnel_v16i8: ; X32-SSE: # %bb.0: -; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X32-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] -; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; X32-SSE-NEXT: pxor %xmm2, %xmm2 ; X32-SSE-NEXT: psubb %xmm1, %xmm2 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm2 diff --git 
a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll index 723a9dc51bc82..930795283a24e 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll @@ -623,9 +623,9 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind ; ; XOPAVX1-LABEL: splatvar_funnnel_v4i64: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm1 +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; XOPAVX1-NEXT: vprotq %xmm1, %xmm2, %xmm2 ; XOPAVX1-NEXT: vprotq %xmm1, %xmm0, %xmm0 @@ -634,12 +634,11 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind ; ; XOPAVX2-LABEL: splatvar_funnnel_v4i64: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastq %xmm1, %ymm1 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; XOPAVX2-NEXT: vpsubq %ymm1, %ymm2, %ymm1 -; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3 -; XOPAVX2-NEXT: vprotq %xmm2, %xmm3, %xmm2 +; XOPAVX2-NEXT: vpsubq %xmm1, %xmm2, %xmm1 +; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1 +; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 +; XOPAVX2-NEXT: vprotq %xmm1, %xmm2, %xmm2 ; XOPAVX2-NEXT: vprotq %xmm1, %xmm0, %xmm0 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq @@ -651,9 +650,9 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind { ; AVX1-LABEL: splatvar_funnnel_v8i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero ; AVX1-NEXT: 
vextractf128 $1, %ymm0, %xmm3 @@ -671,9 +670,9 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind ; ; AVX2-LABEL: splatvar_funnnel_v8i32: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpsubd %xmm1, %xmm2, %xmm1 +; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31] ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero @@ -715,9 +714,9 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind ; ; XOPAVX1-LABEL: splatvar_funnnel_v8i32: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1 +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; XOPAVX1-NEXT: vprotd %xmm1, %xmm2, %xmm2 ; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0 @@ -726,12 +725,11 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind ; ; XOPAVX2-LABEL: splatvar_funnnel_v8i32: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastd %xmm1, %ymm1 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; XOPAVX2-NEXT: vpsubd %ymm1, %ymm2, %ymm1 -; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3 -; XOPAVX2-NEXT: vprotd %xmm2, %xmm3, %xmm2 +; XOPAVX2-NEXT: vpsubd %xmm1, %xmm2, %xmm1 +; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1 +; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 +; XOPAVX2-NEXT: vprotd %xmm1, %xmm2, %xmm2 ; XOPAVX2-NEXT: vprotd %xmm1, %xmm0, %xmm0 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq @@ -743,10 +741,10 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind { ; AVX1-LABEL: splatvar_funnnel_v16i16: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshuflw {{.*#+}} 
xmm1 = xmm1[0,0,2,3,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 @@ -764,9 +762,9 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw ; ; AVX2-LABEL: splatvar_funnnel_v16i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 ; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm2 @@ -779,9 +777,9 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw ; ; AVX512-LABEL: splatvar_funnnel_v16i16: ; AVX512: # %bb.0: -; AVX512-NEXT: vpbroadcastw %xmm1, %xmm1 ; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX512-NEXT: vpbroadcastw %xmm1, %xmm1 ; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 ; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512-NEXT: vpsllw %xmm2, %ymm0, %ymm2 @@ -794,10 +792,10 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw ; ; XOPAVX1-LABEL: splatvar_funnnel_v16i16: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] -; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; XOPAVX1-NEXT: 
vprotw %xmm1, %xmm2, %xmm2 ; XOPAVX1-NEXT: vprotw %xmm1, %xmm0, %xmm0 @@ -806,12 +804,11 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw ; ; XOPAVX2-LABEL: splatvar_funnnel_v16i16: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastw %xmm1, %ymm1 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; XOPAVX2-NEXT: vpsubw %ymm1, %ymm2, %ymm1 -; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3 -; XOPAVX2-NEXT: vprotw %xmm2, %xmm3, %xmm2 +; XOPAVX2-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; XOPAVX2-NEXT: vpbroadcastw %xmm1, %xmm1 +; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 +; XOPAVX2-NEXT: vprotw %xmm1, %xmm2, %xmm2 ; XOPAVX2-NEXT: vprotw %xmm1, %xmm0, %xmm0 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq @@ -824,8 +821,8 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind ; AVX1-LABEL: splatvar_funnnel_v32i8: ; AVX1: # %bb.0: ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 @@ -852,9 +849,9 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind ; ; AVX2-LABEL: splatvar_funnnel_v32i8: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpsubb %xmm1, %xmm2, %xmm1 +; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm3 @@ -875,9 +872,9 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind ; ; AVX512F-LABEL: splatvar_funnnel_v32i8: 
; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastb %xmm1, %xmm1 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: vpsubb %xmm1, %xmm2, %xmm1 +; AVX512F-NEXT: vpbroadcastb %xmm1, %xmm1 ; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm3 @@ -898,9 +895,9 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind ; ; AVX512VL-LABEL: splatvar_funnnel_v32i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastb %xmm1, %xmm1 ; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512VL-NEXT: vpsubb %xmm1, %xmm2, %xmm1 +; AVX512VL-NEXT: vpbroadcastb %xmm1, %xmm1 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm3 @@ -956,8 +953,8 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind ; XOPAVX1-LABEL: splatvar_funnnel_v32i8: ; XOPAVX1: # %bb.0: ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 ; XOPAVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1 +; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; XOPAVX1-NEXT: vprotb %xmm1, %xmm2, %xmm2 ; XOPAVX1-NEXT: vprotb %xmm1, %xmm0, %xmm0 @@ -966,12 +963,11 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind ; ; XOPAVX2-LABEL: splatvar_funnnel_v32i8: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastb %xmm1, %ymm1 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; XOPAVX2-NEXT: vpsubb %ymm1, %ymm2, %ymm1 -; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3 -; XOPAVX2-NEXT: vprotb %xmm2, %xmm3, %xmm2 +; XOPAVX2-NEXT: vpsubb %xmm1, %xmm2, %xmm1 +; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1 +; XOPAVX2-NEXT: vextracti128 $1, 
%ymm0, %xmm2 +; XOPAVX2-NEXT: vprotb %xmm1, %xmm2, %xmm2 ; XOPAVX2-NEXT: vprotb %xmm1, %xmm0, %xmm0 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll index c4ce14c6cc056..7dad18a324d73 100644 --- a/llvm/test/CodeGen/X86/vector-rotate-128.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll @@ -749,7 +749,6 @@ define <4 x i32> @splatvar_rotate_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind { ; ; SSE41-LABEL: splatvar_rotate_v4i32: ; SSE41: # %bb.0: -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; SSE41-NEXT: pand {{.*}}(%rip), %xmm1 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero ; SSE41-NEXT: movdqa %xmm0, %xmm3 @@ -763,7 +762,6 @@ define <4 x i32> @splatvar_rotate_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind { ; ; AVX1-LABEL: splatvar_rotate_v4i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero ; AVX1-NEXT: vpslld %xmm2, %xmm0, %xmm2 @@ -776,7 +774,6 @@ define <4 x i32> @splatvar_rotate_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind { ; ; AVX2-LABEL: splatvar_rotate_v4i32: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31] ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero @@ -854,15 +851,13 @@ define <4 x i32> @splatvar_rotate_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind { define <8 x i16> @splatvar_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { ; SSE2-LABEL: splatvar_rotate_v8i16: ; SSE2: # %bb.0: -; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,0,0,0] +; SSE2-NEXT: pand %xmm1, %xmm2 +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: psllw 
%xmm2, %xmm3 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16] ; SSE2-NEXT: psubw %xmm1, %xmm2 -; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] -; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; SSE2-NEXT: movdqa %xmm0, %xmm3 -; SSE2-NEXT: psllw %xmm1, %xmm3 ; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1] ; SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; SSE2-NEXT: psrlw %xmm2, %xmm0 @@ -871,8 +866,6 @@ define <8 x i16> @splatvar_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { ; ; SSE41-LABEL: splatvar_rotate_v8i16: ; SSE41: # %bb.0: -; SSE41-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; SSE41-NEXT: pand {{.*}}(%rip), %xmm1 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; SSE41-NEXT: movdqa %xmm0, %xmm3 @@ -884,36 +877,20 @@ define <8 x i16> @splatvar_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { ; SSE41-NEXT: por %xmm3, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: splatvar_rotate_v8i16: -; AVX1: # %bb.0: -; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] -; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX1-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpor %xmm0, %xmm2, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: splatvar_rotate_v8i16: -; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1 -; 
AVX2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX2-NEXT: vpsllw %xmm2, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0 -; AVX2-NEXT: retq +; AVX-LABEL: splatvar_rotate_v8i16: +; AVX: # %bb.0: +; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm2 +; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX-NEXT: retq ; ; AVX512-LABEL: splatvar_rotate_v8i16: ; AVX512: # %bb.0: -; AVX512-NEXT: vpbroadcastw %xmm1, %xmm1 ; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 ; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512-NEXT: vpsllw %xmm2, %xmm0, %xmm2 @@ -939,15 +916,13 @@ define <8 x i16> @splatvar_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { ; ; X32-SSE-LABEL: splatvar_rotate_v8i16: ; X32-SSE: # %bb.0: -; X32-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] -; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm1 +; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [65535,0,0,0] +; X32-SSE-NEXT: pand %xmm1, %xmm2 +; X32-SSE-NEXT: movdqa %xmm0, %xmm3 +; X32-SSE-NEXT: psllw %xmm2, %xmm3 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16] ; X32-SSE-NEXT: psubw %xmm1, %xmm2 -; X32-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] -; X32-SSE-NEXT: psrldq {{.*#+}} xmm1 = 
xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; X32-SSE-NEXT: movdqa %xmm0, %xmm3 -; X32-SSE-NEXT: psllw %xmm1, %xmm3 ; X32-SSE-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1] ; X32-SSE-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X32-SSE-NEXT: psrlw %xmm2, %xmm0 @@ -964,9 +939,6 @@ define <8 x i16> @splatvar_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { define <16 x i8> @splatvar_rotate_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; SSE2-LABEL: splatvar_rotate_v16i8: ; SSE2: # %bb.0: -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; SSE2-NEXT: psubb %xmm1, %xmm2 @@ -997,44 +969,42 @@ define <16 x i8> @splatvar_rotate_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; ; SSE41-LABEL: splatvar_rotate_v16i8: ; SSE41: # %bb.0: -; SSE41-NEXT: pxor %xmm3, %xmm3 -; SSE41-NEXT: pshufb %xmm3, %xmm1 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE41-NEXT: pmovzxbq {{.*#+}} xmm4 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero +; SSE41-NEXT: pmovzxbq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: psllw %xmm4, %xmm2 +; SSE41-NEXT: psllw %xmm3, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm4 ; SSE41-NEXT: pcmpeqd %xmm5, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm6, %xmm6 -; SSE41-NEXT: psllw %xmm4, %xmm6 -; SSE41-NEXT: pshufb %xmm3, %xmm6 -; SSE41-NEXT: pand %xmm6, %xmm2 +; SSE41-NEXT: psllw %xmm3, %xmm5 +; SSE41-NEXT: pxor %xmm3, %xmm3 +; SSE41-NEXT: pshufb %xmm3, %xmm5 +; SSE41-NEXT: pand %xmm5, %xmm2 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = 
[8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; SSE41-NEXT: psubb %xmm1, %xmm3 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero ; SSE41-NEXT: psrlw %xmm1, %xmm0 -; SSE41-NEXT: psrlw %xmm1, %xmm5 -; SSE41-NEXT: pshufb {{.*#+}} xmm5 = xmm5[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] -; SSE41-NEXT: pand %xmm0, %xmm5 -; SSE41-NEXT: por %xmm5, %xmm2 +; SSE41-NEXT: psrlw %xmm1, %xmm4 +; SSE41-NEXT: pshufb {{.*#+}} xmm4 = xmm4[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; SSE41-NEXT: pand %xmm0, %xmm4 +; SSE41-NEXT: por %xmm4, %xmm2 ; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: splatvar_rotate_v16i8: ; AVX1: # %bb.0: -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero -; AVX1-NEXT: vpsllw %xmm3, %xmm0, %xmm4 -; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 -; AVX1-NEXT: vpsllw %xmm3, %xmm5, %xmm3 -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpand %xmm2, %xmm4, %xmm2 +; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero +; AVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm3 +; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpsllw %xmm2, %xmm4, %xmm2 +; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5 +; AVX1-NEXT: vpshufb %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX1-NEXT: vpsubb %xmm1, %xmm3, %xmm1 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrlw %xmm1, %xmm5, %xmm1 +; AVX1-NEXT: vpsrlw %xmm1, %xmm4, %xmm1 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; 
AVX1-NEXT: vpor %xmm0, %xmm2, %xmm0 @@ -1042,7 +1012,6 @@ define <16 x i8> @splatvar_rotate_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; ; AVX2-LABEL: splatvar_rotate_v16i8: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX2-NEXT: vpsllw %xmm2, %xmm0, %xmm3 @@ -1137,9 +1106,6 @@ define <16 x i8> @splatvar_rotate_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; ; X32-SSE-LABEL: splatvar_rotate_v16i8: ; X32-SSE: # %bb.0: -; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X32-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] -; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm1 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; X32-SSE-NEXT: psubb %xmm1, %xmm2 diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll index 9ad8d44acb24b..dc0d0a1168b71 100644 --- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll @@ -645,17 +645,17 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind { ; ; XOPAVX1-LABEL: splatvar_shift_v2i64: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm1 +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] ; XOPAVX1-NEXT: vpshaq %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: retq ; ; XOPAVX2-LABEL: splatvar_shift_v2i64: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; XOPAVX2-NEXT: vpsubq %xmm1, %xmm2, %xmm1 +; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1 ; XOPAVX2-NEXT: vpshaq %xmm1, %xmm0, %xmm0 ; XOPAVX2-NEXT: retq ; @@ -848,16 +848,16 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> 
%a, <16 x i8> %b) nounwind { ; XOPAVX1-LABEL: splatvar_shift_v16i8: ; XOPAVX1: # %bb.0: ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 ; XOPAVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1 +; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 ; XOPAVX1-NEXT: vpshab %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: retq ; ; XOPAVX2-LABEL: splatvar_shift_v16i8: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; XOPAVX2-NEXT: vpsubb %xmm1, %xmm2, %xmm1 +; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1 ; XOPAVX2-NEXT: vpshab %xmm1, %xmm0, %xmm0 ; XOPAVX2-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll index a587a43f1bbb1..1a2889ab861e2 100644 --- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll @@ -698,16 +698,16 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; XOPAVX1-LABEL: splatvar_shift_v16i8: ; XOPAVX1: # %bb.0: ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 ; XOPAVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1 +; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 ; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: retq ; ; XOPAVX2-LABEL: splatvar_shift_v16i8: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; XOPAVX2-NEXT: vpsubb %xmm1, %xmm2, %xmm1 +; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1 ; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0 ; XOPAVX2-NEXT: retq ; From 4d20e31f736c76785e03367c036183474459ef9a Mon Sep 17 00:00:00 2001 From: Serge Pavlov Date: Tue, 26 May 2020 19:24:05 +0700 Subject: [PATCH 091/770] [FPEnv] Intrinsic llvm.roundeven This intrinsic implements IEEE-754 operation roundToIntegralTiesToEven, and performs rounding to the nearest integer value, rounding halfway cases to even. 
The intrinsic covers the previously missing case of the IEEE-754 rounding operations, so LLVM now provides full support for the rounding operations defined by the standard. Differential Revision: https://reviews.llvm.org/D75670 --- llvm/docs/LangRef.rst | 74 ++++++++++++++++ .../llvm/Analysis/TargetLibraryInfo.def | 9 ++ llvm/include/llvm/CodeGen/BasicTTIImpl.h | 3 + llvm/include/llvm/CodeGen/ISDOpcodes.h | 2 + llvm/include/llvm/IR/ConstrainedOps.def | 1 + llvm/include/llvm/IR/Intrinsics.td | 4 + llvm/include/llvm/IR/RuntimeLibcalls.def | 5 ++ llvm/lib/Analysis/ConstantFolding.cpp | 7 ++ llvm/lib/Analysis/InstructionSimplify.cpp | 2 + llvm/lib/Analysis/TargetLibraryInfo.cpp | 3 + llvm/lib/Analysis/ValueTracking.cpp | 5 ++ llvm/lib/Analysis/VectorUtils.cpp | 1 + llvm/lib/CodeGen/IntrinsicLowering.cpp | 4 + llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 9 ++ .../SelectionDAG/LegalizeFloatTypes.cpp | 25 ++++++ llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2 + .../SelectionDAG/LegalizeVectorOps.cpp | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 3 + .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1 + .../SelectionDAG/SelectionDAGBuilder.cpp | 2 + .../SelectionDAG/SelectionDAGDumper.cpp | 2 + llvm/lib/CodeGen/TargetLoweringBase.cpp | 2 + .../InstCombine/InstCombineCalls.cpp | 1 + .../InstCombine/InstCombineCasts.cpp | 1 + .../lib/Transforms/Utils/SimplifyLibCalls.cpp | 2 + llvm/test/CodeGen/Generic/fpoperations.ll | 21 +++++ .../ExecutionEngine/Interpreter/intrinsics.ll | 4 + .../InstCombine/double-float-shrink-2.ll | 86 +++++++++++++++++++ .../InstCombine/float-shrink-compare.ll | 54 ++++++++++++ .../InstSimplify/known-never-nan.ll | 11 +++ .../InstSimplify/round-intrinsics.ll | 11 +++ llvm/test/Transforms/LICM/hoist-round.ll | 5 +- .../Transforms/LoopVectorize/intrinsic.ll | 52 +++++++++++ .../Analysis/TargetLibraryInfoTest.cpp | 3 + llvm/unittests/IR/IRBuilderTest.cpp | 25 ++++++ 35 files changed, 442 insertions(+), 1 deletion(-) create mode 100644
llvm/test/CodeGen/Generic/fpoperations.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 8bcad09964e20..01f41a7ea3f17 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -13160,6 +13160,44 @@ Semantics: This function returns the same values as the libm ``round`` functions would, and handles error conditions in the same way. +'``llvm.roundeven.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.roundeven`` on any +floating-point or vector of floating-point type. Not all targets support +all types however. + +:: + + declare float @llvm.roundeven.f32(float %Val) + declare double @llvm.roundeven.f64(double %Val) + declare x86_fp80 @llvm.roundeven.f80(x86_fp80 %Val) + declare fp128 @llvm.roundeven.f128(fp128 %Val) + declare ppc_fp128 @llvm.roundeven.ppcf128(ppc_fp128 %Val) + +Overview: +""""""""" + +The '``llvm.roundeven.*``' intrinsics return the operand rounded to the nearest +integer in floating-point format, rounding halfway cases to even (that is, to the +nearest value that is an even integer). + +Arguments: +"""""""""" + +The argument and return value are floating-point numbers of the same type. + +Semantics: +"""""""""" + +This function implements IEEE-754 operation ``roundToIntegralTiesToEven``. It +also behaves in the same way as C standard function ``roundeven``, except that +it does not raise floating point exceptions. + + '``llvm.lround.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -18174,6 +18212,42 @@ This function returns the same values as the libm ``round`` functions would and handles error conditions in the same way.
+'``llvm.experimental.constrained.roundeven``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <type> + @llvm.experimental.constrained.roundeven(<type> <op1>, + metadata <exception behavior>) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.roundeven``' intrinsic returns the first +operand rounded to the nearest integer in floating-point format, rounding +halfway cases to even (that is, to the nearest value that is an even integer), +regardless of the current rounding direction. + +Arguments: +"""""""""" + +The first argument and the return value are floating-point numbers of the same +type. + +The second argument specifies the exception behavior as described above. + +Semantics: +"""""""""" + +This function implements IEEE-754 operation ``roundToIntegralTiesToEven``. It +also behaves in the same way as C standard function ``roundeven`` and can signal +the invalid operation exception for a SNAN operand. + + '``llvm.experimental.constrained.lround``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def index f782c56d96a56..0022e7b8b5569 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def @@ -1158,6 +1158,15 @@ TLI_DEFINE_STRING_INTERNAL("rmdir") /// double round(double x); TLI_DEFINE_ENUM_INTERNAL(round) TLI_DEFINE_STRING_INTERNAL("round") +/// double roundeven(double x); +TLI_DEFINE_ENUM_INTERNAL(roundeven) +TLI_DEFINE_STRING_INTERNAL("roundeven") +/// float roundevenf(float x); +TLI_DEFINE_ENUM_INTERNAL(roundevenf) +TLI_DEFINE_STRING_INTERNAL("roundevenf") +/// long double roundevenl(long double x); +TLI_DEFINE_ENUM_INTERNAL(roundevenl) +TLI_DEFINE_STRING_INTERNAL("roundevenl") /// float roundf(float x); TLI_DEFINE_ENUM_INTERNAL(roundf) TLI_DEFINE_STRING_INTERNAL("roundf") diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 7866e71853cf3..cc751a5b47898 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1328,6 +1328,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::round: ISDs.push_back(ISD::FROUND); break; + case Intrinsic::roundeven: + ISDs.push_back(ISD::FROUNDEVEN); + break; case Intrinsic::pow: ISDs.push_back(ISD::FPOW); break; diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index 839e82d9d84f7..f081a53263eff 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -376,6 +376,7 @@ enum NodeType { STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND, + STRICT_FROUNDEVEN, STRICT_FTRUNC, STRICT_LROUND, STRICT_LLROUND, @@ -752,6 +753,7 @@ enum NodeType { FRINT, FNEARBYINT, FROUND, + FROUNDEVEN, FFLOOR, LROUND, LLROUND, diff --git a/llvm/include/llvm/IR/ConstrainedOps.def b/llvm/include/llvm/IR/ConstrainedOps.def index 9be92b36da9f0..ecba68fe0c0e3 100644 --- a/llvm/include/llvm/IR/ConstrainedOps.def +++ b/llvm/include/llvm/IR/ConstrainedOps.def @@ -91,6 +91,7 @@ DAG_FUNCTION(pow, 2, 1, experimental_constrained_pow, FPOW) DAG_FUNCTION(powi, 2, 1, experimental_constrained_powi, FPOWI) DAG_FUNCTION(rint, 1, 1, experimental_constrained_rint, FRINT) DAG_FUNCTION(round, 1, 0, experimental_constrained_round, FROUND) +DAG_FUNCTION(roundeven, 1, 0, experimental_constrained_roundeven, FROUNDEVEN) DAG_FUNCTION(sin, 1, 1, experimental_constrained_sin, FSIN) DAG_FUNCTION(sqrt, 1, 1, experimental_constrained_sqrt, FSQRT) DAG_FUNCTION(trunc, 1, 0, experimental_constrained_trunc, FTRUNC) diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 51df06cee3587..7bfb25b0ed7dd 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -579,6 +579,7 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { def 
int_rint : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_nearbyint : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_round : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_roundeven : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_canonicalize : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; @@ -783,6 +784,9 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in { def int_experimental_constrained_round : Intrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, llvm_metadata_ty ]>; + def int_experimental_constrained_roundeven : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty ]>; def int_experimental_constrained_trunc : Intrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, llvm_metadata_ty ]>; diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def index fe2c32e3c975e..903db6c704987 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.def +++ b/llvm/include/llvm/IR/RuntimeLibcalls.def @@ -234,6 +234,11 @@ HANDLE_LIBCALL(ROUND_F64, "round") HANDLE_LIBCALL(ROUND_F80, "roundl") HANDLE_LIBCALL(ROUND_F128, "roundl") HANDLE_LIBCALL(ROUND_PPCF128, "roundl") +HANDLE_LIBCALL(ROUNDEVEN_F32, "roundevenf") +HANDLE_LIBCALL(ROUNDEVEN_F64, "roundeven") +HANDLE_LIBCALL(ROUNDEVEN_F80, "roundevenl") +HANDLE_LIBCALL(ROUNDEVEN_F128, "roundevenl") +HANDLE_LIBCALL(ROUNDEVEN_PPCF128, "roundevenl") HANDLE_LIBCALL(FLOOR_F32, "floorf") HANDLE_LIBCALL(FLOOR_F64, "floor") HANDLE_LIBCALL(FLOOR_F80, "floorl") diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index 4fdc73cdbe570..7eafc7a6623f7 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -1493,6 +1493,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case Intrinsic::ceil: case Intrinsic::floor: case Intrinsic::round: + case Intrinsic::roundeven: case Intrinsic::trunc: case 
Intrinsic::nearbyint: case Intrinsic::rint: @@ -1501,6 +1502,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case Intrinsic::experimental_constrained_ceil: case Intrinsic::experimental_constrained_floor: case Intrinsic::experimental_constrained_round: + case Intrinsic::experimental_constrained_roundeven: case Intrinsic::experimental_constrained_trunc: case Intrinsic::experimental_constrained_nearbyint: case Intrinsic::experimental_constrained_rint: @@ -1785,6 +1787,11 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, return ConstantFP::get(Ty->getContext(), U); } + if (IntrinsicID == Intrinsic::roundeven) { + U.roundToIntegral(APFloat::rmNearestTiesToEven); + return ConstantFP::get(Ty->getContext(), U); + } + if (IntrinsicID == Intrinsic::ceil) { U.roundToIntegral(APFloat::rmTowardPositive); return ConstantFP::get(Ty->getContext(), U); diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 15f5a9c672c8d..45850e41f978b 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -5061,6 +5061,7 @@ static bool IsIdempotent(Intrinsic::ID ID) { case Intrinsic::rint: case Intrinsic::nearbyint: case Intrinsic::round: + case Intrinsic::roundeven: case Intrinsic::canonicalize: return true; } @@ -5176,6 +5177,7 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0, case Intrinsic::trunc: case Intrinsic::ceil: case Intrinsic::round: + case Intrinsic::roundeven: case Intrinsic::nearbyint: case Intrinsic::rint: { // floor (sitofp x) -> sitofp x diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index cae71d130d79b..336480e8b9d99 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -1341,6 +1341,9 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_round: case LibFunc_roundf: case LibFunc_roundl: + 
case LibFunc_roundeven: + case LibFunc_roundevenf: + case LibFunc_roundevenl: case LibFunc_sin: case LibFunc_sinf: case LibFunc_sinh: diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 1b73a2062095c..545dab7714df7 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -3227,6 +3227,10 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB, case LibFunc_roundf: case LibFunc_roundl: return Intrinsic::round; + case LibFunc_roundeven: + case LibFunc_roundevenf: + case LibFunc_roundevenl: + return Intrinsic::roundeven; case LibFunc_pow: case LibFunc_powf: case LibFunc_powl: @@ -3567,6 +3571,7 @@ bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI, case Intrinsic::rint: case Intrinsic::nearbyint: case Intrinsic::round: + case Intrinsic::roundeven: return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1); case Intrinsic::sqrt: return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1) && diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index 8a8bb19f36637..23531b65ea32d 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -78,6 +78,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { case Intrinsic::rint: case Intrinsic::nearbyint: case Intrinsic::round: + case Intrinsic::roundeven: case Intrinsic::pow: case Intrinsic::fma: case Intrinsic::fmuladd: diff --git a/llvm/lib/CodeGen/IntrinsicLowering.cpp b/llvm/lib/CodeGen/IntrinsicLowering.cpp index d6635a6337aa9..e37c21e765977 100644 --- a/llvm/lib/CodeGen/IntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -421,6 +421,10 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { ReplaceFPIntrinsicWithCall(CI, "roundf", "round", "roundl"); break; } + case Intrinsic::roundeven: { + ReplaceFPIntrinsicWithCall(CI, "roundevenf", "roundeven", "roundevenl"); + break; + } case Intrinsic::copysign: { 
ReplaceFPIntrinsicWithCall(CI, "copysignf", "copysign", "copysignl"); break; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 8bf6cb514144b..2ffcc859f8051 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4107,6 +4107,14 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::ROUND_F128, RTLIB::ROUND_PPCF128, Results); break; + case ISD::FROUNDEVEN: + case ISD::STRICT_FROUNDEVEN: + ExpandFPLibCall(Node, RTLIB::ROUNDEVEN_F32, + RTLIB::ROUNDEVEN_F64, + RTLIB::ROUNDEVEN_F80, + RTLIB::ROUNDEVEN_F128, + RTLIB::ROUNDEVEN_PPCF128, Results); + break; case ISD::FPOWI: case ISD::STRICT_FPOWI: { RTLIB::Libcall LC; @@ -4601,6 +4609,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::FRINT: case ISD::FNEARBYINT: case ISD::FROUND: + case ISD::FROUNDEVEN: case ISD::FTRUNC: case ISD::FNEG: case ISD::FSQRT: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 37e5abaae3eae..7e8ad28f9b143 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -113,6 +113,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break; case ISD::STRICT_FROUND: case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break; + case ISD::STRICT_FROUNDEVEN: + case ISD::FROUNDEVEN: R = SoftenFloatRes_FROUNDEVEN(N); break; case ISD::STRICT_FSIN: case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; case ISD::STRICT_FSQRT: @@ -616,6 +618,15 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { RTLIB::ROUND_PPCF128)); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FROUNDEVEN(SDNode *N) { + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::ROUNDEVEN_F32, + RTLIB::ROUNDEVEN_F64, + RTLIB::ROUNDEVEN_F80, + 
RTLIB::ROUNDEVEN_F128, + RTLIB::ROUNDEVEN_PPCF128)); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::SIN_F32, @@ -1178,6 +1189,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break; case ISD::STRICT_FROUND: case ISD::FROUND: ExpandFloatRes_FROUND(N, Lo, Hi); break; + case ISD::STRICT_FROUNDEVEN: + case ISD::FROUNDEVEN: ExpandFloatRes_FROUNDEVEN(N, Lo, Hi); break; case ISD::STRICT_FSIN: case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break; case ISD::STRICT_FSQRT: @@ -1504,6 +1517,16 @@ void DAGTypeLegalizer::ExpandFloatRes_FROUND(SDNode *N, RTLIB::ROUND_PPCF128), Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FROUNDEVEN(SDNode *N, + SDValue &Lo, SDValue &Hi) { + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::ROUNDEVEN_F32, + RTLIB::ROUNDEVEN_F64, + RTLIB::ROUNDEVEN_F80, + RTLIB::ROUNDEVEN_F128, + RTLIB::ROUNDEVEN_PPCF128), Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N, SDValue &Lo, SDValue &Hi) { ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), @@ -2136,6 +2159,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { case ISD::FNEG: case ISD::FRINT: case ISD::FROUND: + case ISD::FROUNDEVEN: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: @@ -2476,6 +2500,7 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::FREEZE: case ISD::FRINT: case ISD::FROUND: + case ISD::FROUNDEVEN: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index b729565ef7e73..4bc75ceb4928e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -530,6 +530,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftenFloatRes_FREM(SDNode *N); SDValue 
SoftenFloatRes_FRINT(SDNode *N); SDValue SoftenFloatRes_FROUND(SDNode *N); + SDValue SoftenFloatRes_FROUNDEVEN(SDNode *N); SDValue SoftenFloatRes_FSIN(SDNode *N); SDValue SoftenFloatRes_FSQRT(SDNode *N); SDValue SoftenFloatRes_FSUB(SDNode *N); @@ -603,6 +604,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FROUNDEVEN(SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 8f746ec45f6c6..93ce338ff2327 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -427,6 +427,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FRINT: case ISD::FNEARBYINT: case ISD::FROUND: + case ISD::FROUNDEVEN: case ISD::FFLOOR: case ISD::FP_ROUND: case ISD::FP_EXTEND: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 6601dc68223eb..ff2c8d3a8db22 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -95,6 +95,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_UINT: case ISD::FRINT: case ISD::FROUND: + case ISD::FROUNDEVEN: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: @@ -888,6 +889,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_UINT: case ISD::FRINT: case ISD::FROUND: + case ISD::FROUNDEVEN: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: @@ 
-2825,6 +2827,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FNEARBYINT: case ISD::FRINT: case ISD::FROUND: + case ISD::FROUNDEVEN: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 2dcab73b177b7..cfb15d6ca9d7c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4103,6 +4103,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const case ISD::FFLOOR: case ISD::FCEIL: case ISD::FROUND: + case ISD::FROUNDEVEN: case ISD::FRINT: case ISD::FNEARBYINT: { if (SNaN) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index efdf696f87944..dd03e415910cc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6072,6 +6072,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::rint: case Intrinsic::nearbyint: case Intrinsic::round: + case Intrinsic::roundeven: case Intrinsic::canonicalize: { unsigned Opcode; switch (Intrinsic) { @@ -6086,6 +6087,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::rint: Opcode = ISD::FRINT; break; case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; case Intrinsic::round: Opcode = ISD::FROUND; break; + case Intrinsic::roundeven: Opcode = ISD::FROUNDEVEN; break; case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 816b1dcded2e8..7f9b8b7b28a38 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -211,6 +211,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { 
case ISD::STRICT_FNEARBYINT: return "strict_fnearbyint"; case ISD::FROUND: return "fround"; case ISD::STRICT_FROUND: return "strict_fround"; + case ISD::FROUNDEVEN: return "froundeven"; + case ISD::STRICT_FROUNDEVEN: return "strict_froundeven"; case ISD::FEXP: return "fexp"; case ISD::STRICT_FEXP: return "strict_fexp"; case ISD::FEXP2: return "fexp2"; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index b8062672efec9..62c3af95f9528 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -693,6 +693,7 @@ void TargetLoweringBase::initActions() { // These library functions default to expand. setOperationAction(ISD::FROUND, VT, Expand); + setOperationAction(ISD::FROUNDEVEN, VT, Expand); setOperationAction(ISD::FPOWI, VT, Expand); // These operations default to expand for vector types. @@ -758,6 +759,7 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::FRINT, VT, Expand); setOperationAction(ISD::FTRUNC, VT, Expand); setOperationAction(ISD::FROUND, VT, Expand); + setOperationAction(ISD::FROUNDEVEN, VT, Expand); setOperationAction(ISD::LROUND, VT, Expand); setOperationAction(ISD::LLROUND, VT, Expand); setOperationAction(ISD::LRINT, VT, Expand); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 118013a387647..7e20d241bbab5 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2422,6 +2422,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ceil: case Intrinsic::floor: case Intrinsic::round: + case Intrinsic::roundeven: case Intrinsic::nearbyint: case Intrinsic::rint: case Intrinsic::trunc: { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index c68f9e8980071..714d1ae8aaec3 100644 --- 
a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1741,6 +1741,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) { case Intrinsic::nearbyint: case Intrinsic::rint: case Intrinsic::round: + case Intrinsic::roundeven: case Intrinsic::trunc: { Value *Src = II->getArgOperand(0); if (!Src->hasOneUse()) diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 828f4ee5bbe46..c32db981ee7c2 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -2930,6 +2930,8 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI, return replaceUnaryCall(CI, Builder, Intrinsic::floor); case LibFunc_round: return replaceUnaryCall(CI, Builder, Intrinsic::round); + case LibFunc_roundeven: + return replaceUnaryCall(CI, Builder, Intrinsic::roundeven); case LibFunc_nearbyint: return replaceUnaryCall(CI, Builder, Intrinsic::nearbyint); case LibFunc_rint: diff --git a/llvm/test/CodeGen/Generic/fpoperations.ll b/llvm/test/CodeGen/Generic/fpoperations.ll new file mode 100644 index 0000000000000..53dd307db2492 --- /dev/null +++ b/llvm/test/CodeGen/Generic/fpoperations.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s | FileCheck %s + +; This test checks default lowering of the intrinsics operating on floating-point +; values. MSP430 is used as a target in this test because it does not have +; native FP support, so it won't get custom lowering for these intrinsics.
+; +; REQUIRES: msp430-registered-target + +target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16" +target triple = "msp430---elf" + + +define float @roundeven_01(float %x) { +entry: + %res = call float @llvm.roundeven.f32(float %x) + ret float %res +} +; CHECK-LABEL: roundeven_01: +; CHECK: call #roundeven + +declare float @llvm.roundeven.f32(float %x) diff --git a/llvm/test/ExecutionEngine/Interpreter/intrinsics.ll b/llvm/test/ExecutionEngine/Interpreter/intrinsics.ll index 49d0bbee30484..468b6b7ab24eb 100644 --- a/llvm/test/ExecutionEngine/Interpreter/intrinsics.ll +++ b/llvm/test/ExecutionEngine/Interpreter/intrinsics.ll @@ -13,6 +13,8 @@ declare float @llvm.trunc.f32(float) declare double @llvm.trunc.f64(double) declare float @llvm.round.f32(float) declare double @llvm.round.f64(double) +declare float @llvm.roundeven.f32(float) +declare double @llvm.roundeven.f64(double) declare float @llvm.copysign.f32(float, float) declare double @llvm.copysign.f64(double, double) @@ -29,6 +31,8 @@ define i32 @main() { %trunc64 = call double @llvm.trunc.f64(double 0.000000e+00) %round32 = call float @llvm.round.f32(float 0.000000e+00) %round64 = call double @llvm.round.f64(double 0.000000e+00) + %roundeven32 = call float @llvm.roundeven.f32(float 0.000000e+00) + %roundeven64 = call double @llvm.roundeven.f64(double 0.000000e+00) %copysign32 = call float @llvm.copysign.f32(float 0.000000e+00, float 0.000000e+00) %copysign64 = call double @llvm.copysign.f64(double 0.000000e+00, double 0.000000e+00) ret i32 0 diff --git a/llvm/test/Transforms/InstCombine/double-float-shrink-2.ll b/llvm/test/Transforms/InstCombine/double-float-shrink-2.ll index 76e497bd68fc7..3a8f224b1d814 100644 --- a/llvm/test/Transforms/InstCombine/double-float-shrink-2.ll +++ b/llvm/test/Transforms/InstCombine/double-float-shrink-2.ll @@ -10,6 +10,7 @@ declare double @floor(double) declare double @ceil(double) declare double @round(double) +declare double @roundeven(double) declare double 
@nearbyint(double) declare double @trunc(double) declare double @fabs(double) @@ -32,6 +33,9 @@ declare <2 x float> @llvm.rint.v2f32(<2 x float>) declare double @llvm.round.f64(double) declare <2 x double> @llvm.round.v2f64(<2 x double>) +declare double @llvm.roundeven.f64(double) +declare <2 x double> @llvm.roundeven.v2f64(<2 x double>) + declare double @llvm.trunc.f64(double) declare <2 x double> @llvm.trunc.v2f64(<2 x double>) @@ -71,6 +75,18 @@ define float @test_shrink_libcall_round(float %C) { ret float %F } +define float @test_shrink_libcall_roundeven(float %C) { +; CHECK-LABEL: @test_shrink_libcall_roundeven( +; CHECK-NEXT: [[F:%.*]] = call float @llvm.roundeven.f32(float [[C:%.*]]) +; CHECK-NEXT: ret float [[F]] +; + %D = fpext float %C to double + ; --> roundeven + %E = call double @roundeven(double %D) + %F = fptrunc double %E to float + ret float %F +} + define float @test_shrink_libcall_nearbyint(float %C) { ; CHECK-LABEL: @test_shrink_libcall_nearbyint( ; CHECK-NEXT: [[F:%.*]] = call float @llvm.nearbyint.f32(float [[C:%.*]]) @@ -186,6 +202,17 @@ define float @test_shrink_intrin_round(float %C) { ret float %F } +define float @test_shrink_intrin_roundeven(float %C) { +; CHECK-LABEL: @test_shrink_intrin_roundeven( +; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.roundeven.f32(float [[C:%.*]]) +; CHECK-NEXT: ret float [[TMP1]] +; + %D = fpext float %C to double + %E = call double @llvm.roundeven.f64(double %D) + %F = fptrunc double %E to float + ret float %F +} + define float @test_shrink_intrin_trunc(float %C) { ; CHECK-LABEL: @test_shrink_intrin_trunc( ; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.trunc.f32(float [[C:%.*]]) @@ -292,6 +319,23 @@ define <2 x float> @test_shrink_intrin_round_multi_use(<2 x float> %C) { ret <2 x float> %F } +define <2 x float> @test_shrink_intrin_roundeven_multi_use(<2 x float> %C) { +; CHECK-LABEL: @test_shrink_intrin_roundeven_multi_use( +; CHECK-NEXT: [[D:%.*]] = fpext <2 x float> [[C:%.*]] to <2 x double> +; CHECK-NEXT: 
[[E:%.*]] = call <2 x double> @llvm.roundeven.v2f64(<2 x double> [[D]]) +; CHECK-NEXT: [[F:%.*]] = fptrunc <2 x double> [[E]] to <2 x float> +; CHECK-NEXT: call void @use_v2f64(<2 x double> [[D]]) +; CHECK-NEXT: call void @use_v2f64(<2 x double> [[E]]) +; CHECK-NEXT: ret <2 x float> [[F]] +; + %D = fpext <2 x float> %C to <2 x double> + %E = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %D) + %F = fptrunc <2 x double> %E to <2 x float> + call void @use_v2f64(<2 x double> %D) + call void @use_v2f64(<2 x double> %E) + ret <2 x float> %F +} + define <2 x float> @test_shrink_intrin_trunc_multi_use(<2 x float> %C) { ; CHECK-LABEL: @test_shrink_intrin_trunc_multi_use( ; CHECK-NEXT: [[D:%.*]] = fpext <2 x float> [[C:%.*]] to <2 x double> @@ -352,6 +396,17 @@ define float @test_no_shrink_intrin_round(double %D) { ret float %F } +define float @test_no_shrink_intrin_roundeven(double %D) { +; CHECK-LABEL: @test_no_shrink_intrin_roundeven( +; CHECK-NEXT: [[E:%.*]] = call double @llvm.roundeven.f64(double [[D:%.*]]) +; CHECK-NEXT: [[F:%.*]] = fptrunc double [[E]] to float +; CHECK-NEXT: ret float [[F]] +; + %E = call double @llvm.roundeven.f64(double %D) + %F = fptrunc double %E to float + ret float %F +} + define float @test_no_shrink_intrin_nearbyint(double %D) { ; CHECK-LABEL: @test_no_shrink_intrin_nearbyint( ; CHECK-NEXT: [[E:%.*]] = call double @llvm.nearbyint.f64(double [[D:%.*]]) @@ -424,6 +479,15 @@ define float @test_shrink_float_convertible_constant_intrin_round() { ret float %F } +define float @test_shrink_float_convertible_constant_intrin_roundeven() { +; CHECK-LABEL: @test_shrink_float_convertible_constant_intrin_roundeven( +; CHECK-NEXT: ret float 2.000000e+00 +; + %E = call double @llvm.roundeven.f64(double 2.1) + %F = fptrunc double %E to float + ret float %F +} + define float @test_shrink_float_convertible_constant_intrin_nearbyint() { ; CHECK-LABEL: @test_shrink_float_convertible_constant_intrin_nearbyint( ; CHECK-NEXT: ret float 2.000000e+00 @@ -494,6 
+558,17 @@ define half @test_no_shrink_mismatched_type_intrin_round(double %D) { ret half %F } +define half @test_no_shrink_mismatched_type_intrin_roundeven(double %D) { +; CHECK-LABEL: @test_no_shrink_mismatched_type_intrin_roundeven( +; CHECK-NEXT: [[E:%.*]] = call double @llvm.roundeven.f64(double [[D:%.*]]) +; CHECK-NEXT: [[F:%.*]] = fptrunc double [[E]] to half +; CHECK-NEXT: ret half [[F]] +; + %E = call double @llvm.roundeven.f64(double %D) + %F = fptrunc double %E to half + ret half %F +} + define half @test_no_shrink_mismatched_type_intrin_nearbyint(double %D) { ; CHECK-LABEL: @test_no_shrink_mismatched_type_intrin_nearbyint( ; CHECK-NEXT: [[E:%.*]] = call double @llvm.nearbyint.f64(double [[D:%.*]]) @@ -573,6 +648,17 @@ define <2 x double> @test_shrink_intrin_round_fp16_vec(<2 x half> %C) { ret <2 x double> %E } +define <2 x double> @test_shrink_intrin_roundeven_fp16_vec(<2 x half> %C) { +; CHECK-LABEL: @test_shrink_intrin_roundeven_fp16_vec( +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x half> @llvm.roundeven.v2f16(<2 x half> [[C:%.*]]) +; CHECK-NEXT: [[E:%.*]] = fpext <2 x half> [[TMP1]] to <2 x double> +; CHECK-NEXT: ret <2 x double> [[E]] +; + %D = fpext <2 x half> %C to <2 x double> + %E = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %D) + ret <2 x double> %E +} + define float @test_shrink_intrin_nearbyint_fp16_src(half %C) { ; CHECK-LABEL: @test_shrink_intrin_nearbyint_fp16_src( ; CHECK-NEXT: [[TMP1:%.*]] = call half @llvm.nearbyint.f16(half [[C:%.*]]) diff --git a/llvm/test/Transforms/InstCombine/float-shrink-compare.ll b/llvm/test/Transforms/InstCombine/float-shrink-compare.ll index ca2f6d1c23cb2..aa0dd5e3007d8 100644 --- a/llvm/test/Transforms/InstCombine/float-shrink-compare.ll +++ b/llvm/test/Transforms/InstCombine/float-shrink-compare.ll @@ -160,6 +160,32 @@ define i1 @test6_intrin(float %x, float %y) { ret i1 %cmp } +define i1 @test6a(float %x, float %y) { +; CHECK-LABEL: @test6a( +; CHECK-NEXT: [[ROUND:%.*]] = call float 
@llvm.roundeven.f32(float %x) +; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ROUND]], %y +; CHECK-NEXT: ret i1 [[CMP]] +; + %x.ext = fpext float %x to double + %round = call double @roundeven(double %x.ext) nounwind readnone + %y.ext = fpext float %y to double + %cmp = fcmp oeq double %round, %y.ext + ret i1 %cmp +} + +define i1 @test6a_intrin(float %x, float %y) { +; CHECK-LABEL: @test6a_intrin( +; CHECK-NEXT: [[ROUND:%.*]] = call float @llvm.roundeven.f32(float %x) +; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ROUND]], %y +; CHECK-NEXT: ret i1 [[CMP]] +; + %x.ext = fpext float %x to double + %round = call double @llvm.roundeven.f64(double %x.ext) nounwind readnone + %y.ext = fpext float %y to double + %cmp = fcmp oeq double %round, %y.ext + ret i1 %cmp +} + define i1 @test7(float %x, float %y) { ; CHECK-LABEL: @test7( ; CHECK-NEXT: [[TRUNC:%.*]] = call float @llvm.trunc.f32(float %x) @@ -329,6 +355,32 @@ define i1 @test13_intrin(float %x, float %y) { ret i1 %cmp } +define i1 @test13a(float %x, float %y) { +; CHECK-LABEL: @test13a( +; CHECK-NEXT: [[ROUND:%.*]] = call float @llvm.roundeven.f32(float %x) +; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ROUND]], %y +; CHECK-NEXT: ret i1 [[CMP]] +; + %x.ext = fpext float %x to double + %y.ext = fpext float %y to double + %round = call double @roundeven(double %x.ext) nounwind readnone + %cmp = fcmp oeq double %y.ext, %round + ret i1 %cmp +} + +define i1 @test13a_intrin(float %x, float %y) { +; CHECK-LABEL: @test13a_intrin( +; CHECK-NEXT: [[ROUND:%.*]] = call float @llvm.roundeven.f32(float %x) +; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ROUND]], %y +; CHECK-NEXT: ret i1 [[CMP]] +; + %x.ext = fpext float %x to double + %y.ext = fpext float %y to double + %round = call double @llvm.roundeven.f64(double %x.ext) nounwind readnone + %cmp = fcmp oeq double %y.ext, %round + ret i1 %cmp +} + define i1 @test14(float %x, float %y) { ; CHECK-LABEL: @test14( ; CHECK-NEXT: [[TRUNC:%.*]] = call float @llvm.trunc.f32(float %x) @@ 
-462,6 +514,7 @@ declare double @floor(double) nounwind readnone declare double @nearbyint(double) nounwind readnone declare double @rint(double) nounwind readnone declare double @round(double) nounwind readnone +declare double @roundeven(double) nounwind readnone declare double @trunc(double) nounwind readnone declare double @fmin(double, double) nounwind readnone declare double @fmax(double, double) nounwind readnone @@ -471,4 +524,5 @@ declare double @llvm.ceil.f64(double) nounwind readnone declare double @llvm.floor.f64(double) nounwind readnone declare double @llvm.nearbyint.f64(double) nounwind readnone declare double @llvm.round.f64(double) nounwind readnone +declare double @llvm.roundeven.f64(double) nounwind readnone declare double @llvm.trunc.f64(double) nounwind readnone diff --git a/llvm/test/Transforms/InstSimplify/known-never-nan.ll b/llvm/test/Transforms/InstSimplify/known-never-nan.ll index 109775607bcc3..c2c26e6ee975f 100644 --- a/llvm/test/Transforms/InstSimplify/known-never-nan.ll +++ b/llvm/test/Transforms/InstSimplify/known-never-nan.ll @@ -147,6 +147,16 @@ define i1 @round_nnan_src(double %arg) { ret i1 %tmp } +define i1 @roundeven_nnan_src(double %arg) { +; CHECK-LABEL: @roundeven_nnan_src( +; CHECK-NEXT: ret i1 false +; + %nnan = fadd nnan double %arg, 1.0 + %op = call double @llvm.roundeven.f64(double %nnan) + %tmp = fcmp uno double %op, %op + ret i1 %tmp +} + define i1 @known_nan_select(i1 %cond, double %arg0, double %arg1) { ; CHECK-LABEL: @known_nan_select( ; CHECK-NEXT: ret i1 true @@ -416,3 +426,4 @@ declare double @llvm.trunc.f64(double) declare double @llvm.rint.f64(double) declare double @llvm.nearbyint.f64(double) declare double @llvm.round.f64(double) +declare double @llvm.roundeven.f64(double) diff --git a/llvm/test/Transforms/InstSimplify/round-intrinsics.ll b/llvm/test/Transforms/InstSimplify/round-intrinsics.ll index 42c78e000acd2..3b63bd6be6f78 100644 --- a/llvm/test/Transforms/InstSimplify/round-intrinsics.ll +++ 
b/llvm/test/Transforms/InstSimplify/round-intrinsics.ll @@ -81,6 +81,16 @@ define float @uitofp_round(i32 %arg) { ret float %round } +define float @uitofp_roundeven(i32 %arg) { +; CHECK-LABEL: @uitofp_roundeven( +; CHECK-NEXT: [[CVT:%.*]] = uitofp i32 [[ARG:%.*]] to float +; CHECK-NEXT: ret float [[CVT]] +; + %cvt = uitofp i32 %arg to float + %round = call float @llvm.roundeven.f32(float %cvt) + ret float %round +} + define float @sitofp_nearbyint(i32 %arg) { ; CHECK-LABEL: @sitofp_nearbyint( ; CHECK-NEXT: [[CVT:%.*]] = sitofp i32 [[ARG:%.*]] to float @@ -125,6 +135,7 @@ declare float @llvm.floor.f32(float) #0 declare float @llvm.trunc.f32(float) #0 declare float @llvm.ceil.f32(float) #0 declare float @llvm.round.f32(float) #0 +declare float @llvm.roundeven.f32(float) #0 declare float @llvm.nearbyint.f32(float) #0 declare float @llvm.rint.f32(float) #0 diff --git a/llvm/test/Transforms/LICM/hoist-round.ll b/llvm/test/Transforms/LICM/hoist-round.ll index 10f75be4d3270..c48847b40dbc2 100644 --- a/llvm/test/Transforms/LICM/hoist-round.ll +++ b/llvm/test/Transforms/LICM/hoist-round.ll @@ -20,6 +20,7 @@ target datalayout = "E-m:e-p:32:32-i8:8:8-i16:16:16-i64:32:32-f64:32:32-v64:32:3 ; CHECK: call float @llvm.minnum.f32 ; CHECK: call float @llvm.maxnum.f32 ; CHECK: call float @llvm.powi.f32 +; CHECK: call float @llvm.roundeven.f32 ; CHECK: for.body: define void @test(float %arg1, float %arg2) { @@ -45,7 +46,8 @@ for.body: %tmp.11 = call float @llvm.minimum.f32(float %tmp.10, float %arg2) %tmp.12 = call float @llvm.maximum.f32(float %tmp.11, float %arg2) %tmp.13 = call float @llvm.powi.f32(float %tmp.12, i32 4) - call void @consume(float %tmp.13) + %tmp.14 = call float @llvm.roundeven.f32(float %tmp.13) + call void @consume(float %tmp.14) %IND.new = add i32 %IND, 1 br label %for.head @@ -68,3 +70,4 @@ declare float @llvm.maxnum.f32(float, float) declare float @llvm.minimum.f32(float, float) declare float @llvm.maximum.f32(float, float) declare float @llvm.powi.f32(float, 
i32) +declare float @llvm.roundeven.f32(float) diff --git a/llvm/test/Transforms/LoopVectorize/intrinsic.ll b/llvm/test/Transforms/LoopVectorize/intrinsic.ll index 50cdb73ae8ec9..c2036c611334d 100644 --- a/llvm/test/Transforms/LoopVectorize/intrinsic.ll +++ b/llvm/test/Transforms/LoopVectorize/intrinsic.ll @@ -832,6 +832,58 @@ for.end: ; preds = %for.body, %entry declare double @llvm.round.f64(double) nounwind readnone +;CHECK-LABEL: @roundeven_f32( +;CHECK: llvm.roundeven.v4f32 +;CHECK: ret void +define void @roundeven_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4 + %call = tail call float @llvm.roundeven.f32(float %0) nounwind readnone + %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv + store float %call, float* %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare float @llvm.roundeven.f32(float) nounwind readnone + +;CHECK-LABEL: @roundeven_f64( +;CHECK: llvm.roundeven.v4f64 +;CHECK: ret void +define void @roundeven_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv + %0 = load double, double* %arrayidx, align 8 + %call = tail call double @llvm.roundeven.f64(double %0) nounwind readnone + %arrayidx2 = 
getelementptr inbounds double, double* %x, i64 %indvars.iv + store double %call, double* %arrayidx2, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare double @llvm.roundeven.f64(double) nounwind readnone + ;CHECK-LABEL: @fma_f32( ;CHECK: llvm.fma.v4f32 ;CHECK: ret void diff --git a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp index f21081467c1eb..bd5fefc013e88 100644 --- a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp +++ b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp @@ -276,6 +276,9 @@ TEST_F(TargetLibraryInfoTest, ValidProto) { "declare double @round(double)\n" "declare float @roundf(float)\n" "declare x86_fp80 @roundl(x86_fp80)\n" + "declare double @roundeven(double)\n" + "declare float @roundevenf(float)\n" + "declare x86_fp80 @roundevenl(x86_fp80)\n" "declare i32 @scanf(i8*, ...)\n" "declare void @setbuf(%struct*, i8*)\n" "declare i32 @setitimer(i32, %struct*, %struct*)\n" diff --git a/llvm/unittests/IR/IRBuilderTest.cpp b/llvm/unittests/IR/IRBuilderTest.cpp index 5af5bc87944b6..bf230597a589f 100644 --- a/llvm/unittests/IR/IRBuilderTest.cpp +++ b/llvm/unittests/IR/IRBuilderTest.cpp @@ -121,6 +121,12 @@ TEST_F(IRBuilderTest, Intrinsics) { EXPECT_EQ(II->getIntrinsicID(), Intrinsic::fma); EXPECT_TRUE(II->hasNoInfs()); EXPECT_FALSE(II->hasNoNaNs()); + + Call = Builder.CreateUnaryIntrinsic(Intrinsic::roundeven, V); + II = cast<IntrinsicInst>(Call); + EXPECT_EQ(II->getIntrinsicID(), Intrinsic::roundeven); + EXPECT_FALSE(II->hasNoInfs()); + EXPECT_FALSE(II->hasNoNaNs()); } TEST_F(IRBuilderTest, IntrinsicsWithScalableVectors) { @@ -307,6 +313,25 @@ TEST_F(IRBuilderTest, ConstrainedFP) { EXPECT_FALSE(verifyModule(*M)); } +TEST_F(IRBuilderTest, ConstrainedFPIntrinsics) { + IRBuilder<> Builder(BB); + Value *V; +
Value *VDouble; + ConstrainedFPIntrinsic *CII; + GlobalVariable *GVDouble = new GlobalVariable( + *M, Type::getDoubleTy(Ctx), true, GlobalValue::ExternalLinkage, nullptr); + VDouble = Builder.CreateLoad(GVDouble->getValueType(), GVDouble); + + Builder.setDefaultConstrainedExcept(fp::ebStrict); + Builder.setDefaultConstrainedRounding(RoundingMode::TowardZero); + Function *Fn = Intrinsic::getDeclaration(M.get(), + Intrinsic::experimental_constrained_roundeven, { Type::getDoubleTy(Ctx) }); + V = Builder.CreateConstrainedFPCall(Fn, { VDouble }); + CII = cast<ConstrainedFPIntrinsic>(V); + EXPECT_EQ(Intrinsic::experimental_constrained_roundeven, CII->getIntrinsicID()); + EXPECT_EQ(fp::ebStrict, CII->getExceptionBehavior()); +} + TEST_F(IRBuilderTest, Lifetime) { IRBuilder<> Builder(BB); AllocaInst *Var1 = Builder.CreateAlloca(Builder.getInt8Ty()); From 6c906f7785dad3a1dea5357cfde0762952c2a2bd Mon Sep 17 00:00:00 2001 From: John Brawn Date: Tue, 26 May 2020 11:30:27 +0100 Subject: [PATCH 092/770] [Sema] Diagnose more cases of static data members in local or unnamed classes We currently diagnose static data members directly contained in unnamed classes, but we should also diagnose when they're in a class that is nested (directly or indirectly) in an unnamed class. Do this by iterating up the list of parent DeclContexts and checking if any is an unnamed class. Similarly also check for function or method DeclContexts (which includes things like blocks and openmp captured statements) as then the class is considered to be a local class, which means static data members aren't allowed.
Differential Revision: https://reviews.llvm.org/D80295 --- clang/lib/Sema/SemaDecl.cpp | 30 +++++++++++++++++++------ clang/test/OpenMP/for_loop_messages.cpp | 10 +++++++++ clang/test/SemaCXX/anonymous-struct.cpp | 18 +++++++++++++++ clang/test/SemaCXX/blocks.cpp | 13 +++++++++++ 4 files changed, 64 insertions(+), 7 deletions(-) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 74a4fd8a06de3..6fe48c860864b 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -6885,18 +6885,34 @@ NamedDecl *Sema::ActOnVariableDeclarator( if (SC == SC_Static && CurContext->isRecord()) { if (const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(DC)) { - // C++ [class.static.data]p2: - // A static data member shall not be a direct member of an unnamed - // or local class - // FIXME: or of a (possibly indirectly) nested class thereof. - if (RD->isLocalClass()) { + // Walk up the enclosing DeclContexts to check for any that are + // incompatible with static data members. + const DeclContext *FunctionOrMethod = nullptr; + const CXXRecordDecl *AnonStruct = nullptr; + for (DeclContext *Ctxt = DC; Ctxt; Ctxt = Ctxt->getParent()) { + if (Ctxt->isFunctionOrMethod()) { + FunctionOrMethod = Ctxt; + break; + } + const CXXRecordDecl *ParentDecl = dyn_cast<CXXRecordDecl>(Ctxt); + if (ParentDecl && !ParentDecl->getDeclName()) { + AnonStruct = ParentDecl; + break; + } + } + if (FunctionOrMethod) { + // C++ [class.static.data]p5: A local class shall not have static data + // members. Diag(D.getIdentifierLoc(), diag::err_static_data_member_not_allowed_in_local_class) << Name << RD->getDeclName() << RD->getTagKind(); - } else if (!RD->getDeclName()) { + } else if (AnonStruct) { + // C++ [class.static.data]p4: Unnamed classes and classes contained + // directly or indirectly within unnamed classes shall not contain + // static data members.
Diag(D.getIdentifierLoc(), diag::err_static_data_member_not_allowed_in_anon_struct) - << Name << RD->getTagKind(); + << Name << AnonStruct->getTagKind(); Invalid = true; } else if (RD->isUnion()) { // C++98 [class.union]p1: If a union contains a static data member, diff --git a/clang/test/OpenMP/for_loop_messages.cpp b/clang/test/OpenMP/for_loop_messages.cpp index 73c69ede6d120..087db755273a2 100644 --- a/clang/test/OpenMP/for_loop_messages.cpp +++ b/clang/test/OpenMP/for_loop_messages.cpp @@ -831,3 +831,13 @@ void test_nowait() { for (int i = 0; i < 16; ++i) ; } + +void test_static_data_member() { +#pragma omp parallel +#pragma omp for + for (int i = 0; i < 16; ++i) { + class X { + static int x; // expected-error {{static data member 'x' not allowed in local class 'X'}} + }; + } +} diff --git a/clang/test/SemaCXX/anonymous-struct.cpp b/clang/test/SemaCXX/anonymous-struct.cpp index 10f6711dd340a..333b8f724f4e1 100644 --- a/clang/test/SemaCXX/anonymous-struct.cpp +++ b/clang/test/SemaCXX/anonymous-struct.cpp @@ -153,3 +153,21 @@ typedef struct { const Empty E; } C; } // namespace ImplicitDecls + +struct { + static int x; // expected-error {{static data member 'x' not allowed in anonymous struct}} +} static_member_1; + +class { + struct A { + static int x; // expected-error {{static data member 'x' not allowed in anonymous class}} + } x; +} static_member_2; + +union { + struct A { + struct B { + static int x; // expected-error {{static data member 'x' not allowed in anonymous union}} + } x; + } x; +} static_member_3; diff --git a/clang/test/SemaCXX/blocks.cpp b/clang/test/SemaCXX/blocks.cpp index aacf63cfab420..5d0aa2af73601 100644 --- a/clang/test/SemaCXX/blocks.cpp +++ b/clang/test/SemaCXX/blocks.cpp @@ -153,3 +153,16 @@ void f() { auto some_block = ^{ (void)s; }; } } + +void static_data_member() { + auto block = ^{ + class X { + static int x; // expected-error {{static data member 'x' not allowed in local class 'X'}} + }; + class Y { + struct Z { + static int z; 
// expected-error {{static data member 'z' not allowed in local struct 'Z'}} + }; + }; + }; +} From 049c16ba93fa77df7984353b1a0124ed64fc0439 Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 26 May 2020 13:23:23 +0100 Subject: [PATCH 093/770] [ARM] MVE VMINV/VMAXV test additions. NFC --- llvm/test/CodeGen/Thumb2/mve-vmaxv.ll | 384 ++++++++++++++++++++++++-- 1 file changed, 360 insertions(+), 24 deletions(-) diff --git a/llvm/test/CodeGen/Thumb2/mve-vmaxv.ll b/llvm/test/CodeGen/Thumb2/mve-vmaxv.ll index f96c2f422a3fa..36c201cced56c 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vmaxv.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmaxv.ll @@ -14,8 +14,8 @@ declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>) declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>) declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>) -define arm_aapcs_vfpcc i8 @vmaxv_s_v16i8_i32(<16 x i8> %s1) { -; CHECK-LABEL: vmaxv_s_v16i8_i32: +define arm_aapcs_vfpcc i8 @vmaxv_s_v16i8(<16 x i8> %s1) { +; CHECK-LABEL: vmaxv_s_v16i8: ; CHECK: @ %bb.0: ; CHECK-NEXT: mvn r0, #127 ; CHECK-NEXT: vmaxv.s8 r0, q0 @@ -24,8 +24,8 @@ define arm_aapcs_vfpcc i8 @vmaxv_s_v16i8_i32(<16 x i8> %s1) { ret i8 %r } -define arm_aapcs_vfpcc i16 @vmaxv_s_v8i16_i32(<8 x i16> %s1) { -; CHECK-LABEL: vmaxv_s_v8i16_i32: +define arm_aapcs_vfpcc i16 @vmaxv_s_v8i16(<8 x i16> %s1) { +; CHECK-LABEL: vmaxv_s_v8i16: ; CHECK: @ %bb.0: ; CHECK-NEXT: movw r0, #32768 ; CHECK-NEXT: movt r0, #65535 @@ -35,8 +35,8 @@ define arm_aapcs_vfpcc i16 @vmaxv_s_v8i16_i32(<8 x i16> %s1) { ret i16 %r } -define arm_aapcs_vfpcc i32 @vmaxv_s_v4i32_i32(<4 x i32> %s1) { -; CHECK-LABEL: vmaxv_s_v4i32_i32: +define arm_aapcs_vfpcc i32 @vmaxv_s_v4i32(<4 x i32> %s1) { +; CHECK-LABEL: vmaxv_s_v4i32: ; CHECK: @ %bb.0: ; CHECK-NEXT: mov.w r0, #-2147483648 ; CHECK-NEXT: vmaxv.s32 r0, q0 @@ -45,8 +45,8 @@ define arm_aapcs_vfpcc i32 @vmaxv_s_v4i32_i32(<4 x i32> %s1) { ret i32 %r } -define arm_aapcs_vfpcc i8 @vmaxv_u_v16i8_i32(<16 x i8> 
%s1) { -; CHECK-LABEL: vmaxv_u_v16i8_i32: +define arm_aapcs_vfpcc i8 @vmaxv_u_v16i8(<16 x i8> %s1) { +; CHECK-LABEL: vmaxv_u_v16i8: ; CHECK: @ %bb.0: ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: vmaxv.u8 r0, q0 @@ -55,8 +55,8 @@ define arm_aapcs_vfpcc i8 @vmaxv_u_v16i8_i32(<16 x i8> %s1) { ret i8 %r } -define arm_aapcs_vfpcc i16 @vmaxv_u_v8i16_i32(<8 x i16> %s1) { -; CHECK-LABEL: vmaxv_u_v8i16_i32: +define arm_aapcs_vfpcc i16 @vmaxv_u_v8i16(<8 x i16> %s1) { +; CHECK-LABEL: vmaxv_u_v8i16: ; CHECK: @ %bb.0: ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: vmaxv.u16 r0, q0 @@ -65,8 +65,8 @@ define arm_aapcs_vfpcc i16 @vmaxv_u_v8i16_i32(<8 x i16> %s1) { ret i16 %r } -define arm_aapcs_vfpcc i32 @vmaxv_u_v4i32_i32(<4 x i32> %s1) { -; CHECK-LABEL: vmaxv_u_v4i32_i32: +define arm_aapcs_vfpcc i32 @vmaxv_u_v4i32(<4 x i32> %s1) { +; CHECK-LABEL: vmaxv_u_v4i32: ; CHECK: @ %bb.0: ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: vmaxv.u32 r0, q0 @@ -75,8 +75,8 @@ define arm_aapcs_vfpcc i32 @vmaxv_u_v4i32_i32(<4 x i32> %s1) { ret i32 %r } -define arm_aapcs_vfpcc i8 @vminv_s_v16i8_i32(<16 x i8> %s1) { -; CHECK-LABEL: vminv_s_v16i8_i32: +define arm_aapcs_vfpcc i8 @vminv_s_v16i8(<16 x i8> %s1) { +; CHECK-LABEL: vminv_s_v16i8: ; CHECK: @ %bb.0: ; CHECK-NEXT: movs r0, #127 ; CHECK-NEXT: vminv.s8 r0, q0 @@ -85,8 +85,8 @@ define arm_aapcs_vfpcc i8 @vminv_s_v16i8_i32(<16 x i8> %s1) { ret i8 %r } -define arm_aapcs_vfpcc i16 @vminv_s_v8i16_i32(<8 x i16> %s1) { -; CHECK-LABEL: vminv_s_v8i16_i32: +define arm_aapcs_vfpcc i16 @vminv_s_v8i16(<8 x i16> %s1) { +; CHECK-LABEL: vminv_s_v8i16: ; CHECK: @ %bb.0: ; CHECK-NEXT: movw r0, #32767 ; CHECK-NEXT: vminv.s16 r0, q0 @@ -95,8 +95,8 @@ define arm_aapcs_vfpcc i16 @vminv_s_v8i16_i32(<8 x i16> %s1) { ret i16 %r } -define arm_aapcs_vfpcc i32 @vminv_s_v4i32_i32(<4 x i32> %s1) { -; CHECK-LABEL: vminv_s_v4i32_i32: +define arm_aapcs_vfpcc i32 @vminv_s_v4i32(<4 x i32> %s1) { +; CHECK-LABEL: vminv_s_v4i32: ; CHECK: @ %bb.0: ; CHECK-NEXT: mvn r0, #-2147483648 ; CHECK-NEXT: 
vminv.s32 r0, q0 @@ -105,8 +105,8 @@ define arm_aapcs_vfpcc i32 @vminv_s_v4i32_i32(<4 x i32> %s1) { ret i32 %r } -define arm_aapcs_vfpcc i8 @vminv_u_v16i8_i32(<16 x i8> %s1) { -; CHECK-LABEL: vminv_u_v16i8_i32: +define arm_aapcs_vfpcc i8 @vminv_u_v16i8(<16 x i8> %s1) { +; CHECK-LABEL: vminv_u_v16i8: ; CHECK: @ %bb.0: ; CHECK-NEXT: movs r0, #255 ; CHECK-NEXT: vminv.u8 r0, q0 @@ -115,8 +115,8 @@ define arm_aapcs_vfpcc i8 @vminv_u_v16i8_i32(<16 x i8> %s1) { ret i8 %r } -define arm_aapcs_vfpcc i16 @vminv_u_v8i16_i32(<8 x i16> %s1) { -; CHECK-LABEL: vminv_u_v8i16_i32: +define arm_aapcs_vfpcc i16 @vminv_u_v8i16(<8 x i16> %s1) { +; CHECK-LABEL: vminv_u_v8i16: ; CHECK: @ %bb.0: ; CHECK-NEXT: movw r0, #65535 ; CHECK-NEXT: vminv.u16 r0, q0 @@ -125,8 +125,8 @@ define arm_aapcs_vfpcc i16 @vminv_u_v8i16_i32(<8 x i16> %s1) { ret i16 %r } -define arm_aapcs_vfpcc i32 @vminv_u_v4i32_i32(<4 x i32> %s1) { -; CHECK-LABEL: vminv_u_v4i32_i32: +define arm_aapcs_vfpcc i32 @vminv_u_v4i32(<4 x i32> %s1) { +; CHECK-LABEL: vminv_u_v4i32: ; CHECK: @ %bb.0: ; CHECK-NEXT: mov.w r0, #-1 ; CHECK-NEXT: vminv.u32 r0, q0 @@ -134,3 +134,339 @@ define arm_aapcs_vfpcc i32 @vminv_u_v4i32_i32(<4 x i32> %s1) { %r = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %s1) ret i32 %r } + + + +define arm_aapcs_vfpcc i8 @vmaxv_s_v16i8_i8(<16 x i8> %s1, i8 %s2) { +; CHECK-LABEL: vmaxv_s_v16i8_i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: mvn r1, #127 +; CHECK-NEXT: sxtb r3, r0 +; CHECK-NEXT: vmaxv.s8 r1, q0 +; CHECK-NEXT: sxtb r2, r1 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r0, r1 +; CHECK-NEXT: bx lr + %r = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %s1) + %c = icmp sgt i8 %r, %s2 + %s = select i1 %c, i8 %r, i8 %s2 + ret i8 %s +} + +define arm_aapcs_vfpcc i32 @vmaxv_s_v16i8_i32(<16 x i8> %s1, i32 %s2) { +; CHECK-LABEL: vmaxv_s_v16i8_i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: mvn r1, #127 +; CHECK-NEXT: vmaxv.s8 r1, q0 +; CHECK-NEXT: sxtb r1, r1 +; CHECK-NEXT: cmp 
r1, r0 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r0, r1 +; CHECK-NEXT: bx lr + %r = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %s1) + %rs = sext i8 %r to i32 + %c = icmp sgt i32 %rs, %s2 + %s = select i1 %c, i32 %rs, i32 %s2 + ret i32 %s +} + +define arm_aapcs_vfpcc i16 @vmaxv_s_v8i16_i16(<8 x i16> %s1, i16 %s2) { +; CHECK-LABEL: vmaxv_s_v8i16_i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r1, #32768 +; CHECK-NEXT: sxth r3, r0 +; CHECK-NEXT: movt r1, #65535 +; CHECK-NEXT: vmaxv.s16 r1, q0 +; CHECK-NEXT: sxth r2, r1 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r0, r1 +; CHECK-NEXT: bx lr + %r = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %s1) + %c = icmp sgt i16 %r, %s2 + %s = select i1 %c, i16 %r, i16 %s2 + ret i16 %s +} + +define arm_aapcs_vfpcc i32 @vmaxv_s_v8i16_i32(<8 x i16> %s1, i32 %s2) { +; CHECK-LABEL: vmaxv_s_v8i16_i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r1, #32768 +; CHECK-NEXT: movt r1, #65535 +; CHECK-NEXT: vmaxv.s16 r1, q0 +; CHECK-NEXT: sxth r1, r1 +; CHECK-NEXT: cmp r1, r0 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r0, r1 +; CHECK-NEXT: bx lr + %r = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %s1) + %rs = sext i16 %r to i32 + %c = icmp sgt i32 %rs, %s2 + %s = select i1 %c, i32 %rs, i32 %s2 + ret i32 %s +} + +define arm_aapcs_vfpcc i32 @vmaxv_s_v4i32_i32(<4 x i32> %s1, i32 %s2) { +; CHECK-LABEL: vmaxv_s_v4i32_i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov.w r1, #-2147483648 +; CHECK-NEXT: vmaxv.s32 r1, q0 +; CHECK-NEXT: cmp r1, r0 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r0, r1 +; CHECK-NEXT: bx lr + %r = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %s1) + %c = icmp sgt i32 %r, %s2 + %s = select i1 %c, i32 %r, i32 %s2 + ret i32 %s +} + +define arm_aapcs_vfpcc i8 @vmaxv_u_v16i8_i8(<16 x i8> %s1, i8 %s2) { +; CHECK-LABEL: vmaxv_u_v16i8_i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: uxtb r3, r0 +; CHECK-NEXT: vmaxv.u8 r1, q0 +; CHECK-NEXT: 
uxtb r2, r1 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: it hi +; CHECK-NEXT: movhi r0, r1 +; CHECK-NEXT: bx lr + %r = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %s1) + %c = icmp ugt i8 %r, %s2 + %s = select i1 %c, i8 %r, i8 %s2 + ret i8 %s +} + +define arm_aapcs_vfpcc i32 @vmaxv_u_v16i8_i32(<16 x i8> %s1, i32 %s2) { +; CHECK-LABEL: vmaxv_u_v16i8_i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: vmaxv.u8 r1, q0 +; CHECK-NEXT: uxtb r1, r1 +; CHECK-NEXT: cmp r1, r0 +; CHECK-NEXT: it hi +; CHECK-NEXT: movhi r0, r1 +; CHECK-NEXT: bx lr + %r = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %s1) + %rs = zext i8 %r to i32 + %c = icmp ugt i32 %rs, %s2 + %s = select i1 %c, i32 %rs, i32 %s2 + ret i32 %s +} + +define arm_aapcs_vfpcc i16 @vmaxv_u_v8i16_i16(<8 x i16> %s1, i16 %s2) { +; CHECK-LABEL: vmaxv_u_v8i16_i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: uxth r3, r0 +; CHECK-NEXT: vmaxv.u16 r1, q0 +; CHECK-NEXT: uxth r2, r1 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: it hi +; CHECK-NEXT: movhi r0, r1 +; CHECK-NEXT: bx lr + %r = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %s1) + %c = icmp ugt i16 %r, %s2 + %s = select i1 %c, i16 %r, i16 %s2 + ret i16 %s +} + +define arm_aapcs_vfpcc i32 @vmaxv_u_v8i16_i32(<8 x i16> %s1, i32 %s2) { +; CHECK-LABEL: vmaxv_u_v8i16_i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: vmaxv.u16 r1, q0 +; CHECK-NEXT: uxth r1, r1 +; CHECK-NEXT: cmp r1, r0 +; CHECK-NEXT: it hi +; CHECK-NEXT: movhi r0, r1 +; CHECK-NEXT: bx lr + %r = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %s1) + %rs = zext i16 %r to i32 + %c = icmp ugt i32 %rs, %s2 + %s = select i1 %c, i32 %rs, i32 %s2 + ret i32 %s +} + +define arm_aapcs_vfpcc i32 @vmaxv_u_v4i32_i32(<4 x i32> %s1, i32 %s2) { +; CHECK-LABEL: vmaxv_u_v4i32_i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: vmaxv.u32 r1, q0 +; CHECK-NEXT: cmp r1, r0 +; CHECK-NEXT: it hi +; CHECK-NEXT: movhi 
r0, r1 +; CHECK-NEXT: bx lr + %r = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %s1) + %c = icmp ugt i32 %r, %s2 + %s = select i1 %c, i32 %r, i32 %s2 + ret i32 %s +} + +define arm_aapcs_vfpcc i8 @vminv_s_v16i8_i8(<16 x i8> %s1, i8 %s2) { +; CHECK-LABEL: vminv_s_v16i8_i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: movs r1, #127 +; CHECK-NEXT: sxtb r3, r0 +; CHECK-NEXT: vminv.s8 r1, q0 +; CHECK-NEXT: sxtb r2, r1 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r0, r1 +; CHECK-NEXT: bx lr + %r = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %s1) + %c = icmp slt i8 %r, %s2 + %s = select i1 %c, i8 %r, i8 %s2 + ret i8 %s +} + +define arm_aapcs_vfpcc i32 @vminv_s_v16i8_i32(<16 x i8> %s1, i32 %s2) { +; CHECK-LABEL: vminv_s_v16i8_i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: movs r1, #127 +; CHECK-NEXT: vminv.s8 r1, q0 +; CHECK-NEXT: sxtb r1, r1 +; CHECK-NEXT: cmp r1, r0 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r0, r1 +; CHECK-NEXT: bx lr + %r = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %s1) + %rs = sext i8 %r to i32 + %c = icmp slt i32 %rs, %s2 + %s = select i1 %c, i32 %rs, i32 %s2 + ret i32 %s +} + +define arm_aapcs_vfpcc i16 @vminv_s_v8i16_i16(<8 x i16> %s1, i16 %s2) { +; CHECK-LABEL: vminv_s_v8i16_i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r1, #32767 +; CHECK-NEXT: sxth r3, r0 +; CHECK-NEXT: vminv.s16 r1, q0 +; CHECK-NEXT: sxth r2, r1 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r0, r1 +; CHECK-NEXT: bx lr + %r = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %s1) + %c = icmp slt i16 %r, %s2 + %s = select i1 %c, i16 %r, i16 %s2 + ret i16 %s +} + +define arm_aapcs_vfpcc i32 @vminv_s_v8i16_i32(<8 x i16> %s1, i32 %s2) { +; CHECK-LABEL: vminv_s_v8i16_i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r1, #32767 +; CHECK-NEXT: vminv.s16 r1, q0 +; CHECK-NEXT: sxth r1, r1 +; CHECK-NEXT: cmp r1, r0 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r0, r1 +; CHECK-NEXT: bx lr + %r = call i16 
@llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %s1) + %rs = sext i16 %r to i32 + %c = icmp slt i32 %rs, %s2 + %s = select i1 %c, i32 %rs, i32 %s2 + ret i32 %s +} + +define arm_aapcs_vfpcc i32 @vminv_s_v4i32_i32(<4 x i32> %s1, i32 %s2) { +; CHECK-LABEL: vminv_s_v4i32_i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: mvn r1, #-2147483648 +; CHECK-NEXT: vminv.s32 r1, q0 +; CHECK-NEXT: cmp r1, r0 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r0, r1 +; CHECK-NEXT: bx lr + %r = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %s1) + %c = icmp slt i32 %r, %s2 + %s = select i1 %c, i32 %r, i32 %s2 + ret i32 %s +} + +define arm_aapcs_vfpcc i8 @vminv_u_v16i8_i8(<16 x i8> %s1, i8 %s2) { +; CHECK-LABEL: vminv_u_v16i8_i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: movs r1, #255 +; CHECK-NEXT: uxtb r3, r0 +; CHECK-NEXT: vminv.u8 r1, q0 +; CHECK-NEXT: uxtb r2, r1 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: it lo +; CHECK-NEXT: movlo r0, r1 +; CHECK-NEXT: bx lr + %r = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %s1) + %c = icmp ult i8 %r, %s2 + %s = select i1 %c, i8 %r, i8 %s2 + ret i8 %s +} + +define arm_aapcs_vfpcc i32 @vminv_u_v16i8_i32(<16 x i8> %s1, i32 %s2) { +; CHECK-LABEL: vminv_u_v16i8_i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: movs r1, #255 +; CHECK-NEXT: vminv.u8 r1, q0 +; CHECK-NEXT: uxtb r1, r1 +; CHECK-NEXT: cmp r1, r0 +; CHECK-NEXT: it lo +; CHECK-NEXT: movlo r0, r1 +; CHECK-NEXT: bx lr + %r = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %s1) + %rs = zext i8 %r to i32 + %c = icmp ult i32 %rs, %s2 + %s = select i1 %c, i32 %rs, i32 %s2 + ret i32 %s +} + +define arm_aapcs_vfpcc i16 @vminv_u_v8i16_i16(<8 x i16> %s1, i16 %s2) { +; CHECK-LABEL: vminv_u_v8i16_i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r1, #65535 +; CHECK-NEXT: uxth r3, r0 +; CHECK-NEXT: vminv.u16 r1, q0 +; CHECK-NEXT: uxth r2, r1 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: it lo +; CHECK-NEXT: movlo r0, r1 +; CHECK-NEXT: bx lr + %r = call i16 
@llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %s1) + %c = icmp ult i16 %r, %s2 + %s = select i1 %c, i16 %r, i16 %s2 + ret i16 %s +} + +define arm_aapcs_vfpcc i32 @vminv_u_v8i16_i32(<8 x i16> %s1, i32 %s2) { +; CHECK-LABEL: vminv_u_v8i16_i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r1, #65535 +; CHECK-NEXT: vminv.u16 r1, q0 +; CHECK-NEXT: uxth r1, r1 +; CHECK-NEXT: cmp r1, r0 +; CHECK-NEXT: it lo +; CHECK-NEXT: movlo r0, r1 +; CHECK-NEXT: bx lr + %r = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %s1) + %rs = zext i16 %r to i32 + %c = icmp ult i32 %rs, %s2 + %s = select i1 %c, i32 %rs, i32 %s2 + ret i32 %s +} + +define arm_aapcs_vfpcc i32 @vminv_u_v4i32_i32(<4 x i32> %s1, i32 %s2) { +; CHECK-LABEL: vminv_u_v4i32_i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov.w r1, #-1 +; CHECK-NEXT: vminv.u32 r1, q0 +; CHECK-NEXT: cmp r1, r0 +; CHECK-NEXT: it lo +; CHECK-NEXT: movlo r0, r1 +; CHECK-NEXT: bx lr + %r = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %s1) + %c = icmp ult i32 %r, %s2 + %s = select i1 %c, i32 %r, i32 %s2 + ret i32 %s +} From ff2743bf047deac7ef6cc6c3efd30ff05e55b2ad Mon Sep 17 00:00:00 2001 From: Yitzhak Mandelbaum Date: Tue, 19 May 2020 14:26:46 -0400 Subject: [PATCH 094/770] [libTooling] In Transformer, allow atomic changes to span multiple files. Summary: Currently, all changes returned by a single application of a rule must fit in one atomic change and therefore must apply to one file. However, there are patterns in which a single rule will want to modify multiple files; for example, a header and implementation to change a declaration and its definition. This patch relaxes Transformer, libTooling's interpreter of RewriteRules, to support multiple changes. 
Reviewers: gribozavr Subscribers: mgrang, jfb, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D80239 --- clang/lib/Tooling/Transformer/Transformer.cpp | 42 ++++++++++++------- clang/unittests/Tooling/TransformerTest.cpp | 42 +++++++++++++++++++ 2 files changed, 69 insertions(+), 15 deletions(-) diff --git a/clang/lib/Tooling/Transformer/Transformer.cpp b/clang/lib/Tooling/Transformer/Transformer.cpp index 93c2c0912d213..71340bf2f676d 100644 --- a/clang/lib/Tooling/Transformer/Transformer.cpp +++ b/clang/lib/Tooling/Transformer/Transformer.cpp @@ -12,6 +12,7 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Tooling/Refactoring/AtomicChange.h" #include "llvm/Support/Error.h" +#include #include #include @@ -45,28 +46,39 @@ void Transformer::run(const MatchFinder::MatchResult &Result) { return; } - // Record the results in the AtomicChange, anchored at the location of the - // first change. - AtomicChange AC(*Result.SourceManager, - (*Transformations)[0].Range.getBegin()); + // Group the transformations, by file, into AtomicChanges, each anchored by + // the location of the first change in that file. + std::map ChangesByFileID; for (const auto &T : *Transformations) { + auto ID = Result.SourceManager->getFileID(T.Range.getBegin()); + auto Iter = ChangesByFileID + .emplace(ID, AtomicChange(*Result.SourceManager, + T.Range.getBegin())) + .first; + auto &AC = Iter->second; if (auto Err = AC.replace(*Result.SourceManager, T.Range, T.Replacement)) { Consumer(std::move(Err)); return; } } - for (const auto &I : Case.AddedIncludes) { - auto &Header = I.first; - switch (I.second) { - case transformer::IncludeFormat::Quoted: - AC.addHeader(Header); - break; - case transformer::IncludeFormat::Angled: - AC.addHeader((llvm::Twine("<") + Header + ">").str()); - break; + for (auto &IDChangePair : ChangesByFileID) { + auto &AC = IDChangePair.second; + // FIXME: this will add includes to *all* changed files, which may not be + // the intent. 
We should upgrade the representation to allow associating + // headers with specific edits. + for (const auto &I : Case.AddedIncludes) { + auto &Header = I.first; + switch (I.second) { + case transformer::IncludeFormat::Quoted: + AC.addHeader(Header); + break; + case transformer::IncludeFormat::Angled: + AC.addHeader((llvm::Twine("<") + Header + ">").str()); + break; + } } - } - Consumer(std::move(AC)); + Consumer(std::move(AC)); + } } diff --git a/clang/unittests/Tooling/TransformerTest.cpp b/clang/unittests/Tooling/TransformerTest.cpp index 1d955cf5e9b80..c8c6db059fedf 100644 --- a/clang/unittests/Tooling/TransformerTest.cpp +++ b/clang/unittests/Tooling/TransformerTest.cpp @@ -817,4 +817,46 @@ TEST(TransformerDeathTest, OrderedRuleTypes) { "Matcher must be.*node matcher"); } #endif + +// Edits are able to span multiple files; in this case, a header and an +// implementation file. +TEST_F(TransformerTest, MultipleFiles) { + std::string Header = R"cc(void RemoveThisFunction();)cc"; + std::string Source = R"cc(#include "input.h" + void RemoveThisFunction();)cc"; + Transformer T( + makeRule(functionDecl(hasName("RemoveThisFunction")), changeTo(cat(""))), + consumer()); + T.registerMatchers(&MatchFinder); + auto Factory = newFrontendActionFactory(&MatchFinder); + EXPECT_TRUE(runToolOnCodeWithArgs( + Factory->create(), Source, std::vector(), "input.cc", + "clang-tool", std::make_shared(), + {{"input.h", Header}})); + + std::sort(Changes.begin(), Changes.end(), + [](const AtomicChange &L, const AtomicChange &R) { + return L.getFilePath() < R.getFilePath(); + }); + + ASSERT_EQ(Changes[0].getFilePath(), "./input.h"); + EXPECT_THAT(Changes[0].getInsertedHeaders(), IsEmpty()); + EXPECT_THAT(Changes[0].getRemovedHeaders(), IsEmpty()); + llvm::Expected UpdatedCode = + clang::tooling::applyAllReplacements(Header, + Changes[0].getReplacements()); + ASSERT_TRUE(static_cast(UpdatedCode)) + << "Could not update code: " << llvm::toString(UpdatedCode.takeError()); + 
EXPECT_EQ(format(*UpdatedCode), format(R"cc(;)cc")); + + ASSERT_EQ(Changes[1].getFilePath(), "input.cc"); + EXPECT_THAT(Changes[1].getInsertedHeaders(), IsEmpty()); + EXPECT_THAT(Changes[1].getRemovedHeaders(), IsEmpty()); + UpdatedCode = clang::tooling::applyAllReplacements( + Source, Changes[1].getReplacements()); + ASSERT_TRUE(static_cast(UpdatedCode)) + << "Could not update code: " << llvm::toString(UpdatedCode.takeError()); + EXPECT_EQ(format(*UpdatedCode), format(R"cc(#include "input.h" + ;)cc")); +} } // namespace From a3b5ccddcc3512432fc386b9197e6f103e190894 Mon Sep 17 00:00:00 2001 From: Tharindu Rusira Date: Tue, 26 May 2020 00:48:06 -0600 Subject: [PATCH 095/770] Update DialectConversion.md line 164: typo? baz.add should be bar.add. `bar.add` -> `foo.add` --- mlir/docs/DialectConversion.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/docs/DialectConversion.md b/mlir/docs/DialectConversion.md index 0835527ae201f..1f9ec14b6a96c 100644 --- a/mlir/docs/DialectConversion.md +++ b/mlir/docs/DialectConversion.md @@ -161,7 +161,7 @@ a set of legal ones. As an example, say you define a target that supports one operation: `foo.add`. When providing the following patterns: [`bar.add` -> `baz.add`, `baz.add` -> `foo.add`], the framework will automatically detect that it can legalize -`baz.add` -> `foo.add` even though a direct conversion does not exist. This +`bar.add` -> `foo.add` even though a direct conversion does not exist. This means that you don’t have to define a direct legalization pattern for `bar.add` -> `foo.add`. From 9578a54f5007e8a02cef449dd151da27837b388e Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Tue, 26 May 2020 09:16:54 -0400 Subject: [PATCH 096/770] [mlir][Vector] Add vector contraction to outerproduct lowering This revision adds the additional lowering and exposes the patterns at a finer granularity for better programmatic reuse. The unit test makes use of the finer grained pattern for simpler checks. 
As the ContractionOpLowering is exposed programmatically, cleanup opportunities appear and static class methods are turned into free functions with static visibility. Differential Revision: https://reviews.llvm.org/D80375 --- mlir/include/mlir/Dialect/Vector/VectorOps.h | 21 +- mlir/include/mlir/Dialect/Vector/VectorOps.td | 5 + .../mlir/Dialect/Vector/VectorTransforms.h | 117 ++- mlir/lib/Dialect/Vector/VectorOps.cpp | 7 + mlir/lib/Dialect/Vector/VectorTransforms.cpp | 751 ++++++++++-------- .../Vector/vector-contract-transforms.mlir | 34 +- .../lib/Transforms/TestVectorTransforms.cpp | 19 +- 7 files changed, 598 insertions(+), 356 deletions(-) diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.h b/mlir/include/mlir/Dialect/Vector/VectorOps.h index 6394fae213750..423c72da64712 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorOps.h +++ b/mlir/include/mlir/Dialect/Vector/VectorOps.h @@ -25,13 +25,6 @@ class MLIRContext; class OwningRewritePatternList; namespace vector { -/// Structure to control the behavior of vector transform patterns. -struct VectorTransformsOptions { - /// Let vector.contract lower to vector.matrix_multiply and LLVM matrix - /// intrinsics. - bool lowerToLLVMMatrixIntrinsics = false; -}; - /// Collect a set of vector-to-vector canonicalization patterns. void populateVectorToVectorCanonicalizationPatterns( OwningRewritePatternList &patterns, MLIRContext *context); @@ -51,6 +44,20 @@ void populateVectorToVectorTransformationPatterns( void populateVectorSlicesLoweringPatterns(OwningRewritePatternList &patterns, MLIRContext *context); +/// Enum to control the lowering of `vector.contract` operations. +enum class VectorContractLowering { + /// Progressively lower to finer grained `vector.contract` and `vector.fma`. + FMA = 0, + /// Lower to `vector.matrix_multiply`, maps 1-1 to LLVM matrix intrinsics. + Matmul = 1, + /// Lower to `vector.outerproduct`. 
+ OuterProduct = 2, +}; +/// Structure to control the behavior of vector transform patterns. +struct VectorTransformsOptions { + VectorContractLowering vectorContractLowering = VectorContractLowering::FMA; +}; + /// Collect a set of transformation patterns that are related to contracting /// or expanding vector operations: /// ContractionOpLowering, diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.td b/mlir/include/mlir/Dialect/Vector/VectorOps.td index 264c8ad034c82..1b978e44dd6ab 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/VectorOps.td @@ -686,6 +686,11 @@ def Vector_OuterProductOp : return %3: vector<4x8xf32> ``` }]; + let builders = [ + // Build an op without mask, use the type of `acc` as the return type. + OpBuilder< + "OpBuilder &builder, OperationState &result, Value lhs, Value rhs, " + "Value acc">]; let extraClassDeclaration = [{ VectorType getOperandVectorTypeLHS() { return lhs().getType().cast(); diff --git a/mlir/include/mlir/Dialect/Vector/VectorTransforms.h b/mlir/include/mlir/Dialect/Vector/VectorTransforms.h index 337ac75f7cbbc..08aa579d651b4 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorTransforms.h +++ b/mlir/include/mlir/Dialect/Vector/VectorTransforms.h @@ -9,6 +9,7 @@ #ifndef DIALECT_VECTOR_VECTORTRANSFORMS_H_ #define DIALECT_VECTOR_VECTORTRANSFORMS_H_ +#include "mlir/Dialect/Vector/VectorOps.h" #include "mlir/IR/PatternMatch.h" namespace mlir { @@ -22,13 +23,6 @@ void populateVectorToVectorConversionPatterns( ArrayRef coarseVectorShape = {}, ArrayRef fineVectorShape = {}); -//////////////////////////////////////////////////////////////////////////////// -// The following Declarative Rewrite Rule (DRR) helpers are used in rewrite -// patterns. As such, they must not call into `rewriter.erase/replace` APIs and -// it is the responsibility of the enclosing PatternRewriter to erase on -// success. 
-//////////////////////////////////////////////////////////////////////////////// - namespace vector { // Entry point for unrolling declarative pattern rewrites. @@ -69,6 +63,115 @@ unrollSingleResultOpMatchingType(OpBuilder &builder, Operation *op, ArrayRef targetShape); } // namespace vector + +//===----------------------------------------------------------------------===// +// Finer-grained patterns exposed for more control over individual lowerings. +//===----------------------------------------------------------------------===// + +/// Progressive lowering of a `vector.contract %a, %b, %c` with row-major matmul +/// semantics to: +/// ``` +/// %flattened_a = vector.shape_cast %a +/// %flattened_b = vector.shape_cast %b +/// %flattened_d = vector.matmul %flattened_a, %flattened_b +/// %d = vector.shape_cast %flattened_d +/// %e = add %c, %d +/// ``` +/// `vector.matmul` later lowers to `llvm.matrix.multiply`. +/// +/// This only kicks in when VectorTransformsOptions is set to Matmul and +/// the vector.contract op is a row-major matrix multiply. +class ContractionOpToMatmulOpLowering + : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + ContractionOpToMatmulOpLowering( + vector::VectorTransformsOptions vectorTransformsOptions, + MLIRContext *context) + : OpRewritePattern(context), + vectorTransformsOptions(vectorTransformsOptions) {} + + LogicalResult match(vector::ContractionOp op) const override; + void rewrite(vector::ContractionOp op, + PatternRewriter &rewriter) const override; + +private: + /// Options to control the vector patterns. + vector::VectorTransformsOptions vectorTransformsOptions; +}; + +/// Progressive lowering of a `vector.contract %a, %b, %c` with row-major matmul +/// semantics to a reduction_size-unrolled sequence: +/// ``` +/// %at = vector.transpose %a, [1, 0] +/// %bRow0 = vector.extract %b[0] +/// %atRow0 = vector.extract %at[0] +/// %c0 = vector.outerproduct %atRow0, %bRow0, %c +/// ...
+/// %bRowK = vector.extract %b[K] +/// %atRowK = vector.extract %at[K] +/// %cK = vector.outerproduct %atRowK, %bRowK, %cK-1 +/// ``` +/// +/// This only kicks in when VectorTransformsOptions is set to OuterProduct and +/// the vector.contract op is a row-major matrix multiply. +class ContractionOpToOuterProductOpLowering + : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + ContractionOpToOuterProductOpLowering( + vector::VectorTransformsOptions vectorTransformsOptions, + MLIRContext *context) + : OpRewritePattern(context), + vectorTransformsOptions(vectorTransformsOptions) {} + + LogicalResult match(vector::ContractionOp op) const override; + void rewrite(vector::ContractionOp op, + PatternRewriter &rewriter) const override; + +private: + /// Options to control the vector patterns. + vector::VectorTransformsOptions vectorTransformsOptions; +}; + +/// Progressive lowering of ContractionOp. +/// +/// One: +/// %x = vector.contract with at least one free/batch dimension +/// is replaced by: +/// %a = vector.contract with one less free/batch dimension +/// %b = vector.contract with one less free/batch dimension +/// .. +/// %x = combine %a %b .. +/// until a pure contraction is reached (no free/batch dimensions), +/// which is replaced by a fma/reduction op. +/// +/// This only kicks in when either VectorTransformsOptions is set to FMA or when +/// other contraction patterns fail. +class ContractionOpLowering : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + ContractionOpLowering(vector::VectorTransformsOptions vectorTransformsOptions, + MLIRContext *context) + : OpRewritePattern(context), + vectorTransformsOptions(vectorTransformsOptions) {} + + LogicalResult matchAndRewrite(vector::ContractionOp op, + PatternRewriter &rewriter) const override; + +private: + /// Options to control the vector patterns. + vector::VectorTransformsOptions vectorTransformsOptions; + // Lower one parallel dimension. 
+ Value lowerParallel(vector::ContractionOp op, int64_t lhsIndex, + int64_t rhsIndex, PatternRewriter &rewriter) const; + // Lower one reduction dimension. + Value lowerReduction(vector::ContractionOp op, + PatternRewriter &rewriter) const; +}; + } // namespace mlir #endif // DIALECT_VECTOR_VECTORTRANSFORMS_H_ diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp index 5439233c96b15..1574edb344941 100644 --- a/mlir/lib/Dialect/Vector/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/VectorOps.cpp @@ -957,6 +957,13 @@ static LogicalResult verify(InsertStridedSliceOp op) { // OuterProductOp //===----------------------------------------------------------------------===// +/// Build an op without mask, use the type of `acc` as the return type. +void OuterProductOp::build(OpBuilder &builder, OperationState &result, + Value lhs, Value rhs, Value acc) { + result.addOperands({lhs, rhs, acc}); + result.addTypes(acc.getType()); +} + static void print(OpAsmPrinter &p, OuterProductOp op) { p << op.getOperationName() << " " << op.lhs() << ", " << op.rhs(); if (!op.acc().empty()) diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp index 1c1de155d8b63..44ff03a04f223 100644 --- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp @@ -39,6 +39,120 @@ using namespace mlir; using llvm::dbgs; +// Helper to find an index in an affine map. +static Optional getResultIndex(AffineMap map, int64_t index) { + for (int64_t i = 0, e = map.getNumResults(); i < e; ++i) { + int64_t idx = map.getResult(i).cast().getPosition(); + if (idx == index) + return i; + } + return None; +} + +// Helper to construct iterator types with one index removed. 
+static SmallVector adjustIter(ArrayAttr iteratorTypes, + int64_t index) { + SmallVector results; + for (auto it : llvm::enumerate(iteratorTypes)) { + int64_t idx = it.index(); + if (idx == index) + continue; + results.push_back(it.value()); + } + return results; +} + +// Helper to construct an affine map with one index removed. +static AffineMap adjustMap(AffineMap map, int64_t index, + PatternRewriter &rewriter) { + auto *ctx = rewriter.getContext(); + SmallVector results; + for (int64_t i = 0, e = map.getNumResults(); i < e; ++i) { + int64_t idx = map.getResult(i).cast().getPosition(); + if (idx == index) + continue; + // Re-insert remaining indices, but renamed when occurring + // after the removed index. + auto targetExpr = getAffineDimExpr(idx < index ? idx : idx - 1, ctx); + results.push_back(targetExpr); + } + return AffineMap::get(map.getNumDims() - 1, 0, results, ctx); +} + +// Helper to drop dimension from vector type. +static Type adjustType(VectorType tp, int64_t index) { + int64_t rank = tp.getRank(); + Type eltType = tp.getElementType(); + if (rank == 1) { + assert(index == 0 && "index for scalar result out of bounds"); + return eltType; + } + SmallVector adjustedShape; + for (int64_t i = 0; i < rank; ++i) { + // Omit dimension at the given index. + if (i == index) + continue; + // Otherwise, add dimension back. + adjustedShape.push_back(tp.getDimSize(i)); + } + return VectorType::get(adjustedShape, eltType); +} + +// Helper method to possibly drop a dimension in a load. +// TODO(ajcbik): use a reshaping vector load (and share lowering code) +static Value reshapeLoad(Location loc, Value val, VectorType type, + int64_t index, int64_t pos, + PatternRewriter &rewriter) { + if (index == -1) + return val; + Type lowType = adjustType(type, 0); + // At extraction dimension? + if (index == 0) { + auto posAttr = rewriter.getI64ArrayAttr(pos); + return rewriter.create(loc, lowType, val, posAttr); + } + // Unroll leading dimensions. 
+ VectorType vType = lowType.cast(); + VectorType resType = adjustType(type, index).cast(); + Value result = + rewriter.create(loc, resType, rewriter.getZeroAttr(resType)); + for (int64_t d = 0, e = resType.getDimSize(0); d < e; d++) { + auto posAttr = rewriter.getI64ArrayAttr(d); + Value ext = rewriter.create(loc, vType, val, posAttr); + Value load = reshapeLoad(loc, ext, vType, index - 1, pos, rewriter); + result = + rewriter.create(loc, resType, load, result, posAttr); + } + return result; +} + +// Helper method to possibly drop a dimension in a store. +// TODO(ajcbik): use a reshaping vector store (and share lowering code) +static Value reshapeStore(Location loc, Value val, Value result, + VectorType type, int64_t index, int64_t pos, + PatternRewriter &rewriter) { + // Unmodified? + if (index == -1) + return val; + // At insertion dimension? + if (index == 0) { + auto posAttr = rewriter.getI64ArrayAttr(pos); + return rewriter.create(loc, type, val, result, posAttr); + } + // Unroll leading dimensions. + Type lowType = adjustType(type, 0); + VectorType vType = lowType.cast(); + Type insType = adjustType(vType, 0); + for (int64_t d = 0, e = type.getDimSize(0); d < e; d++) { + auto posAttr = rewriter.getI64ArrayAttr(d); + Value ext = rewriter.create(loc, vType, result, posAttr); + Value ins = rewriter.create(loc, insType, val, posAttr); + Value sto = reshapeStore(loc, ins, ext, vType, index - 1, pos, rewriter); + result = rewriter.create(loc, type, sto, result, posAttr); + } + return result; +} + // Clones `op` into a new operations that takes `operands` and returns // `resultTypes`. static Operation *cloneOpWithOperandsAndTypes(OpBuilder &builder, Location loc, @@ -1252,343 +1366,6 @@ class CreateMaskOpLowering : public OpRewritePattern { } }; -/// Progressive lowering of ContractionOp. 
-/// One: -/// %x = vector.contract with at least one free/batch dimension -/// is replaced by: -/// %a = vector.contract with one less free/batch dimension -/// %b = vector.contract with one less free/batch dimension -/// .. -/// %x = combine %a %b .. -/// until a pure contraction is reached (no free/batch dimensions), -/// which is replaced by a fma/reduction op. -/// -/// TODO(ajcbik): break down into transpose/reshape/cast ops -/// when they become available to avoid code dup -/// TODO(ajcbik): investigate lowering order impact on performance -class ContractionOpLowering : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - ContractionOpLowering(vector::VectorTransformsOptions vectorTransformsOptions, - MLIRContext *context) - : OpRewritePattern(context), - vectorTransformsOptions(vectorTransformsOptions) {} - - LogicalResult matchAndRewrite(vector::ContractionOp op, - PatternRewriter &rewriter) const override { - // TODO(ajcbik): implement masks - if (llvm::size(op.masks()) != 0) - return failure(); - - // TODO(ntv, ajcbik): implement benefits, cost models, separate this out in - // a new pattern. 
- if (vectorTransformsOptions.lowerToLLVMMatrixIntrinsics && - isRowMajorMatmul(op.indexing_maps())) { - VectorType lhsType = op.getLhsType(); - VectorType rhsType = op.getRhsType(); - unsigned lhsRows = op.getLhsType().getShape()[0]; - unsigned lhsColumns = op.getLhsType().getShape()[1]; - unsigned rhsColumns = op.getRhsType().getShape()[1]; - - Type flattenedLHSType = - VectorType::get(lhsType.getNumElements(), lhsType.getElementType()); - Type flattenedRHSType = - VectorType::get(rhsType.getNumElements(), rhsType.getElementType()); - auto lhs = rewriter.create( - op.getLoc(), flattenedLHSType, op.lhs()); - auto rhs = rewriter.create( - op.getLoc(), flattenedRHSType, op.rhs()); - - Value mul = rewriter.create( - op.getLoc(), lhs, rhs, lhsRows, lhsColumns, rhsColumns); - mul = rewriter.create(op.getLoc(), - op.acc().getType(), mul); - Type elementType = op.getLhsType().getElementType(); - assert(elementType.isIntOrFloat()); - if (elementType.isa()) - rewriter.replaceOpWithNewOp(op, op.acc(), mul); - else - rewriter.replaceOpWithNewOp(op, op.acc(), mul); - return success(); - } - - // Find first batch dimension in LHS/RHS, and lower when found. - std::vector> batchDimMap = op.getBatchDimMap(); - if (!batchDimMap.empty()) { - int64_t lhsIndex = batchDimMap[0].first; - int64_t rhsIndex = batchDimMap[0].second; - rewriter.replaceOp(op, lowerParallel(op, lhsIndex, rhsIndex, rewriter)); - return success(); - } - - // Collect contracting dimensions. - std::vector> contractingDimMap = - op.getContractingDimMap(); - DenseSet lhsContractingDimSet; - DenseSet rhsContractingDimSet; - for (auto &dimPair : contractingDimMap) { - lhsContractingDimSet.insert(dimPair.first); - rhsContractingDimSet.insert(dimPair.second); - } - - // Find first free dimension in LHS, and lower when found. 
- VectorType lhsType = op.getLhsType(); - for (int64_t lhsIndex = 0, e = lhsType.getRank(); lhsIndex < e; - ++lhsIndex) { - if (lhsContractingDimSet.count(lhsIndex) == 0) { - rewriter.replaceOp( - op, lowerParallel(op, lhsIndex, /*rhsIndex=*/-1, rewriter)); - return success(); - } - } - - // Find first free dimension in RHS, and lower when found. - VectorType rhsType = op.getRhsType(); - for (int64_t rhsIndex = 0, e = rhsType.getRank(); rhsIndex < e; - ++rhsIndex) { - if (rhsContractingDimSet.count(rhsIndex) == 0) { - rewriter.replaceOp( - op, lowerParallel(op, /*lhsIndex=*/-1, rhsIndex, rewriter)); - return success(); - } - } - - // Lower the first remaining reduction dimension. - if (!contractingDimMap.empty()) { - rewriter.replaceOp(op, lowerReduction(op, rewriter)); - return success(); - } - - return failure(); - } - -private: - // Lower one parallel dimension. - // TODO(ajcbik): consider reusing existing contract unrolling - Value lowerParallel(vector::ContractionOp op, int64_t lhsIndex, - int64_t rhsIndex, PatternRewriter &rewriter) const { - VectorType lhsType = op.getLhsType(); - VectorType rhsType = op.getRhsType(); - VectorType resType = op.getResultType().cast(); - // Find the iterator type index and result index. 
- SmallVector iMap = op.getIndexingMaps(); - int64_t iterIndex = -1; - int64_t dimSize = -1; - if (lhsIndex >= 0) { - iterIndex = - iMap[0].getResult(lhsIndex).cast().getPosition(); - assert((rhsIndex < 0 || iterIndex == iMap[1] - .getResult(rhsIndex) - .cast() - .getPosition()) && - "parallel index should be free in LHS or batch in LHS/RHS"); - dimSize = lhsType.getDimSize(lhsIndex); - } else { - assert(rhsIndex >= 0 && "missing parallel index"); - iterIndex = - iMap[1].getResult(rhsIndex).cast().getPosition(); - dimSize = rhsType.getDimSize(rhsIndex); - } - assert(iterIndex >= 0 && "parallel index not listed in operand mapping"); - Optional lookup = getResultIndex(iMap[2], iterIndex); - assert(lookup.hasValue() && "parallel index not listed in reduction"); - int64_t resIndex = lookup.getValue(); - // Construct new iterator types and affine map array attribute. - SmallVector lowIndexingMaps; - lowIndexingMaps.push_back(adjustMap(iMap[0], iterIndex, rewriter)); - lowIndexingMaps.push_back(adjustMap(iMap[1], iterIndex, rewriter)); - lowIndexingMaps.push_back(adjustMap(iMap[2], iterIndex, rewriter)); - auto lowAffine = rewriter.getAffineMapArrayAttr(lowIndexingMaps); - auto lowIter = - rewriter.getArrayAttr(adjustIter(op.iterator_types(), iterIndex)); - // Unroll into a series of lower dimensional vector.contract ops. - Location loc = op.getLoc(); - Value result = rewriter.create(loc, resType, - rewriter.getZeroAttr(resType)); - for (int64_t d = 0; d < dimSize; ++d) { - auto lhs = reshapeLoad(loc, op.lhs(), lhsType, lhsIndex, d, rewriter); - auto rhs = reshapeLoad(loc, op.rhs(), rhsType, rhsIndex, d, rewriter); - auto acc = reshapeLoad(loc, op.acc(), resType, resIndex, d, rewriter); - Value lowContract = rewriter.create( - loc, lhs, rhs, acc, lowAffine, lowIter); - result = reshapeStore(loc, lowContract, result, resType, resIndex, d, - rewriter); - } - return result; - } - - // Lower one reduction dimension. 
- Value lowerReduction(vector::ContractionOp op, - PatternRewriter &rewriter) const { - auto loc = op.getLoc(); - VectorType lhsType = op.getLhsType(); - VectorType rhsType = op.getRhsType(); - Type resType = op.getResultType(); - assert(!resType.isa()); - // Use iterator index 0. - int64_t iterIndex = 0; - SmallVector iMap = op.getIndexingMaps(); - Optional lookupLhs = getResultIndex(iMap[0], iterIndex); - Optional lookupRhs = getResultIndex(iMap[1], iterIndex); - assert(lookupLhs.hasValue() && "missing LHS parallel index"); - assert(lookupRhs.hasValue() && "missing RHS parallel index"); - int64_t lhsIndex = lookupLhs.getValue(); - int64_t rhsIndex = lookupRhs.getValue(); - int64_t dimSize = lhsType.getDimSize(lhsIndex); - assert(dimSize == rhsType.getDimSize(rhsIndex) && "corrupt shape"); - // Base case. - if (lhsType.getRank() == 1) { - assert(rhsType.getRank() == 1 && "corrupt contraction"); - Value zero = rewriter.create(loc, lhsType, - rewriter.getZeroAttr(lhsType)); - Value fma = rewriter.create(loc, op.lhs(), op.rhs(), zero); - StringAttr kind = rewriter.getStringAttr("add"); - return rewriter.create(loc, resType, kind, fma, - op.acc()); - } - // Construct new iterator types and affine map array attribute. - SmallVector lowIndexingMaps; - lowIndexingMaps.push_back(adjustMap(iMap[0], iterIndex, rewriter)); - lowIndexingMaps.push_back(adjustMap(iMap[1], iterIndex, rewriter)); - lowIndexingMaps.push_back(adjustMap(iMap[2], iterIndex, rewriter)); - auto lowAffine = rewriter.getAffineMapArrayAttr(lowIndexingMaps); - auto lowIter = - rewriter.getArrayAttr(adjustIter(op.iterator_types(), iterIndex)); - // Unroll into a series of lower dimensional vector.contract ops. - // By feeding the initial accumulator into the first contraction, - // and the result of each contraction into the next, eventually - // the sum of all reductions is computed. 
- Value result = op.acc(); - for (int64_t d = 0; d < dimSize; ++d) { - auto lhs = reshapeLoad(loc, op.lhs(), lhsType, lhsIndex, d, rewriter); - auto rhs = reshapeLoad(loc, op.rhs(), rhsType, rhsIndex, d, rewriter); - result = rewriter.create(loc, lhs, rhs, result, - lowAffine, lowIter); - } - return result; - } - - // Helper to find an index in an affine map. - static Optional getResultIndex(AffineMap map, int64_t index) { - for (int64_t i = 0, e = map.getNumResults(); i < e; ++i) { - int64_t idx = map.getResult(i).cast().getPosition(); - if (idx == index) - return i; - } - return None; - } - - // Helper to construct iterator types with one index removed. - static SmallVector adjustIter(ArrayAttr iteratorTypes, - int64_t index) { - SmallVector results; - for (auto it : llvm::enumerate(iteratorTypes)) { - int64_t idx = it.index(); - if (idx == index) - continue; - results.push_back(it.value()); - } - return results; - } - - // Helper to construct an affine map with one index removed. - static AffineMap adjustMap(AffineMap map, int64_t index, - PatternRewriter &rewriter) { - auto *ctx = rewriter.getContext(); - SmallVector results; - for (int64_t i = 0, e = map.getNumResults(); i < e; ++i) { - int64_t idx = map.getResult(i).cast().getPosition(); - if (idx == index) - continue; - // Re-insert remaining indices, but renamed when occurring - // after the removed index. - auto targetExpr = getAffineDimExpr(idx < index ? idx : idx - 1, ctx); - results.push_back(targetExpr); - } - return AffineMap::get(map.getNumDims() - 1, 0, results, ctx); - } - - // Helper to drop dimension from vector type. - static Type adjustType(VectorType tp, int64_t index) { - int64_t rank = tp.getRank(); - Type eltType = tp.getElementType(); - if (rank == 1) { - assert(index == 0 && "index for scalar result out of bounds"); - return eltType; - } - SmallVector adjustedShape; - for (int64_t i = 0; i < rank; ++i) { - // Omit dimension at the given index. 
- if (i == index) - continue; - // Otherwise, add dimension back. - adjustedShape.push_back(tp.getDimSize(i)); - } - return VectorType::get(adjustedShape, eltType); - } - - // Helper method to possibly drop a dimension in a load. - // TODO(ajcbik): use a reshaping vector load (and share lowering code) - static Value reshapeLoad(Location loc, Value val, VectorType type, - int64_t index, int64_t pos, - PatternRewriter &rewriter) { - if (index == -1) - return val; - Type lowType = adjustType(type, 0); - // At extraction dimension? - if (index == 0) { - auto posAttr = rewriter.getI64ArrayAttr(pos); - return rewriter.create(loc, lowType, val, posAttr); - } - // Unroll leading dimensions. - VectorType vType = lowType.cast(); - VectorType resType = adjustType(type, index).cast(); - Value result = rewriter.create(loc, resType, - rewriter.getZeroAttr(resType)); - for (int64_t d = 0, e = resType.getDimSize(0); d < e; d++) { - auto posAttr = rewriter.getI64ArrayAttr(d); - Value ext = rewriter.create(loc, vType, val, posAttr); - Value load = reshapeLoad(loc, ext, vType, index - 1, pos, rewriter); - result = rewriter.create(loc, resType, load, result, - posAttr); - } - return result; - } - - // Helper method to possibly drop a dimension in a store. - // TODO(ajcbik): use a reshaping vector store (and share lowering code) - static Value reshapeStore(Location loc, Value val, Value result, - VectorType type, int64_t index, int64_t pos, - PatternRewriter &rewriter) { - // Unmodified? - if (index == -1) - return val; - // At insertion dimension? - if (index == 0) { - auto posAttr = rewriter.getI64ArrayAttr(pos); - return rewriter.create(loc, type, val, result, posAttr); - } - // Unroll leading dimensions. 
- Type lowType = adjustType(type, 0); - VectorType vType = lowType.cast(); - Type insType = adjustType(vType, 0); - for (int64_t d = 0, e = type.getDimSize(0); d < e; d++) { - auto posAttr = rewriter.getI64ArrayAttr(d); - Value ext = - rewriter.create(loc, vType, result, posAttr); - Value ins = - rewriter.create(loc, insType, val, posAttr); - Value sto = reshapeStore(loc, ins, ext, vType, index - 1, pos, rewriter); - result = - rewriter.create(loc, type, sto, result, posAttr); - } - return result; - } - - vector::VectorTransformsOptions vectorTransformsOptions; -}; - /// ShapeOp 2D -> 1D downcast serves the purpose of flattening 2-D to 1-D /// vectors progressively on the way to target llvm.matrix intrinsics. /// This iterates over the most major dimension of the 2-D vector and performs @@ -1656,6 +1433,302 @@ class ShapeCastOp2DUpCastRewritePattern } // namespace +namespace mlir { + +/// Progressively lower a `vector.contract %a, %b, %c` with row-major matmul +/// semantics to: +/// ``` +/// %flattened_a = vector.shape_cast %a +/// %flattened_b = vector.shape_cast %b +/// %flattened_d = vector.matmul %flattened_a, %flattened_b +/// %d = vector.shape_cast %%flattened_d +/// %e = add %c, %d +/// ``` +/// `vector.matmul` later lowers to `llvm.matrix.multiply`. +// +/// This only kicks in when VectorTransformsOptions is set to OuterProduct and +/// the vector.contract op is a row-major matrix multiply. 
+LogicalResult +ContractionOpToMatmulOpLowering::match(vector::ContractionOp op) const { + // TODO(ajcbik): implement masks + if (llvm::size(op.masks()) != 0) + return failure(); + + if (vectorTransformsOptions.vectorContractLowering != + vector::VectorContractLowering::Matmul || + !isRowMajorMatmul(op.indexing_maps())) + return failure(); + return success(); +} + +void ContractionOpToMatmulOpLowering::rewrite(vector::ContractionOp op, + PatternRewriter &rewriter) const { + VectorType lhsType = op.getLhsType(); + VectorType rhsType = op.getRhsType(); + unsigned lhsRows = op.getLhsType().getShape()[0]; + unsigned lhsColumns = op.getLhsType().getShape()[1]; + unsigned rhsColumns = op.getRhsType().getShape()[1]; + + Type flattenedLHSType = + VectorType::get(lhsType.getNumElements(), lhsType.getElementType()); + Type flattenedRHSType = + VectorType::get(rhsType.getNumElements(), rhsType.getElementType()); + auto lhs = rewriter.create(op.getLoc(), flattenedLHSType, + op.lhs()); + auto rhs = rewriter.create(op.getLoc(), flattenedRHSType, + op.rhs()); + + Value mul = rewriter.create(op.getLoc(), lhs, rhs, lhsRows, + lhsColumns, rhsColumns); + mul = rewriter.create(op.getLoc(), op.acc().getType(), + mul); + Type elementType = op.getLhsType().getElementType(); + assert(elementType.isIntOrFloat()); + if (elementType.isa()) + rewriter.replaceOpWithNewOp(op, op.acc(), mul); + else + rewriter.replaceOpWithNewOp(op, op.acc(), mul); +} + +/// Progressively lower a `vector.contract %a, %b, %c` with row-major matmul +/// semantics to a reduction_size-unrolled sequence: +/// ``` +/// %at = vector.transpose %a, [1, 0] +/// %bRow0 = vector.extract %b[0] +/// %atRow0 = vector.extract %at[0] +/// %c0 = vector.outerproduct %atRow0, %bRow0, %c +/// ... 
+/// %bRowK = vector.extract %b[K] +/// %atRowK = vector.extract %at[K] +/// %cK = vector.outerproduct %atRowK, %bRowK, %cK-1 +/// ``` +/// +/// This only kicks in when VectorTransformsOptions is set to OuterProduct and +/// the vector.contract op is a row-major matrix multiply. +void ContractionOpToOuterProductOpLowering::rewrite( + vector::ContractionOp op, PatternRewriter &rewriter) const { + VectorType lhsType = op.getLhsType(); + // TODO(ntv) other modes. + // We know we are in row-major. + bool transposeLhs = false; + unsigned reductionSize = + transposeLhs ? lhsType.getShape()[0] : lhsType.getShape()[1]; + + // If transposeLhs == false (i.e. lhs(m, reductionSize)), we need to + // transpose it to extract the proper vector. Otherwise, just take + // the lhs. + Value lhs = transposeLhs + ? op.lhs() + : rewriter.create( + op.getLoc(), op.lhs(), ArrayRef{1, 0}); + Value res = op.acc(); + // ExtractOp does not allow dynamic indexing, we must unroll explicitly. + for (unsigned k = 0; k < reductionSize; ++k) { + Value a = rewriter.create(op.getLoc(), lhs, k); + Value b = rewriter.create(op.getLoc(), op.rhs(), k); + res = rewriter.create(op.getLoc(), a, b, res); + } + rewriter.replaceOp(op, res); +} + +LogicalResult +ContractionOpToOuterProductOpLowering ::match(vector::ContractionOp op) const { + // TODO(ajcbik): implement masks + if (llvm::size(op.masks()) != 0) + return failure(); + + if (vectorTransformsOptions.vectorContractLowering != + vector::VectorContractLowering::OuterProduct || + !isRowMajorMatmul(op.indexing_maps())) + return failure(); + return success(); +} + +/// Progressive lowering of ContractionOp. +/// One: +/// %x = vector.contract with at least one free/batch dimension +/// is replaced by: +/// %a = vector.contract with one less free/batch dimension +/// %b = vector.contract with one less free/batch dimension +/// .. +/// %x = combine %a %b .. 
+/// until a pure contraction is reached (no free/batch dimensions), +/// which is replaced by a fma/reduction op. +/// +/// TODO(ajcbik): break down into transpose/reshape/cast ops +/// when they become available to avoid code dup +/// TODO(ajcbik): investigate lowering order impact on performance +LogicalResult +ContractionOpLowering::matchAndRewrite(vector::ContractionOp op, + PatternRewriter &rewriter) const { + + // TODO(ajcbik): implement masks. + if (llvm::size(op.masks()) != 0) + return failure(); + + // TODO(ntv, ajcbik): implement benefits, cost models. + MLIRContext *ctx = op.getContext(); + ContractionOpToMatmulOpLowering pat1(vectorTransformsOptions, ctx); + if (succeeded(pat1.match(op))) + return failure(); + ContractionOpToOuterProductOpLowering pat2(vectorTransformsOptions, ctx); + if (succeeded(pat2.match(op))) + return failure(); + + // Find first batch dimension in LHS/RHS, and lower when found. + std::vector> batchDimMap = op.getBatchDimMap(); + if (!batchDimMap.empty()) { + int64_t lhsIndex = batchDimMap[0].first; + int64_t rhsIndex = batchDimMap[0].second; + rewriter.replaceOp(op, lowerParallel(op, lhsIndex, rhsIndex, rewriter)); + return success(); + } + + // Collect contracting dimensions. + std::vector> contractingDimMap = + op.getContractingDimMap(); + DenseSet lhsContractingDimSet; + DenseSet rhsContractingDimSet; + for (auto &dimPair : contractingDimMap) { + lhsContractingDimSet.insert(dimPair.first); + rhsContractingDimSet.insert(dimPair.second); + } + + // Find first free dimension in LHS, and lower when found. + VectorType lhsType = op.getLhsType(); + for (int64_t lhsIndex = 0, e = lhsType.getRank(); lhsIndex < e; ++lhsIndex) { + if (lhsContractingDimSet.count(lhsIndex) == 0) { + rewriter.replaceOp( + op, lowerParallel(op, lhsIndex, /*rhsIndex=*/-1, rewriter)); + return success(); + } + } + + // Find first free dimension in RHS, and lower when found. 
+ VectorType rhsType = op.getRhsType(); + for (int64_t rhsIndex = 0, e = rhsType.getRank(); rhsIndex < e; ++rhsIndex) { + if (rhsContractingDimSet.count(rhsIndex) == 0) { + rewriter.replaceOp( + op, lowerParallel(op, /*lhsIndex=*/-1, rhsIndex, rewriter)); + return success(); + } + } + + // Lower the first remaining reduction dimension. + if (!contractingDimMap.empty()) { + rewriter.replaceOp(op, lowerReduction(op, rewriter)); + return success(); + } + + return failure(); +} + +// Lower one parallel dimension. +// TODO(ajcbik): consider reusing existing contract unrolling +Value ContractionOpLowering::lowerParallel(vector::ContractionOp op, + int64_t lhsIndex, int64_t rhsIndex, + PatternRewriter &rewriter) const { + VectorType lhsType = op.getLhsType(); + VectorType rhsType = op.getRhsType(); + VectorType resType = op.getResultType().cast(); + // Find the iterator type index and result index. + SmallVector iMap = op.getIndexingMaps(); + int64_t iterIndex = -1; + int64_t dimSize = -1; + if (lhsIndex >= 0) { + iterIndex = iMap[0].getResult(lhsIndex).cast().getPosition(); + assert( + (rhsIndex < 0 || + iterIndex == + iMap[1].getResult(rhsIndex).cast().getPosition()) && + "parallel index should be free in LHS or batch in LHS/RHS"); + dimSize = lhsType.getDimSize(lhsIndex); + } else { + assert(rhsIndex >= 0 && "missing parallel index"); + iterIndex = iMap[1].getResult(rhsIndex).cast().getPosition(); + dimSize = rhsType.getDimSize(rhsIndex); + } + assert(iterIndex >= 0 && "parallel index not listed in operand mapping"); + Optional lookup = getResultIndex(iMap[2], iterIndex); + assert(lookup.hasValue() && "parallel index not listed in reduction"); + int64_t resIndex = lookup.getValue(); + // Construct new iterator types and affine map array attribute. 
+ SmallVector lowIndexingMaps; + lowIndexingMaps.push_back(adjustMap(iMap[0], iterIndex, rewriter)); + lowIndexingMaps.push_back(adjustMap(iMap[1], iterIndex, rewriter)); + lowIndexingMaps.push_back(adjustMap(iMap[2], iterIndex, rewriter)); + auto lowAffine = rewriter.getAffineMapArrayAttr(lowIndexingMaps); + auto lowIter = + rewriter.getArrayAttr(adjustIter(op.iterator_types(), iterIndex)); + // Unroll into a series of lower dimensional vector.contract ops. + Location loc = op.getLoc(); + Value result = + rewriter.create(loc, resType, rewriter.getZeroAttr(resType)); + for (int64_t d = 0; d < dimSize; ++d) { + auto lhs = reshapeLoad(loc, op.lhs(), lhsType, lhsIndex, d, rewriter); + auto rhs = reshapeLoad(loc, op.rhs(), rhsType, rhsIndex, d, rewriter); + auto acc = reshapeLoad(loc, op.acc(), resType, resIndex, d, rewriter); + Value lowContract = rewriter.create( + loc, lhs, rhs, acc, lowAffine, lowIter); + result = + reshapeStore(loc, lowContract, result, resType, resIndex, d, rewriter); + } + return result; +} + +// Lower one reduction dimension. +Value ContractionOpLowering::lowerReduction(vector::ContractionOp op, + PatternRewriter &rewriter) const { + auto loc = op.getLoc(); + VectorType lhsType = op.getLhsType(); + VectorType rhsType = op.getRhsType(); + Type resType = op.getResultType(); + assert(!resType.isa()); + // Use iterator index 0. + int64_t iterIndex = 0; + SmallVector iMap = op.getIndexingMaps(); + Optional lookupLhs = getResultIndex(iMap[0], iterIndex); + Optional lookupRhs = getResultIndex(iMap[1], iterIndex); + assert(lookupLhs.hasValue() && "missing LHS parallel index"); + assert(lookupRhs.hasValue() && "missing RHS parallel index"); + int64_t lhsIndex = lookupLhs.getValue(); + int64_t rhsIndex = lookupRhs.getValue(); + int64_t dimSize = lhsType.getDimSize(lhsIndex); + assert(dimSize == rhsType.getDimSize(rhsIndex) && "corrupt shape"); + // Base case. 
+ if (lhsType.getRank() == 1) { + assert(rhsType.getRank() == 1 && "corrupt contraction"); + Value zero = rewriter.create(loc, lhsType, + rewriter.getZeroAttr(lhsType)); + Value fma = rewriter.create(loc, op.lhs(), op.rhs(), zero); + StringAttr kind = rewriter.getStringAttr("add"); + return rewriter.create(loc, resType, kind, fma, + op.acc()); + } + // Construct new iterator types and affine map array attribute. + SmallVector lowIndexingMaps; + lowIndexingMaps.push_back(adjustMap(iMap[0], iterIndex, rewriter)); + lowIndexingMaps.push_back(adjustMap(iMap[1], iterIndex, rewriter)); + lowIndexingMaps.push_back(adjustMap(iMap[2], iterIndex, rewriter)); + auto lowAffine = rewriter.getAffineMapArrayAttr(lowIndexingMaps); + auto lowIter = + rewriter.getArrayAttr(adjustIter(op.iterator_types(), iterIndex)); + // Unroll into a series of lower dimensional vector.contract ops. + // By feeding the initial accumulator into the first contraction, + // and the result of each contraction into the next, eventually + // the sum of all reductions is computed. + Value result = op.acc(); + for (int64_t d = 0; d < dimSize; ++d) { + auto lhs = reshapeLoad(loc, op.lhs(), lhsType, lhsIndex, d, rewriter); + auto rhs = reshapeLoad(loc, op.rhs(), rhsType, rhsIndex, d, rewriter); + result = rewriter.create(loc, lhs, rhs, result, + lowAffine, lowIter); + } + return result; +} + +} // namespace mlir + // TODO(andydavis) Add pattern to rewrite ExtractSlices(ConstantMaskOp). // TODO(andydavis) Add this as DRR pattern. 
void mlir::vector::populateVectorToVectorTransformationPatterns( @@ -1685,6 +1758,8 @@ void mlir::vector::populateVectorContractLoweringPatterns( ShapeCastOp2DDownCastRewritePattern, ShapeCastOp2DUpCastRewritePattern, TransposeOpLowering>(context); + patterns.insert(parameters, context); // clang-format on - patterns.insert(parameters, context); } diff --git a/mlir/test/Dialect/Vector/vector-contract-transforms.mlir b/mlir/test/Dialect/Vector/vector-contract-transforms.mlir index 72270dab11538..7eea3baa8d87c 100644 --- a/mlir/test/Dialect/Vector/vector-contract-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-contract-transforms.mlir @@ -1,5 +1,6 @@ -// RUN: mlir-opt %s -test-vector-contraction-conversion | FileCheck %s -// RUN: mlir-opt %s -test-vector-contraction-conversion=vector-lower-matrix-intrinsics=1 | FileCheck %s --check-prefix=MATRIX +// RUN: mlir-opt %s -test-vector-contraction-conversion | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -test-vector-contraction-conversion=vector-lower-matrix-intrinsics=1 | FileCheck %s --check-prefix=MATRIX --dump-input-on-failure +// RUN: mlir-opt %s -test-vector-contraction-conversion=vector-outerproduct=1 | FileCheck %s --check-prefix=OUTERPRODUCT --dump-input-on-failure #dotp_accesses = [ affine_map<(i) -> (i)>, @@ -382,6 +383,35 @@ func @shape_casts(%a: vector<2x2xf32>) -> (vector<4xf32>, vector<2x2xf32>) { // MATRIX: %[[mm4:.*]] = vector.extract_strided_slice %[[mm1]] {offsets = [3], sizes = [3], strides = [1]} : vector<6xf32> to vector<3xf32> // MATRIX: %[[mm5:.*]] = vector.insert %[[mm4]], %[[mm3]] [1] : vector<3xf32> into vector<2x3xf32> // MATRIX: %[[mm6:.*]] = addf %[[C]], %[[mm5]] : vector<2x3xf32> + +// OUTERPRODUCT-LABEL: func @matmul +// OUTERPRODUCT-SAME: %[[A:[a-zA-Z0-9]*]]: vector<2x4xf32>, +// OUTERPRODUCT-SAME: %[[B:[a-zA-Z0-9]*]]: vector<4x3xf32>, +// OUTERPRODUCT-SAME: %[[C:[a-zA-Z0-9]*]]: vector<2x3xf32> +// OUTERPRODUCT: %[[At:.*]] = vector.transpose %[[A]], [1, 0] +// 
OUTERPRODUCT-SAME: : vector<2x4xf32> to vector<4x2xf32> +// +// OUTERPRODUCT: %[[a0:.*]] = vector.extract %[[At]][0] : vector<4x2xf32> +// OUTERPRODUCT: %[[b0:.*]] = vector.extract %[[B]][0] : vector<4x3xf32> +// OUTERPRODUCT: %[[c0:.*]] = vector.outerproduct %[[a0]], %[[b0]], %[[C]] +// OUTERPRODUCT-SAME: : vector<2xf32>, vector<3xf32> +// +// OUTERPRODUCT: %[[a1:.*]] = vector.extract %[[At]][1] : vector<4x2xf32> +// OUTERPRODUCT: %[[b1:.*]] = vector.extract %[[B]][1] : vector<4x3xf32> +// OUTERPRODUCT: %[[c1:.*]] = vector.outerproduct %[[a1]], %[[b1]], %[[c0]] +// OUTERPRODUCT-SAME: : vector<2xf32>, vector<3xf32> +// +// OUTERPRODUCT: %[[a2:.*]] = vector.extract %[[At]][2] : vector<4x2xf32> +// OUTERPRODUCT: %[[b2:.*]] = vector.extract %[[B]][2] : vector<4x3xf32> +// OUTERPRODUCT: %[[c2:.*]] = vector.outerproduct %[[a2]], %[[b2]], %[[c1]] +// OUTERPRODUCT-SAME: : vector<2xf32>, vector<3xf32> +// +// OUTERPRODUCT: %[[a3:.*]] = vector.extract %[[At]][3] : vector<4x2xf32> +// OUTERPRODUCT: %[[b3:.*]] = vector.extract %[[B]][3] : vector<4x3xf32> +// OUTERPRODUCT: %[[c3:.*]] = vector.outerproduct %[[a3]], %[[b3]], %[[c2]] +// OUTERPRODUCT-SAME: : vector<2xf32>, vector<3xf32> +// +// OUTERPRODUCT: return %[[c3]] : vector<2x3xf32> func @matmul(%arg0: vector<2x4xf32>, %arg1: vector<4x3xf32>, %arg2: vector<2x3xf32>) -> vector<2x3xf32> { diff --git a/mlir/test/lib/Transforms/TestVectorTransforms.cpp b/mlir/test/lib/Transforms/TestVectorTransforms.cpp index c57540bc2ef70..65024dbe3acda 100644 --- a/mlir/test/lib/Transforms/TestVectorTransforms.cpp +++ b/mlir/test/lib/Transforms/TestVectorTransforms.cpp @@ -51,11 +51,26 @@ struct TestVectorContractionConversion *this, "vector-lower-matrix-intrinsics", llvm::cl::desc("Lower vector.contract to llvm.intr.matrix.multiply"), llvm::cl::init(false)}; + Option lowerToOuterProduct{ + *this, "vector-outerproduct", + llvm::cl::desc("Lower vector.contract to vector.outerproduct"), + llvm::cl::init(false)}; void runOnFunction() override 
{ OwningRewritePatternList patterns; - VectorTransformsOptions options{ - /*lowerToLLVMMatrixIntrinsics=*/lowerToLLVMMatrixIntrinsics}; + if (lowerToOuterProduct) { + VectorContractLowering lowering = VectorContractLowering::OuterProduct; + VectorTransformsOptions options{lowering}; + patterns.insert(options, + &getContext()); + applyPatternsAndFoldGreedily(getFunction(), patterns); + return; + } + + VectorContractLowering lowering = VectorContractLowering::FMA; + if (lowerToLLVMMatrixIntrinsics) + lowering = VectorContractLowering::Matmul; + VectorTransformsOptions options{lowering}; populateVectorContractLoweringPatterns(patterns, &getContext(), options); applyPatternsAndFoldGreedily(getFunction(), patterns); } From 792575ff323b714d03215951c6fff105f1074aac Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Tue, 26 May 2020 14:47:02 +0100 Subject: [PATCH 097/770] [NFC][ARM][AArch64] More code size tests Add analysis runs for icmp, fcmp and select instructions. --- llvm/test/Analysis/CostModel/AArch64/cmp.ll | 52 +++ .../test/Analysis/CostModel/AArch64/select.ll | 47 ++- llvm/test/Analysis/CostModel/ARM/cmps.ll | 154 +++++++ llvm/test/Analysis/CostModel/ARM/icmps.ll | 56 --- llvm/test/Analysis/CostModel/ARM/select.ll | 378 ++++++++++++------ 5 files changed, 492 insertions(+), 195 deletions(-) create mode 100644 llvm/test/Analysis/CostModel/AArch64/cmp.ll create mode 100644 llvm/test/Analysis/CostModel/ARM/cmps.ll delete mode 100644 llvm/test/Analysis/CostModel/ARM/icmps.ll diff --git a/llvm/test/Analysis/CostModel/AArch64/cmp.ll b/llvm/test/Analysis/CostModel/AArch64/cmp.ll new file mode 100644 index 0000000000000..c8512bb2664ce --- /dev/null +++ b/llvm/test/Analysis/CostModel/AArch64/cmp.ll @@ -0,0 +1,52 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -cost-model -analyze -mtriple=aarch64-- < %s | FileCheck %s --check-prefix=CHECK-THROUGHPUT +; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=aarch64-- < 
%s | FileCheck %s --check-prefix=CHECK-SIZE + +define i32 @cmps() { +; CHECK-THROUGHPUT-LABEL: 'cmps' +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0 = icmp slt i8 undef, undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1 = icmp ult i16 undef, undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a2 = icmp sge i32 undef, undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a3 = icmp ne i64 undef, undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4 = icmp slt <16 x i8> undef, undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a5 = icmp ult <8 x i16> undef, undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a6 = icmp sge <4 x i32> undef, undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a7 = fcmp oge half undef, undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a8 = fcmp ogt float undef, undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a9 = fcmp ogt double undef, undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %a10 = fcmp olt <8 x half> undef, undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a12 = fcmp oge <2 x double> undef, undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; CHECK-SIZE-LABEL: 'cmps' +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0 = icmp slt i8 undef, undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1 = icmp ult i16 
undef, undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a2 = icmp sge i32 undef, undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a3 = icmp ne i64 undef, undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4 = icmp slt <16 x i8> undef, undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a5 = icmp ult <8 x i16> undef, undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a6 = icmp sge <4 x i32> undef, undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a7 = fcmp oge half undef, undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a8 = fcmp ogt float undef, undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a9 = fcmp ogt double undef, undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10 = fcmp olt <8 x half> undef, undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a12 = fcmp oge <2 x double> undef, undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %a0 = icmp slt i8 undef, undef + %a1 = icmp ult i16 undef, undef + %a2 = icmp sge i32 undef, undef + %a3 = icmp ne i64 undef, undef + %a4 = icmp slt <16 x i8> undef, undef + %a5 = icmp ult <8 x i16> undef, undef + %a6 = icmp sge <4 x i32> undef, undef + %a7 = fcmp oge half undef, undef + %a8 = fcmp ogt float undef, undef + %a9 = fcmp ogt double undef, undef + %a10 = fcmp olt <8 x half> undef, undef + %a11 = fcmp oge <4 x float> undef, undef + %a12 = fcmp oge <2 x double> undef, undef + ret i32 undef +} diff --git a/llvm/test/Analysis/CostModel/AArch64/select.ll b/llvm/test/Analysis/CostModel/AArch64/select.ll index 
1a1248e661c58..25af9af1c6e93 100644 --- a/llvm/test/Analysis/CostModel/AArch64/select.ll +++ b/llvm/test/Analysis/CostModel/AArch64/select.ll @@ -1,37 +1,56 @@ -; RUN: opt < %s -cost-model -analyze -mtriple=arm64-apple-ios -mcpu=cyclone | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -cost-model -analyze -mtriple=arm64-apple-ios -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-THROUGHPUT +; RUN: opt < %s -cost-model -analyze -cost-kind=code-size -mtriple=aarch64-- | FileCheck %s --check-prefix=CHECK-SIZE target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" -; CHECK-LABEL: select define void @select() { ; Scalar values - ; CHECK: cost of 1 {{.*}} select +; CHECK-THROUGHPUT-LABEL: 'select' +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = select i1 undef, i16 undef, i16 undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = select i1 undef, i32 undef, i32 undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4 = select i1 undef, i64 undef, i64 undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = select i1 undef, double undef, double undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 16 for 
instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-SIZE-LABEL: 'select' +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = select i1 undef, i16 undef, i16 undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = select i1 undef, i32 undef, i32 undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4 = select i1 undef, i64 undef, i64 undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = select i1 undef, double undef, double undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef +; CHECK-SIZE-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; %v1 = select i1 undef, i8 undef, i8 undef - ; CHECK: cost of 1 {{.*}} select %v2 = select i1 undef, i16 undef, i16 undef - ; CHECK: cost of 1 {{.*}} select %v3 = select i1 undef, i32 undef, i32 undef - ; CHECK: cost of 1 {{.*}} select %v4 = select i1 undef, i64 undef, i64 undef - ; CHECK: cost of 1 {{.*}} select %v5 = select i1 undef, float undef, float undef - ; CHECK: cost of 1 {{.*}} select %v6 = select i1 undef, double undef, double undef - ; CHECK: cost of 16 {{.*}} select %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef - ; CHECK: cost of 8 {{.*}} select %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef - ; CHECK: cost of 16 {{.*}} select %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef ; Vector values - check for vectors of i64s that have a high cost because ; they end up scalarized. 
- ; CHECK: cost of 80 {{.*}} select %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef - ; CHECK: cost of 160 {{.*}} select %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef - ; CHECK: cost of 320 {{.*}} select %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef ret void diff --git a/llvm/test/Analysis/CostModel/ARM/cmps.ll b/llvm/test/Analysis/CostModel/ARM/cmps.ll new file mode 100644 index 0000000000000..d7d84b2388b0e --- /dev/null +++ b/llvm/test/Analysis/CostModel/ARM/cmps.ll @@ -0,0 +1,154 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -cost-model -analyze -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVE-RECIP +; RUN: opt -cost-model -analyze -mtriple=thumbv8m.main-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-MAIN-RECIP +; RUN: opt -cost-model -analyze -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE-RECIP +; RUN: opt -cost-model -analyze -mtriple=armv8r-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8R-RECIP +; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVE-SIZE +; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8m.main-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-MAIN-SIZE +; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE-SIZE +; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=armv8r-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8R-SIZE + +define i32 @cmps() { +; CHECK-MVE-RECIP-LABEL: 'cmps' +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b = icmp ult i16 undef, undef +; CHECK-MVE-RECIP-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %c = icmp sge i32 undef, undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = icmp ne i64 undef, undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = icmp slt <16 x i8> undef, undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f = icmp ult <8 x i16> undef, undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g = icmp sge <4 x i32> undef, undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a7 = fcmp oge half undef, undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a8 = fcmp ogt float undef, undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a9 = fcmp ogt double undef, undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a10 = fcmp olt <8 x half> undef, undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a11 = fcmp oge <4 x float> undef, undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %a12 = fcmp oge <2 x double> undef, undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; CHECK-V8M-MAIN-RECIP-LABEL: 'cmps' +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b = icmp ult i16 undef, undef +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = icmp sge i32 undef, undef +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = icmp ne i64 undef, undef +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %e = icmp slt <16 
x i8> undef, undef +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %f = icmp ult <8 x i16> undef, undef +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %g = icmp sge <4 x i32> undef, undef +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a7 = fcmp oge half undef, undef +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a8 = fcmp ogt float undef, undef +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a9 = fcmp ogt double undef, undef +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a10 = fcmp olt <8 x half> undef, undef +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a11 = fcmp oge <4 x float> undef, undef +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a12 = fcmp oge <2 x double> undef, undef +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; CHECK-V8M-BASE-RECIP-LABEL: 'cmps' +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b = icmp ult i16 undef, undef +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = icmp sge i32 undef, undef +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = icmp ne i64 undef, undef +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %e = icmp slt <16 x i8> undef, undef +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %f = icmp ult <8 x i16> undef, undef +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %g = icmp sge <4 x 
i32> undef, undef +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a7 = fcmp oge half undef, undef +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a8 = fcmp ogt float undef, undef +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a9 = fcmp ogt double undef, undef +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a10 = fcmp olt <8 x half> undef, undef +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a11 = fcmp oge <4 x float> undef, undef +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a12 = fcmp oge <2 x double> undef, undef +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; CHECK-V8R-RECIP-LABEL: 'cmps' +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b = icmp ult i16 undef, undef +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = icmp sge i32 undef, undef +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = icmp ne i64 undef, undef +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = icmp slt <16 x i8> undef, undef +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = icmp ult <8 x i16> undef, undef +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = icmp sge <4 x i32> undef, undef +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a7 = fcmp oge half undef, undef +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a8 = fcmp ogt float undef, undef +; CHECK-V8R-RECIP-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %a9 = fcmp ogt double undef, undef +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %a10 = fcmp olt <8 x half> undef, undef +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a12 = fcmp oge <2 x double> undef, undef +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; CHECK-MVE-SIZE-LABEL: 'cmps' +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b = icmp ult i16 undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = icmp sge i32 undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = icmp ne i64 undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = icmp slt <16 x i8> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = icmp ult <8 x i16> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = icmp sge <4 x i32> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a7 = fcmp oge half undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a8 = fcmp ogt float undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a9 = fcmp ogt double undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10 = fcmp olt <8 x half> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef +; 
CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a12 = fcmp oge <2 x double> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; CHECK-V8M-MAIN-SIZE-LABEL: 'cmps' +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b = icmp ult i16 undef, undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = icmp sge i32 undef, undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = icmp ne i64 undef, undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = icmp slt <16 x i8> undef, undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = icmp ult <8 x i16> undef, undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = icmp sge <4 x i32> undef, undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a7 = fcmp oge half undef, undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a8 = fcmp ogt float undef, undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a9 = fcmp ogt double undef, undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10 = fcmp olt <8 x half> undef, undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a12 = fcmp oge <2 x double> undef, undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; CHECK-V8M-BASE-SIZE-LABEL: 'cmps' +; 
CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b = icmp ult i16 undef, undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = icmp sge i32 undef, undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = icmp ne i64 undef, undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = icmp slt <16 x i8> undef, undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = icmp ult <8 x i16> undef, undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = icmp sge <4 x i32> undef, undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a7 = fcmp oge half undef, undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a8 = fcmp ogt float undef, undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a9 = fcmp ogt double undef, undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10 = fcmp olt <8 x half> undef, undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a12 = fcmp oge <2 x double> undef, undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; CHECK-V8R-SIZE-LABEL: 'cmps' +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b = icmp ult i16 undef, undef +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %c = icmp sge i32 undef, undef +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = icmp ne i64 undef, undef +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = icmp slt <16 x i8> undef, undef +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = icmp ult <8 x i16> undef, undef +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = icmp sge <4 x i32> undef, undef +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a7 = fcmp oge half undef, undef +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a8 = fcmp ogt float undef, undef +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a9 = fcmp ogt double undef, undef +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10 = fcmp olt <8 x half> undef, undef +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a12 = fcmp oge <2 x double> undef, undef +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %a = icmp slt i8 undef, undef + %b = icmp ult i16 undef, undef + %c = icmp sge i32 undef, undef + %d = icmp ne i64 undef, undef + %e = icmp slt <16 x i8> undef, undef + %f = icmp ult <8 x i16> undef, undef + %g = icmp sge <4 x i32> undef, undef + %a7 = fcmp oge half undef, undef + %a8 = fcmp ogt float undef, undef + %a9 = fcmp ogt double undef, undef + %a10 = fcmp olt <8 x half> undef, undef + %a11 = fcmp oge <4 x float> undef, undef + %a12 = fcmp oge <2 x double> undef, undef + ret i32 undef +} diff --git a/llvm/test/Analysis/CostModel/ARM/icmps.ll b/llvm/test/Analysis/CostModel/ARM/icmps.ll deleted file mode 100644 index 
962ddb499347e..0000000000000 --- a/llvm/test/Analysis/CostModel/ARM/icmps.ll +++ /dev/null @@ -1,56 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -cost-model -analyze -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVE -; RUN: opt -cost-model -analyze -mtriple=thumbv8m.main-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-MAIN -; RUN: opt -cost-model -analyze -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE -; RUN: opt -cost-model -analyze -mtriple=armv8r-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8R - -define i32 @icmps() { -; CHECK-MVE-LABEL: 'icmps' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b = icmp ult i16 undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = icmp sge i32 undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = icmp ne i64 undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = icmp slt <16 x i8> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f = icmp ult <8 x i16> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %g = icmp sge <4 x i32> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; CHECK-V8M-MAIN-LABEL: 'icmps' -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b = icmp ult i16 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = icmp sge i32 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %d = icmp ne i64 undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %e = icmp slt <16 x i8> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %f = icmp ult <8 x i16> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %g = icmp sge <4 x i32> undef, undef -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; CHECK-V8M-BASE-LABEL: 'icmps' -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b = icmp ult i16 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = icmp sge i32 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = icmp ne i64 undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %e = icmp slt <16 x i8> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %f = icmp ult <8 x i16> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %g = icmp sge <4 x i32> undef, undef -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; CHECK-V8R-LABEL: 'icmps' -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b = icmp ult i16 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = icmp sge i32 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = icmp ne i64 undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = 
icmp slt <16 x i8> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = icmp ult <8 x i16> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = icmp sge <4 x i32> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; - %a = icmp slt i8 undef, undef - %b = icmp ult i16 undef, undef - %c = icmp sge i32 undef, undef - %d = icmp ne i64 undef, undef - %e = icmp slt <16 x i8> undef, undef - %f = icmp ult <8 x i16> undef, undef - %g = icmp sge <4 x i32> undef, undef - ret i32 undef -} diff --git a/llvm/test/Analysis/CostModel/ARM/select.ll b/llvm/test/Analysis/CostModel/ARM/select.ll index df62ff0f09aef..1e350f139afe8 100644 --- a/llvm/test/Analysis/CostModel/ARM/select.ll +++ b/llvm/test/Analysis/CostModel/ARM/select.ll @@ -1,135 +1,263 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -cost-model -analyze -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp | FileCheck %s --check-prefix=CHECK-MVE -; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s --check-prefix=CHECK-NEON -; RUN: opt < %s -cost-model -analyze -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-THUMB1 -; RUN: opt < %s -cost-model -analyze -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-THUMB2 +; RUN: opt < %s -cost-model -analyze -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp | FileCheck %s --check-prefix=CHECK-MVE-RECIP +; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s --check-prefix=CHECK-NEON-RECIP +; RUN: opt < %s -cost-model -analyze -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-THUMB1-RECIP +; RUN: opt < %s -cost-model -analyze -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-THUMB2-RECIP +; RUN: opt < %s -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8.1m.main-none-eabi 
-mattr=+mve.fp | FileCheck %s --check-prefix=CHECK-MVE-SIZE +; RUN: opt < %s -cost-model -analyze -cost-kind=code-size -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s --check-prefix=CHECK-NEON-SIZE +; RUN: opt < %s -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-THUMB1-SIZE +; RUN: opt < %s -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-THUMB2-SIZE define void @selects() { -; CHECK-MVE-LABEL: 'selects' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = select i1 undef, i16 undef, i16 undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = select i1 undef, i32 undef, i32 undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4 = select i1 undef, i64 undef, i64 undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = select i1 undef, double undef, double undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef -; CHECK-MVE-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef -; CHECK-MVE-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void + ; Scalar values +; CHECK-MVE-RECIP-LABEL: 'selects' +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = select i1 undef, i16 undef, i16 undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = select i1 undef, i32 undef, i32 undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4 = select i1 undef, i64 undef, i64 undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = select i1 undef, double undef, double undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v11 = select <2 x i1> undef, <2 x 
i16> undef, <2 x i16> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found 
an estimated cost of 12 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; CHECK-NEON-LABEL: 'selects' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = select i1 undef, i16 undef, i16 undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = select i1 undef, i32 undef, i32 undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4 = select i1 undef, i64 undef, i64 undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = select i1 undef, double undef, double undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef -; CHECK-NEON-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef -; 
CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-NEON-RECIP-LABEL: 'selects' +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = select i1 undef, i16 undef, i16 undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = select i1 undef, i32 undef, i32 undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4 = select i1 undef, i64 undef, i64 undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = select i1 undef, double undef, double undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = 
select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef +; 
CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; CHECK-THUMB1-LABEL: 'selects' -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = select i1 undef, i16 undef, i16 undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = select i1 undef, i32 undef, i32 undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4 = select i1 undef, i64 undef, i64 undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v6 = select i1 undef, double undef, double undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v9 = 
select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 4 
for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef -; CHECK-THUMB1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-THUMB1-RECIP-LABEL: 'selects' +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = select i1 undef, i16 undef, i16 undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = select i1 undef, i32 undef, i32 undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4 = select i1 undef, i64 undef, i64 undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v6 = select i1 undef, double undef, double undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8 = select <4 x i1> undef, <4 
x i8> undef, <4 x i8> undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef +; 
CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; CHECK-THUMB2-LABEL: 'selects' -; CHECK-THUMB2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef -; CHECK-THUMB2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = select i1 undef, i16 undef, i16 undef -; CHECK-THUMB2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = select i1 undef, i32 undef, i32 undef -; CHECK-THUMB2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4 = select i1 undef, i64 undef, i64 undef -; CHECK-THUMB2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef -; CHECK-THUMB2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v6 = select i1 undef, double undef, double undef -; CHECK-THUMB2-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef -; CHECK-THUMB2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef -; CHECK-THUMB2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef -; CHECK-THUMB2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef -; CHECK-THUMB2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef -; CHECK-THUMB2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef -; CHECK-THUMB2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef -; CHECK-THUMB2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef -; CHECK-THUMB2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef -; CHECK-THUMB2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef -; CHECK-THUMB2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef -; CHECK-THUMB2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef -; CHECK-THUMB2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef -; CHECK-THUMB2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef -; CHECK-THUMB2-NEXT: Cost 
Model: Found an estimated cost of 32 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef -; CHECK-THUMB2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef -; CHECK-THUMB2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef -; CHECK-THUMB2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef -; CHECK-THUMB2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef -; CHECK-THUMB2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef -; CHECK-THUMB2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef -; CHECK-THUMB2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef -; CHECK-THUMB2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-THUMB2-RECIP-LABEL: 'selects' +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = select i1 undef, i16 undef, i16 undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = select i1 undef, i32 undef, i32 undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4 = select i1 undef, i64 undef, i64 undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 2 
for instruction: %v6 = select i1 undef, double undef, double undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x 
i64> undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVE-SIZE-LABEL: 'selects' +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = select i1 undef, i16 undef, i16 undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = select i1 undef, i32 undef, i32 undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4 = select i1 undef, 
i64 undef, i64 undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = select i1 undef, double undef, double undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15c = select <16 x i1> undef, <16 x i32> 
undef, <16 x i32> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; CHECK-NEON-SIZE-LABEL: 'selects' +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = select i1 undef, i16 undef, i16 undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = select i1 undef, i32 
undef, i32 undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4 = select i1 undef, i64 undef, i64 undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = select i1 undef, double undef, double undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x 
i32> undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; CHECK-THUMB1-SIZE-LABEL: 'selects' +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 
= select i1 undef, i16 undef, i16 undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = select i1 undef, i32 undef, i32 undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4 = select i1 undef, i64 undef, i64 undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = select i1 undef, double undef, double undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15 = select <4 x 
i1> undef, <4 x i32> undef, <4 x i32> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; CHECK-THUMB2-SIZE-LABEL: 'selects' +; 
CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = select i1 undef, i16 undef, i16 undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = select i1 undef, i32 undef, i32 undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4 = select i1 undef, i64 undef, i64 undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = select i1 undef, double undef, double undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; - ; Scalar values %v1 = select i1 undef, i8 undef, i8 undef %v2 = select i1 undef, i16 undef, i16 undef %v3 = select i1 undef, i32 undef, i32 undef From 222e0e58a87649623b3d16ce3fef56a6a0555be3 Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Mon, 25 May 2020 12:05:57 +0200 Subject: [PATCH 098/770] [MLIR] Helper class referencing MemRefType to unify runner implementations. Summary: Add DynamicMemRefType which can reference one of the statically ranked StridedMemRefType or a UnrankedMemRefType so that runner utils only need to be implemented once. There is definitely room for more clean up and unification, but I will keep that for follow-ups. Reviewers: nicolasvasilache Reviewed By: nicolasvasilache Subscribers: mehdi_amini, rriddle, jpienaar, shauheen, antiagainst, nicolasvasilache, arpith-jacob, mgester, lucyrfox, liufengdb, stephenneuendorffer, Joonsoo, grosul1, frgossen, Kayjukh, jurahul, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D80513 --- .../mlir/ExecutionEngine/CRunnerUtils.h | 43 ++++- .../mlir/ExecutionEngine/RunnerUtils.h | 150 ++++++++++-------- mlir/lib/ExecutionEngine/RunnerUtils.cpp | 47 +----- .../test/mlir-cpu-runner/unranked_memref.mlir | 4 - mlir/test/mlir-cpu-runner/utils.mlir | 7 +- .../cuda-runtime-wrappers.cpp | 21 ++- 6 files changed, 132 insertions(+), 140 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h b/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h index 8155820d63473..bc59d3de20860 100644 --- a/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h +++ b/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h @@ -33,12 +33,6 @@ #include -template -void dropFront(int64_t arr[N], int64_t *res) { - for (unsigned i = 1; i < N; ++i) - *(res + i - 1) = arr[i]; -} - 
//===----------------------------------------------------------------------===// // Codegen-compatible structures for Vector type. //===----------------------------------------------------------------------===// @@ -129,6 +123,10 @@ struct StridedMemRefType { res.basePtr = basePtr; res.data = data; res.offset = offset + idx * strides[0]; + auto dropFront = [](const int64_t *arr, int64_t *res) { + for (unsigned i = 1; i < N; ++i) + res[i - 1] = arr[i]; + }; dropFront(sizes, res.sizes); dropFront(strides, res.strides); return res; @@ -164,6 +162,39 @@ struct UnrankedMemRefType { void *descriptor; }; +//===----------------------------------------------------------------------===// +// DynamicMemRefType type. +//===----------------------------------------------------------------------===// +// A reference to one of the StridedMemRef types. +template +class DynamicMemRefType { +public: + explicit DynamicMemRefType(const StridedMemRefType &mem_ref) + : rank(0), basePtr(mem_ref.basePtr), data(mem_ref.data), + offset(mem_ref.offset), sizes(nullptr), strides(nullptr) {} + template + explicit DynamicMemRefType(const StridedMemRefType &mem_ref) + : rank(N), basePtr(mem_ref.basePtr), data(mem_ref.data), + offset(mem_ref.offset), sizes(mem_ref.sizes), strides(mem_ref.strides) { + } + explicit DynamicMemRefType(const UnrankedMemRefType &mem_ref) + : rank(mem_ref.rank) { + auto *desc = static_cast *>(mem_ref.descriptor); + basePtr = desc->basePtr; + data = desc->data; + offset = desc->offset; + sizes = rank == 0 ? nullptr : desc->sizes; + strides = sizes + rank; + } + + int64_t rank; + T *basePtr; + T *data; + int64_t offset; + const int64_t *sizes; + const int64_t *strides; +}; + //===----------------------------------------------------------------------===// // Small runtime support "lib" for vector.print lowering during codegen. 
//===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/ExecutionEngine/RunnerUtils.h b/mlir/include/mlir/ExecutionEngine/RunnerUtils.h index 5f239a4c146ea..7729b9c887967 100644 --- a/mlir/include/mlir/ExecutionEngine/RunnerUtils.h +++ b/mlir/include/mlir/ExecutionEngine/RunnerUtils.h @@ -35,29 +35,35 @@ #include "mlir/ExecutionEngine/CRunnerUtils.h" -template -void printMemRefMetaData(StreamType &os, StridedMemRefType &V) { - static_assert(N > 0, "Expected N > 0"); - os << "Memref base@ = " << reinterpret_cast(V.data) << " rank = " << N - << " offset = " << V.offset << " sizes = [" << V.sizes[0]; - for (unsigned i = 1; i < N; ++i) - os << ", " << V.sizes[i]; - os << "] strides = [" << V.strides[0]; - for (unsigned i = 1; i < N; ++i) - os << ", " << V.strides[i]; +template +void printMemRefMetaData(StreamType &os, const DynamicMemRefType &V) { + os << "base@ = " << reinterpret_cast(V.data) << " rank = " << V.rank + << " offset = " << V.offset; + auto print = [&](const int64_t *ptr) { + if (V.rank == 0) + return; + os << ptr[0]; + for (int64_t i = 1; i < V.rank; ++i) + os << ", " << ptr[i]; + }; + os << " sizes = ["; + print(V.sizes); + os << "] strides = ["; + print(V.strides); os << "]"; } -template -void printMemRefMetaData(StreamType &os, StridedMemRefType &V) { - os << "Memref base@ = " << reinterpret_cast(V.data) << " rank = 0" - << " offset = " << V.offset; +template +void printMemRefMetaData(StreamType &os, StridedMemRefType &V) { + static_assert(N >= 0, "Expected N > 0"); + os << "MemRef "; + printMemRefMetaData(os, DynamicMemRefType(V)); } -template +template void printUnrankedMemRefMetaData(StreamType &os, UnrankedMemRefType &V) { - os << "Unranked Memref rank = " << V.rank << " " - << "descriptor@ = " << reinterpret_cast(V.descriptor) << "\n"; + os << "Unranked MemRef "; + printMemRefMetaData(os, DynamicMemRefType(V)); } 
//////////////////////////////////////////////////////////////////////////////// @@ -118,88 +124,92 @@ std::ostream &operator<<(std::ostream &os, const Vector &v) { return os; } -template struct MemRefDataPrinter { - static void print(std::ostream &os, T *base, int64_t rank, int64_t offset, - int64_t *sizes, int64_t *strides); - static void printFirst(std::ostream &os, T *base, int64_t rank, - int64_t offset, int64_t *sizes, int64_t *strides); - static void printLast(std::ostream &os, T *base, int64_t rank, int64_t offset, - int64_t *sizes, int64_t *strides); -}; - -template struct MemRefDataPrinter { - static void print(std::ostream &os, T *base, int64_t rank, int64_t offset, - int64_t *sizes = nullptr, int64_t *strides = nullptr); +template +struct MemRefDataPrinter { + static void print(std::ostream &os, T *base, int64_t dim, int64_t rank, + int64_t offset, const int64_t *sizes, + const int64_t *strides); + static void printFirst(std::ostream &os, T *base, int64_t dim, int64_t rank, + int64_t offset, const int64_t *sizes, + const int64_t *strides); + static void printLast(std::ostream &os, T *base, int64_t dim, int64_t rank, + int64_t offset, const int64_t *sizes, + const int64_t *strides); }; -template -void MemRefDataPrinter::printFirst(std::ostream &os, T *base, - int64_t rank, int64_t offset, - int64_t *sizes, int64_t *strides) { +template +void MemRefDataPrinter::printFirst(std::ostream &os, T *base, int64_t dim, + int64_t rank, int64_t offset, + const int64_t *sizes, + const int64_t *strides) { os << "["; - MemRefDataPrinter::print(os, base, rank, offset, sizes + 1, - strides + 1); + print(os, base, dim - 1, rank, offset, sizes + 1, strides + 1); // If single element, close square bracket and return early. 
if (sizes[0] <= 1) { os << "]"; return; } os << ", "; - if (N > 1) + if (dim > 1) os << "\n"; } -template -void MemRefDataPrinter::print(std::ostream &os, T *base, int64_t rank, - int64_t offset, int64_t *sizes, - int64_t *strides) { - printFirst(os, base, rank, offset, sizes, strides); +template +void MemRefDataPrinter::print(std::ostream &os, T *base, int64_t dim, + int64_t rank, int64_t offset, + const int64_t *sizes, const int64_t *strides) { + if (dim == 0) { + os << base[offset]; + return; + } + printFirst(os, base, dim, rank, offset, sizes, strides); for (unsigned i = 1; i + 1 < sizes[0]; ++i) { - printSpace(os, rank - N + 1); - MemRefDataPrinter::print(os, base, rank, offset + i * strides[0], - sizes + 1, strides + 1); + printSpace(os, rank - dim + 1); + print(os, base, dim - 1, rank, offset + i * strides[0], sizes + 1, + strides + 1); os << ", "; - if (N > 1) + if (dim > 1) os << "\n"; } if (sizes[0] <= 1) return; - printLast(os, base, rank, offset, sizes, strides); + printLast(os, base, dim, rank, offset, sizes, strides); } -template -void MemRefDataPrinter::printLast(std::ostream &os, T *base, int64_t rank, - int64_t offset, int64_t *sizes, - int64_t *strides) { - printSpace(os, rank - N + 1); - MemRefDataPrinter::print(os, base, rank, - offset + (sizes[0] - 1) * (*strides), - sizes + 1, strides + 1); +template +void MemRefDataPrinter::printLast(std::ostream &os, T *base, int64_t dim, + int64_t rank, int64_t offset, + const int64_t *sizes, + const int64_t *strides) { + printSpace(os, rank - dim + 1); + print(os, base, dim - 1, rank, offset + (sizes[0] - 1) * (*strides), + sizes + 1, strides + 1); os << "]"; } template -void MemRefDataPrinter::print(std::ostream &os, T *base, int64_t rank, - int64_t offset, int64_t *sizes, - int64_t *strides) { - os << base[offset]; -} - -template void printMemRef(StridedMemRefType &M) { - static_assert(N > 0, "Expected N > 0"); +void printMemRef(const DynamicMemRefType &M) { printMemRefMetaData(std::cout, M); std::cout 
<< " data = " << std::endl; - MemRefDataPrinter::print(std::cout, M.data, N, M.offset, M.sizes, - M.strides); + if (M.rank == 0) + std::cout << "["; + MemRefDataPrinter::print(std::cout, M.data, M.rank, M.rank, M.offset, + M.sizes, M.strides); + if (M.rank == 0) + std::cout << "]"; std::cout << std::endl; } -template void printMemRef(StridedMemRefType &M) { - printMemRefMetaData(std::cout, M); - std::cout << " data = " << std::endl; - std::cout << "["; - MemRefDataPrinter::print(std::cout, M.data, 0, M.offset); - std::cout << "]" << std::endl; +template +void printMemRef(StridedMemRefType &M) { + std::cout << "Memref "; + printMemRef(DynamicMemRefType(M)); +} + +template +void printMemRef(UnrankedMemRefType &M) { + std::cout << "Unranked Memref "; + printMemRef(DynamicMemRefType(M)); } } // namespace impl diff --git a/mlir/lib/ExecutionEngine/RunnerUtils.cpp b/mlir/lib/ExecutionEngine/RunnerUtils.cpp index 7991eca61994f..7497ebdacf689 100644 --- a/mlir/lib/ExecutionEngine/RunnerUtils.cpp +++ b/mlir/lib/ExecutionEngine/RunnerUtils.cpp @@ -24,57 +24,16 @@ extern "C" void _mlir_ciface_print_memref_vector_4x4xf32( impl::printMemRef(*M); } -#define MEMREF_CASE(TYPE, RANK) \ - case RANK: \ - impl::printMemRef(*(static_cast *>(ptr))); \ - break - extern "C" void _mlir_ciface_print_memref_i8(UnrankedMemRefType *M) { - printUnrankedMemRefMetaData(std::cout, *M); - int64_t rank = M->rank; - void *ptr = M->descriptor; - - switch (rank) { - MEMREF_CASE(int8_t, 0); - MEMREF_CASE(int8_t, 1); - MEMREF_CASE(int8_t, 2); - MEMREF_CASE(int8_t, 3); - MEMREF_CASE(int8_t, 4); - default: - assert(0 && "Unsupported rank to print"); - } + impl::printMemRef(*M); } extern "C" void _mlir_ciface_print_memref_i32(UnrankedMemRefType *M) { - printUnrankedMemRefMetaData(std::cout, *M); - int64_t rank = M->rank; - void *ptr = M->descriptor; - - switch (rank) { - MEMREF_CASE(int32_t, 0); - MEMREF_CASE(int32_t, 1); - MEMREF_CASE(int32_t, 2); - MEMREF_CASE(int32_t, 3); - MEMREF_CASE(int32_t, 4); - 
default: - assert(0 && "Unsupported rank to print"); - } + impl::printMemRef(*M); } extern "C" void _mlir_ciface_print_memref_f32(UnrankedMemRefType *M) { - printUnrankedMemRefMetaData(std::cout, *M); - int64_t rank = M->rank; - void *ptr = M->descriptor; - - switch (rank) { - MEMREF_CASE(float, 0); - MEMREF_CASE(float, 1); - MEMREF_CASE(float, 2); - MEMREF_CASE(float, 3); - MEMREF_CASE(float, 4); - default: - assert(0 && "Unsupported rank to print"); - } + impl::printMemRef(*M); } extern "C" void print_memref_i32(int64_t rank, void *ptr) { diff --git a/mlir/test/mlir-cpu-runner/unranked_memref.mlir b/mlir/test/mlir-cpu-runner/unranked_memref.mlir index aa54b56b06b74..0eb68ac033687 100644 --- a/mlir/test/mlir-cpu-runner/unranked_memref.mlir +++ b/mlir/test/mlir-cpu-runner/unranked_memref.mlir @@ -1,25 +1,21 @@ // RUN: mlir-opt %s -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | mlir-cpu-runner -e main -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_test_cblas%shlibext,%linalg_test_lib_dir/libmlir_test_cblas_interface%shlibext | FileCheck %s -// CHECK: rank = 2 // CHECK: rank = 2 // CHECK-SAME: sizes = [10, 3] // CHECK-SAME: strides = [3, 1] // CHECK-COUNT-10: [10, 10, 10] // // CHECK: rank = 2 -// CHECK: rank = 2 // CHECK-SAME: sizes = [10, 3] // CHECK-SAME: strides = [3, 1] // CHECK-COUNT-10: [5, 5, 5] // // CHECK: rank = 2 -// CHECK: rank = 2 // CHECK-SAME: sizes = [10, 3] // CHECK-SAME: strides = [3, 1] // CHECK-COUNT-10: [2, 2, 2] // // CHECK: rank = 0 -// CHECK: rank = 0 // 122 is ASCII for 'z'. 
// CHECK: [z] func @main() -> () { diff --git a/mlir/test/mlir-cpu-runner/utils.mlir b/mlir/test/mlir-cpu-runner/utils.mlir index d3ab6177eb65c..65957400bf7f2 100644 --- a/mlir/test/mlir-cpu-runner/utils.mlir +++ b/mlir/test/mlir-cpu-runner/utils.mlir @@ -12,8 +12,7 @@ func @print_0d() { dealloc %A : memref return } -// PRINT-0D: Unranked Memref rank = 0 descriptor@ = {{.*}} -// PRINT-0D: Memref base@ = {{.*}} rank = 0 offset = 0 data = +// PRINT-0D: Unranked Memref base@ = {{.*}} rank = 0 offset = 0 sizes = [] strides = [] data = // PRINT-0D: [2] func @print_1d() { @@ -26,7 +25,7 @@ func @print_1d() { dealloc %A : memref<16xf32> return } -// PRINT-1D: Memref base@ = {{.*}} rank = 1 offset = 0 sizes = [16] strides = [1] data = +// PRINT-1D: Unranked Memref base@ = {{.*}} rank = 1 offset = 0 sizes = [16] strides = [1] data = // PRINT-1D-NEXT: [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2] func @print_3d() { @@ -43,7 +42,7 @@ func @print_3d() { dealloc %A : memref<3x4x5xf32> return } -// PRINT-3D: Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [3, 4, 5] strides = [20, 5, 1] data = +// PRINT-3D: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [3, 4, 5] strides = [20, 5, 1] data = // PRINT-3D-COUNT-4: {{.*[[:space:]].*}}2, 2, 2, 2, 2 // PRINT-3D-COUNT-4: {{.*[[:space:]].*}}2, 2, 2, 2, 2 // PRINT-3D-COUNT-2: {{.*[[:space:]].*}}2, 2, 2, 2, 2 diff --git a/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp b/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp index dbe78a55c0b1f..705fa9f00930a 100644 --- a/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp +++ b/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp @@ -83,10 +83,10 @@ extern "C" void mgpuMemHostRegister(void *ptr, uint64_t sizeBytes) { // Allows to register a MemRef with the CUDA runtime. Initializes array with // value. Helpful until we have transfer functions implemented. 
template -void mcuMemHostRegisterMemRef(T *pointer, llvm::ArrayRef sizes, - llvm::ArrayRef strides, T value) { - assert(sizes.size() == strides.size()); - llvm::SmallVector denseStrides(strides.size()); +void mcuMemHostRegisterMemRef(const DynamicMemRefType &mem_ref, T value) { + llvm::SmallVector denseStrides(mem_ref.rank); + llvm::ArrayRef sizes(mem_ref.sizes, mem_ref.rank); + llvm::ArrayRef strides(mem_ref.strides, mem_ref.rank); std::partial_sum(sizes.rbegin(), sizes.rend(), denseStrides.rbegin(), std::multiplies()); @@ -98,20 +98,17 @@ void mcuMemHostRegisterMemRef(T *pointer, llvm::ArrayRef sizes, denseStrides.back() = 1; assert(strides == llvm::makeArrayRef(denseStrides)); + auto *pointer = mem_ref.data + mem_ref.offset; std::fill_n(pointer, count, value); mgpuMemHostRegister(pointer, count * sizeof(T)); } extern "C" void mcuMemHostRegisterFloat(int64_t rank, void *ptr) { - auto *desc = static_cast *>(ptr); - auto sizes = llvm::ArrayRef(desc->sizes, rank); - auto strides = llvm::ArrayRef(desc->sizes + rank, rank); - mcuMemHostRegisterMemRef(desc->data + desc->offset, sizes, strides, 1.23f); + UnrankedMemRefType mem_ref = {rank, ptr}; + mcuMemHostRegisterMemRef(DynamicMemRefType(mem_ref), 1.23f); } extern "C" void mcuMemHostRegisterInt32(int64_t rank, void *ptr) { - auto *desc = static_cast *>(ptr); - auto sizes = llvm::ArrayRef(desc->sizes, rank); - auto strides = llvm::ArrayRef(desc->sizes + rank, rank); - mcuMemHostRegisterMemRef(desc->data + desc->offset, sizes, strides, 123); + UnrankedMemRefType mem_ref = {rank, ptr}; + mcuMemHostRegisterMemRef(DynamicMemRefType(mem_ref), 123); } From 2dd7714b8d264f6436b56582e4448f6a003a61fc Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 19 May 2020 23:03:39 -0400 Subject: [PATCH 099/770] AMDGPU/GlobalISel: Don't select boolean phi by default This is currently missing most of the hard parts to lower correctly, so disable it for now. 
This fixes at least one OpenCL conformance test and allows it to pass with fallback. Hide this behind an option for now. --- .../Target/AMDGPU/AMDGPUInstructionSelector.cpp | 14 ++++++++++++++ .../AMDGPU/GlobalISel/divergent-control-flow.ll | 2 +- .../CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir | 2 +- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 95795f87faaf2..242a108f156c7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -39,6 +39,12 @@ using namespace llvm; using namespace MIPatternMatch; +static cl::opt AllowRiskySelect( + "amdgpu-global-isel-risky-select", + cl::desc("Allow GlobalISel to select cases that are likely to not work yet"), + cl::init(false), + cl::ReallyHidden); + #define GET_GLOBALISEL_IMPL #define AMDGPUSubtarget GCNSubtarget #include "AMDGPUGenGlobalISel.inc" @@ -196,6 +202,14 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const { bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const { const Register DefReg = I.getOperand(0).getReg(); const LLT DefTy = MRI->getType(DefReg); + if (DefTy == LLT::scalar(1)) { + if (!AllowRiskySelect) { + LLVM_DEBUG(dbgs() << "Skipping risky boolean phi\n"); + return false; + } + + LLVM_DEBUG(dbgs() << "Selecting risky boolean phi\n"); + } // TODO: Verify this doesn't have insane operands (i.e. 
VGPR to SGPR copy) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll index bd313de000df4..9c47fab05aa05 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s ; Make sure the branch targets are correct after lowering llvm.amdgcn.if diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir index c0bfa38812226..b4ef0caebfc13 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=GCN +# RUN: llc -march=amdgcn -amdgpu-global-isel-risky-select -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=GCN --- name: g_phi_s32_ss_sbranch From 099a875f28d0131a6ae85af91b9eb8627917fbbe Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Tue, 26 May 2020 09:58:25 -0500 Subject: [PATCH 100/770] [PowerPC] Unaligned FP default should apply to scalars only As reported in PR45186, we could be in a situation where we don't want to handle unaligned memory accesses for FP scalars but still have VSX (which allows unaligned access for vectors). Change the default to only apply to scalars. 
Fixes: https://bugs.llvm.org/show_bug.cgi?id=45186 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 3 +- llvm/test/CodeGen/PowerPC/pr45186.ll | 132 ++++++++++++++++++++ 2 files changed, 134 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/PowerPC/pr45186.ll diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 2f9ff293c2775..42df83831113a 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -15659,7 +15659,8 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, if (!VT.isSimple()) return false; - if (VT.isFloatingPoint() && !Subtarget.allowsUnalignedFPAccess()) + if (VT.isFloatingPoint() && !VT.isVector() && + !Subtarget.allowsUnalignedFPAccess()) return false; if (VT.getSimpleVT().isVector()) { diff --git a/llvm/test/CodeGen/PowerPC/pr45186.ll b/llvm/test/CodeGen/PowerPC/pr45186.ll new file mode 100644 index 0000000000000..92f748e3ef5a3 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pr45186.ll @@ -0,0 +1,132 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -ppc-asm-full-reg-names -mtriple=powerpc64-- -mattr=+vsx \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +%struct.anon = type { i64, i64 } + +@d = local_unnamed_addr global %struct.anon zeroinitializer, align 8 + +; Function Attrs: norecurse nounwind readonly +define i64 @e(i8* nocapture readonly %f) local_unnamed_addr #0 { +; CHECK-LABEL: e: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ldx r3, 0, r3 +; CHECK-NEXT: blr +entry: + %0 = load i8, i8* %f, align 1 + %conv = zext i8 %0 to i64 + %shl = shl nuw i64 %conv, 56 + %arrayidx1 = getelementptr inbounds i8, i8* %f, i64 1 + %1 = load i8, i8* %arrayidx1, align 1 + %conv2 = zext i8 %1 to i64 + %shl3 = shl nuw nsw i64 %conv2, 48 + %or = or i64 %shl3, %shl + %arrayidx4 = getelementptr inbounds i8, i8* %f, i64 2 + %2 = load i8, i8* %arrayidx4, align 1 + %conv5 = zext i8 %2 to i64 + 
%shl6 = shl nuw nsw i64 %conv5, 40 + %or7 = or i64 %or, %shl6 + %arrayidx8 = getelementptr inbounds i8, i8* %f, i64 3 + %3 = load i8, i8* %arrayidx8, align 1 + %conv9 = zext i8 %3 to i64 + %shl10 = shl nuw nsw i64 %conv9, 32 + %or11 = or i64 %or7, %shl10 + %arrayidx12 = getelementptr inbounds i8, i8* %f, i64 4 + %4 = load i8, i8* %arrayidx12, align 1 + %conv13 = zext i8 %4 to i64 + %shl14 = shl nuw nsw i64 %conv13, 24 + %or15 = or i64 %or11, %shl14 + %arrayidx16 = getelementptr inbounds i8, i8* %f, i64 5 + %5 = load i8, i8* %arrayidx16, align 1 + %conv17 = zext i8 %5 to i64 + %shl18 = shl nuw nsw i64 %conv17, 16 + %or20 = or i64 %or15, %shl18 + %arrayidx21 = getelementptr inbounds i8, i8* %f, i64 6 + %6 = load i8, i8* %arrayidx21, align 1 + %conv22 = zext i8 %6 to i64 + %shl23 = shl nuw nsw i64 %conv22, 8 + %or25 = or i64 %or20, %shl23 + %arrayidx26 = getelementptr inbounds i8, i8* %f, i64 7 + %7 = load i8, i8* %arrayidx26, align 1 + %conv27 = zext i8 %7 to i64 + %or28 = or i64 %or25, %conv27 + ret i64 %or28 +} + +; Function Attrs: nofree norecurse nounwind +define void @g() local_unnamed_addr #0 { +; CHECK-LABEL: g: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis r3, r2, .LC0@toc@ha +; CHECK-NEXT: addis r4, r2, .LC1@toc@ha +; CHECK-NEXT: ld r3, .LC0@toc@l(r3) +; CHECK-NEXT: ld r4, .LC1@toc@l(r4) +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: stxvd2x vs0, 0, r4 +; CHECK-NEXT: blr +entry: + %0 = load i8, i8* getelementptr inbounds (i8, i8* bitcast (void ()* @g to i8*), i64 8), align 1 + %conv.i = zext i8 %0 to i64 + %shl.i = shl nuw i64 %conv.i, 56 + %1 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 9), align 1 + %conv2.i = zext i8 %1 to i64 + %shl3.i = shl nuw nsw i64 %conv2.i, 48 + %or.i = or i64 %shl3.i, %shl.i + %2 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 10), align 1 + %conv5.i = zext i8 %2 to i64 + %shl6.i = shl nuw nsw i64 %conv5.i, 40 + %or7.i = or i64 %or.i, %shl6.i + %3 = load i8, i8* getelementptr 
(i8, i8* bitcast (void ()* @g to i8*), i64 11), align 1 + %conv9.i = zext i8 %3 to i64 + %shl10.i = shl nuw nsw i64 %conv9.i, 32 + %or11.i = or i64 %or7.i, %shl10.i + %4 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 12), align 1 + %conv13.i = zext i8 %4 to i64 + %shl14.i = shl nuw nsw i64 %conv13.i, 24 + %or15.i = or i64 %or11.i, %shl14.i + %5 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 13), align 1 + %conv17.i = zext i8 %5 to i64 + %shl18.i = shl nuw nsw i64 %conv17.i, 16 + %or20.i = or i64 %or15.i, %shl18.i + %6 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 14), align 1 + %conv22.i = zext i8 %6 to i64 + %shl23.i = shl nuw nsw i64 %conv22.i, 8 + %or25.i = or i64 %or20.i, %shl23.i + %7 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 15), align 1 + %conv27.i = zext i8 %7 to i64 + %or28.i = or i64 %or25.i, %conv27.i + store i64 %or28.i, i64* getelementptr inbounds (%struct.anon, %struct.anon* @d, i64 0, i32 1), align 8 + %8 = load i8, i8* bitcast (void ()* @g to i8*), align 1 + %conv.i2 = zext i8 %8 to i64 + %shl.i3 = shl nuw i64 %conv.i2, 56 + %9 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 1), align 1 + %conv2.i4 = zext i8 %9 to i64 + %shl3.i5 = shl nuw nsw i64 %conv2.i4, 48 + %or.i6 = or i64 %shl3.i5, %shl.i3 + %10 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 2), align 1 + %conv5.i7 = zext i8 %10 to i64 + %shl6.i8 = shl nuw nsw i64 %conv5.i7, 40 + %or7.i9 = or i64 %or.i6, %shl6.i8 + %11 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 3), align 1 + %conv9.i10 = zext i8 %11 to i64 + %shl10.i11 = shl nuw nsw i64 %conv9.i10, 32 + %or11.i12 = or i64 %or7.i9, %shl10.i11 + %12 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 4), align 1 + %conv13.i13 = zext i8 %12 to i64 + %shl14.i14 = shl nuw nsw i64 %conv13.i13, 24 + %or15.i15 = or i64 %or11.i12, %shl14.i14 + %13 = 
load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 5), align 1 + %conv17.i16 = zext i8 %13 to i64 + %shl18.i17 = shl nuw nsw i64 %conv17.i16, 16 + %or20.i18 = or i64 %or15.i15, %shl18.i17 + %14 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 6), align 1 + %conv22.i19 = zext i8 %14 to i64 + %shl23.i20 = shl nuw nsw i64 %conv22.i19, 8 + %or25.i21 = or i64 %or20.i18, %shl23.i20 + %15 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 7), align 1 + %conv27.i22 = zext i8 %15 to i64 + %or28.i23 = or i64 %or25.i21, %conv27.i22 + store i64 %or28.i23, i64* getelementptr inbounds (%struct.anon, %struct.anon* @d, i64 0, i32 0), align 8 + ret void +} + +attributes #0 = { nounwind } From e72cba975735c2202b254621d79fb9dbbed08d39 Mon Sep 17 00:00:00 2001 From: Daniel Frampton Date: Thu, 21 May 2020 11:03:24 +0100 Subject: [PATCH 101/770] Use configure depends to trigger reconfiguration when LLVMBuild files change Summary: The existing logic has a workaround where configure_file is used to write a single dummy file output many times. CMake has a feature to more directly add the dependency and avoid the dummy file (it is available in the minimum version specified). Reviewers: theraven Reviewed By: theraven Subscribers: theraven, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D80218 --- llvm/utils/llvm-build/llvmbuild/main.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/llvm/utils/llvm-build/llvmbuild/main.py b/llvm/utils/llvm-build/llvmbuild/main.py index 99b82ad5e20c3..4f64c52608f9c 100644 --- a/llvm/utils/llvm-build/llvmbuild/main.py +++ b/llvm/utils/llvm-build/llvmbuild/main.py @@ -563,19 +563,10 @@ def write_cmake_fragment(self, output_path, enabled_optional_components): f.write(""" # LLVMBuild CMake fragment dependencies. # -# CMake has no builtin way to declare that the configuration depends on -# a particular file. 
However, a side effect of configure_file is to add -# said input file to CMake's internal dependency list. So, we use that -# and a dummy output file to communicate the dependency information to -# CMake. -# -# FIXME: File a CMake RFE to get a properly supported version of this -# feature. """) for dep in dependencies: f.write("""\ -configure_file(\"%s\" - ${CMAKE_CURRENT_BINARY_DIR}/DummyConfigureOutput)\n""" % ( +set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS \"%s\")\n""" % ( cmake_quote_path(dep),)) # Write the properties we use to encode the required library dependency From 8bc03d2168241f7b12265e9cd7e4eb7655709f34 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 15 May 2020 18:33:01 -0400 Subject: [PATCH 102/770] GlobalISel: Merge G_PTR_MASK with llvm.ptrmask intrinsic Confusingly, these were unrelated and had different semantics. The G_PTR_MASK instruction predates the llvm.ptrmask intrinsic, but has a different format. G_PTR_MASK only allows clearing the low bits of a pointer, and only a constant number of bits. The ptrmask intrinsic allows an arbitrary mask. Replace G_PTR_MASK to match the intrinsic. Only selects the cases that look like the old instruction. More work is needed to select the general case. Also new legalization code is still needed to deal with the case where the incoming mask size does not match the pointer size, which has a specified behavior in the langref. 
--- llvm/docs/GlobalISel/GenericOpcode.rst | 8 +- .../CodeGen/GlobalISel/MachineIRBuilder.h | 14 +- llvm/include/llvm/Support/TargetOpcodes.def | 5 +- llvm/include/llvm/Target/GenericOpcodes.td | 8 +- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 2 + .../CodeGen/GlobalISel/MachineIRBuilder.cpp | 19 +- llvm/lib/CodeGen/MachineVerifier.cpp | 16 + .../AArch64/AArch64InstructionSelector.cpp | 13 +- .../Target/AArch64/AArch64LegalizerInfo.cpp | 4 +- .../AMDGPU/AMDGPUInstructionSelector.cpp | 15 +- .../Target/AMDGPU/AMDGPUInstructionSelector.h | 2 +- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 7 +- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 1 + .../AArch64/GlobalISel/legalize-vaarg.mir | 17 +- .../GlobalISel/legalizer-info-validation.mir | 4 +- .../CodeGen/AArch64/GlobalISel/select.mir | 5 +- .../GlobalISel/inst-select-ptr-mask.mir | 475 ----------- .../AMDGPU/GlobalISel/inst-select-ptrmask.mir | 800 ++++++++++++++++++ .../AMDGPU/GlobalISel/irtranslator-ptrmask.ll | 161 ++++ .../GlobalISel/regbankselect-ptrmask.mir | 90 ++ llvm/test/MachineVerifier/test_g_ptrmask.mir | 54 ++ 21 files changed, 1195 insertions(+), 525 deletions(-) delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptr-mask.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptrmask.mir create mode 100644 llvm/test/MachineVerifier/test_g_ptrmask.mir diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst index 7c418a0c05c17..6372192c0088f 100644 --- a/llvm/docs/GlobalISel/GenericOpcode.rst +++ b/llvm/docs/GlobalISel/GenericOpcode.rst @@ -287,14 +287,16 @@ typically bytes but this may vary between targets. There are currently no in-tree targets that use this with addressable units not equal to 8 bit. 
-G_PTR_MASK +G_PTRMASK ^^^^^^^^^^ -Zero the least significant N bits of a pointer. +Zero out an arbitrary mask of bits of a pointer. The mask type must be +an integer, and the number of vector elements must match for all +operands. This corresponds to :ref:`i_intr_llvm_ptrmask`. .. code-block:: none - %1:_(p0) = G_PTR_MASK %0, 3 + %2:_(p0) = G_PTRMASK %0, %1 G_SMIN, G_SMAX, G_UMIN, G_UMAX ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 19f65468791d4..0252a324de231 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -453,9 +453,15 @@ class MachineIRBuilder { const LLT ValueTy, uint64_t Value); - /// Build and insert \p Res = G_PTR_MASK \p Op0, \p NumBits + /// Build and insert \p Res = G_PTRMASK \p Op0, \p Op1 + MachineInstrBuilder buildPtrMask(const DstOp &Res, const SrcOp &Op0, + const SrcOp &Op1) { + return buildInstr(TargetOpcode::G_PTRMASK, {Res}, {Op0, Op1}); + } + + /// Build and insert \p Res = G_PTRMASK \p Op0, \p G_CONSTANT (1 << NumBits) - 1 /// - /// G_PTR_MASK clears the low bits of a pointer operand without destroying its + /// This clears the low bits of a pointer operand without destroying its /// pointer properties. This has the effect of rounding the address *down* to /// a specified alignment in bits. /// @@ -466,8 +472,8 @@ class MachineIRBuilder { /// be cleared in \p Op0. /// /// \return a MachineInstrBuilder for the newly created instruction. 
- MachineInstrBuilder buildPtrMask(const DstOp &Res, const SrcOp &Op0, - uint32_t NumBits); + MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, + uint32_t NumBits); /// Build and insert \p Res, \p CarryOut = G_UADDO \p Op0, \p Op1 /// diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index 8385af9de2a41..eae831f3353b0 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -556,9 +556,8 @@ HANDLE_TARGET_OPCODE(G_FMAXIMUM) /// Generic pointer offset HANDLE_TARGET_OPCODE(G_PTR_ADD) -/// Clear the specified number of low bits in a pointer. This rounds the value -/// *down* to the given alignment. -HANDLE_TARGET_OPCODE(G_PTR_MASK) +/// Clear the specified bits in a pointer. +HANDLE_TARGET_OPCODE(G_PTRMASK) /// Generic signed integer minimum. HANDLE_TARGET_OPCODE(G_SMIN) diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index 441f1580dd254..5ba7844c8c9ee 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -358,9 +358,11 @@ def G_PTR_ADD : GenericInstruction { let hasSideEffects = 0; } -def G_PTR_MASK : GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src, unknown:$bits); +// Generic pointer mask. type1 should be an integer with the same +// bitwidth as the pointer type. 
+def G_PTRMASK : GenericInstruction { + let OutOperandList = (outs ptype0:$dst); + let InOperandList = (ins ptype0:$src, type1:$bits); let hasSideEffects = 0; } diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index eba352aedb071..df965e466698d 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1281,6 +1281,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { return TargetOpcode::G_INTRINSIC_TRUNC; case Intrinsic::readcyclecounter: return TargetOpcode::G_READCYCLECOUNTER; + case Intrinsic::ptrmask: + return TargetOpcode::G_PTRMASK; } return Intrinsic::not_intrinsic; } diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 7af8332bbce34..510572e6d4121 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -237,17 +237,14 @@ MachineIRBuilder::materializePtrAdd(Register &Res, Register Op0, return buildPtrAdd(Res, Op0, Cst.getReg(0)); } -MachineInstrBuilder MachineIRBuilder::buildPtrMask(const DstOp &Res, - const SrcOp &Op0, - uint32_t NumBits) { - assert(Res.getLLTTy(*getMRI()).isPointer() && - Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch"); - - auto MIB = buildInstr(TargetOpcode::G_PTR_MASK); - Res.addDefToMIB(*getMRI(), MIB); - Op0.addSrcToMIB(MIB); - MIB.addImm(NumBits); - return MIB; +MachineInstrBuilder MachineIRBuilder::buildMaskLowPtrBits(const DstOp &Res, + const SrcOp &Op0, + uint32_t NumBits) { + LLT PtrTy = Res.getLLTTy(*getMRI()); + LLT MaskTy = LLT::scalar(PtrTy.getSizeInBits()); + Register MaskReg = getMRI()->createGenericVirtualRegister(MaskTy); + buildConstant(MaskReg, maskTrailingOnes(NumBits)); + return buildPtrMask(Res, Op0, MaskReg); } MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) { diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp 
b/llvm/lib/CodeGen/MachineVerifier.cpp index f626c12916077..f07856d799c9f 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -1102,6 +1102,22 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { // TODO: Is the offset allowed to be a scalar with a vector? break; } + case TargetOpcode::G_PTRMASK: { + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); + LLT MaskTy = MRI->getType(MI->getOperand(2).getReg()); + if (!DstTy.isValid() || !SrcTy.isValid() || !MaskTy.isValid()) + break; + + if (!DstTy.getScalarType().isPointer()) + report("ptrmask result type must be a pointer", MI); + + if (!MaskTy.getScalarType().isScalar()) + report("ptrmask mask type must be an integer", MI); + + verifyVectorElementMatch(DstTy, MaskTy, MI); + break; + } case TargetOpcode::G_SEXT: case TargetOpcode::G_ZEXT: case TargetOpcode::G_ANYEXT: diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp index 5789d1d2531c7..57eaf140a6380 100644 --- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -2383,14 +2383,17 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { return true; } - case TargetOpcode::G_PTR_MASK: { - uint64_t Align = I.getOperand(2).getImm(); - if (Align >= 64 || Align == 0) + case TargetOpcode::G_PTRMASK: { + Register MaskReg = I.getOperand(2).getReg(); + Optional MaskVal = getConstantVRegVal(MaskReg, MRI); + // TODO: Implement arbitrary cases + if (!MaskVal || !isShiftedMask_64(*MaskVal)) return false; - uint64_t Mask = ~((1ULL << Align) - 1); + uint64_t Mask = *MaskVal; I.setDesc(TII.get(AArch64::ANDXri)); - I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64)); + I.getOperand(2).ChangeToImmediate( + AArch64_AM::encodeLogicalImmediate(Mask, 64)); return 
constrainSelectedInstRegOperands(I, TII, TRI, RBI); } diff --git a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp index 4378f5dbd8f93..3caa9026c9d8e 100644 --- a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -108,7 +108,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { .legalFor({{p0, s64}}) .clampScalar(1, s64, s64); - getActionDefinitionsBuilder(G_PTR_MASK).legalFor({p0}); + getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}}); getActionDefinitionsBuilder({G_SDIV, G_UDIV}) .legalFor({s32, s64}) @@ -744,7 +744,7 @@ bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI, auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1); auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0)); - DstPtr = MIRBuilder.buildPtrMask(PtrTy, ListTmp, Log2(Alignment)); + DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment)); } else DstPtr = List; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 242a108f156c7..e47f25f7828d5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2231,9 +2231,14 @@ bool AMDGPUInstructionSelector::selectG_FRAME_INDEX_GLOBAL_VALUE( DstReg, IsVGPR ? 
AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI); } -bool AMDGPUInstructionSelector::selectG_PTR_MASK(MachineInstr &I) const { - uint64_t Align = I.getOperand(2).getImm(); - const uint64_t Mask = ~((UINT64_C(1) << Align) - 1); +bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const { + Register MaskReg = I.getOperand(2).getReg(); + Optional MaskVal = getConstantVRegVal(MaskReg, *MRI); + // TODO: Implement arbitrary cases + if (!MaskVal || !isShiftedMask_64(*MaskVal)) + return false; + + const uint64_t Mask = *MaskVal; MachineBasicBlock *BB = I.getParent(); @@ -2731,8 +2736,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) { case TargetOpcode::G_FRAME_INDEX: case TargetOpcode::G_GLOBAL_VALUE: return selectG_FRAME_INDEX_GLOBAL_VALUE(I); - case TargetOpcode::G_PTR_MASK: - return selectG_PTR_MASK(I); + case TargetOpcode::G_PTRMASK: + return selectG_PTRMASK(I); case TargetOpcode::G_EXTRACT_VECTOR_ELT: return selectG_EXTRACT_VECTOR_ELT(I); case TargetOpcode::G_INSERT_VECTOR_ELT: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 3e3a3d8326c22..0ac6788c69b8a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -130,7 +130,7 @@ class AMDGPUInstructionSelector : public InstructionSelector { bool selectG_SELECT(MachineInstr &I) const; bool selectG_BRCOND(MachineInstr &I) const; bool selectG_FRAME_INDEX_GLOBAL_VALUE(MachineInstr &I) const; - bool selectG_PTR_MASK(MachineInstr &I) const; + bool selectG_PTRMASK(MachineInstr &I) const; bool selectG_EXTRACT_VECTOR_ELT(MachineInstr &I) const; bool selectG_INSERT_VECTOR_ELT(MachineInstr &I) const; bool selectG_SHUFFLE_VECTOR(MachineInstr &I) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 63106df9e2a22..c24996b93fa06 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ 
b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -560,7 +560,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .scalarize(0); } - getActionDefinitionsBuilder({G_PTR_ADD, G_PTR_MASK}) + getActionDefinitionsBuilder(G_PTR_ADD) + .scalarize(0) + .alwaysLegal(); + + // TODO: Clamp mask to pointer sizes + getActionDefinitionsBuilder(G_PTRMASK) .scalarize(0) .alwaysLegal(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 1cfc7ccc6cb7b..f11563a66d410 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -3228,6 +3228,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { LLVM_FALLTHROUGH; } case AMDGPU::G_PTR_ADD: + case AMDGPU::G_PTRMASK: case AMDGPU::G_ADD: case AMDGPU::G_SUB: case AMDGPU::G_MUL: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vaarg.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vaarg.mir index 7446fde7ba08d..a0cc566771189 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vaarg.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vaarg.mir @@ -15,17 +15,18 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0) :: (load 8) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[LOAD]], [[C]](s64) - ; CHECK: G_STORE [[GEP]](p0), [[COPY]](p0) :: (store 8) + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[LOAD]], [[C]](s64) + ; CHECK: G_STORE [[PTR_ADD]](p0), [[COPY]](p0) :: (store 8) ; CHECK: [[LOAD1:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0) :: (load 8) - ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[LOAD1]], [[C]](s64) - ; CHECK: G_STORE [[GEP1]](p0), [[COPY]](p0) :: (store 8) + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[LOAD1]], [[C]](s64) + ; CHECK: G_STORE [[PTR_ADD1]](p0), [[COPY]](p0) :: (store 8) ; CHECK: [[LOAD2:%[0-9]+]]:_(p0) = G_LOAD 
[[COPY]](p0) :: (load 8) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 - ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[LOAD2]], [[C1]](s64) - ; CHECK: [[PTR_MASK:%[0-9]+]]:_(p0) = G_PTR_MASK [[GEP2]], 4 - ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_MASK]], [[C]](s64) - ; CHECK: G_STORE [[GEP3]](p0), [[COPY]](p0) :: (store 8) + ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[LOAD2]], [[C1]](s64) + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[C1]](s64) + ; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[PTR_ADD2]], [[COPY1]](s64) + ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTRMASK]], [[C]](s64) + ; CHECK: G_STORE [[PTR_ADD3]](p0), [[COPY]](p0) :: (store 8) %0:_(p0) = COPY $x0 %1:_(s8) = G_VAARG %0(p0), 1 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index 1b63f672aabec..4a68d09c545a0 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -441,8 +441,8 @@ # DEBUG-NEXT: G_PTR_ADD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. the first uncovered type index: 2, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK -# DEBUG-NEXT: G_PTR_MASK (opcode {{[0-9]+}}): 1 type index, 0 imm indices -# DEBUG-NEXT: .. the first uncovered type index: 1, OK +# DEBUG-NEXT: G_PTRMASK (opcode {{[0-9]+}}): 2 type indices, 0 imm indices +# DEBUG-NEXT: .. the first uncovered type index: 2, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG: G_SMIN (opcode {{[0-9]+}}): 1 type index # DEBUG-NEXT: .. 
type index coverage check SKIPPED: no rules defined diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir index 2e38f1ce62e98..ca4091180b1a6 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir @@ -125,12 +125,13 @@ legalized: true regBankSelected: true # CHECK: body: -# CHECK: %1:gpr64sp = ANDXri %0, 8060 +# CHECK: %2:gpr64sp = ANDXri %0, 8060 body: | bb.0: liveins: $x0 %0:gpr(p0) = COPY $x0 - %1:gpr(p0) = G_PTR_MASK %0, 3 + %const:gpr(s64) = G_CONSTANT i64 -8 + %1:gpr(p0) = G_PTRMASK %0, %const $x0 = COPY %1(p0) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptr-mask.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptr-mask.mir deleted file mode 100644 index fcc9565ce9ee4..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptr-mask.mir +++ /dev/null @@ -1,475 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s - ---- -name: ptr_mask_p3_sgpr_sgpr_1 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0 - - ; CHECK-LABEL: name: ptr_mask_p3_sgpr_sgpr_1 - ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]] - %0:sgpr(p3) = COPY $sgpr0 - %1:sgpr(p3) = G_PTR_MASK %0, 1 - S_ENDPGM 0, implicit %1 - -... 
- ---- -name: ptr_mask_p3_sgpr_sgpr_2 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0 - - ; CHECK-LABEL: name: ptr_mask_p3_sgpr_sgpr_2 - ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]] - %0:sgpr(p3) = COPY $sgpr0 - %1:sgpr(p3) = G_PTR_MASK %0, 2 - S_ENDPGM 0, implicit %1 - -... - ---- -name: ptr_mask_p3_sgpr_sgpr_3 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0 - - ; CHECK-LABEL: name: ptr_mask_p3_sgpr_sgpr_3 - ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]] - %0:sgpr(p3) = COPY $sgpr0 - %1:sgpr(p3) = G_PTR_MASK %0, 3 - S_ENDPGM 0, implicit %1 - -... - ---- -name: ptr_mask_p3_sgpr_sgpr_4 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0 - - ; CHECK-LABEL: name: ptr_mask_p3_sgpr_sgpr_4 - ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -16 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]] - %0:sgpr(p3) = COPY $sgpr0 - %1:sgpr(p3) = G_PTR_MASK %0, 4 - S_ENDPGM 0, implicit %1 - -... 
- ---- -name: ptr_mask_p3_sgpr_sgpr_29 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0 - - ; CHECK-LABEL: name: ptr_mask_p3_sgpr_sgpr_29 - ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -16 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]] - %0:sgpr(p3) = COPY $sgpr0 - %1:sgpr(p3) = G_PTR_MASK %0, 4 - S_ENDPGM 0, implicit %1 - -... - ---- -name: ptr_mask_p0_sgpr_sgpr_1 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1 - - ; CHECK-LABEL: name: ptr_mask_p0_sgpr_sgpr_1 - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:sgpr(p0) = COPY $sgpr0_sgpr1 - %1:sgpr(p0) = G_PTR_MASK %0, 1 - S_ENDPGM 0, implicit %1 - -... 
- ---- -name: ptr_mask_p0_sgpr_sgpr_2 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1 - - ; CHECK-LABEL: name: ptr_mask_p0_sgpr_sgpr_2 - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:sgpr(p0) = COPY $sgpr0_sgpr1 - %1:sgpr(p0) = G_PTR_MASK %0, 2 - S_ENDPGM 0, implicit %1 - -... - ---- -name: ptr_mask_p0_sgpr_sgpr_3 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1 - - ; CHECK-LABEL: name: ptr_mask_p0_sgpr_sgpr_3 - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:sgpr(p0) = COPY $sgpr0_sgpr1 - %1:sgpr(p0) = G_PTR_MASK %0, 3 - S_ENDPGM 0, implicit %1 - -... 
- ---- -name: ptr_mask_p0_sgpr_sgpr_4 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1 - - ; CHECK-LABEL: name: ptr_mask_p0_sgpr_sgpr_4 - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -16 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:sgpr(p0) = COPY $sgpr0_sgpr1 - %1:sgpr(p0) = G_PTR_MASK %0, 4 - S_ENDPGM 0, implicit %1 - -... - ---- -name: ptr_mask_p0_sgpr_sgpr_29 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1 - - ; CHECK-LABEL: name: ptr_mask_p0_sgpr_sgpr_29 - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -16 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:sgpr(p0) = COPY $sgpr0_sgpr1 - %1:sgpr(p0) = G_PTR_MASK %0, 4 - S_ENDPGM 0, implicit %1 - -... 
- ---- -name: ptr_mask_p3_vgpr_vgpr_1 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0 - - ; CHECK-LABEL: name: ptr_mask_p3_vgpr_vgpr_1 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2, implicit $exec - ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] - %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(p3) = G_PTR_MASK %0, 1 - S_ENDPGM 0, implicit %1 - -... - ---- -name: ptr_mask_p3_vgpr_vgpr_2 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0 - - ; CHECK-LABEL: name: ptr_mask_p3_vgpr_vgpr_2 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4, implicit $exec - ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] - %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(p3) = G_PTR_MASK %0, 2 - S_ENDPGM 0, implicit %1 - -... - ---- -name: ptr_mask_p3_vgpr_vgpr_3 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0 - - ; CHECK-LABEL: name: ptr_mask_p3_vgpr_vgpr_3 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4, implicit $exec - ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] - %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(p3) = G_PTR_MASK %0, 2 - S_ENDPGM 0, implicit %1 - -... 
- ---- -name: ptr_mask_p3_vgpr_vgpr_4 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0 - - ; CHECK-LABEL: name: ptr_mask_p3_vgpr_vgpr_4 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec - ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] - %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(p3) = G_PTR_MASK %0, 4 - S_ENDPGM 0, implicit %1 - -... - ---- -name: ptr_mask_p3_vgpr_vgpr_29 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0 - - ; CHECK-LABEL: name: ptr_mask_p3_vgpr_vgpr_29 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec - ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] - %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(p3) = G_PTR_MASK %0, 4 - S_ENDPGM 0, implicit %1 - -... - ---- -name: ptr_mask_p0_vgpr_vgpr_1 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0_vgpr1 - - ; CHECK-LABEL: name: ptr_mask_p0_vgpr_vgpr_1 - ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2, implicit $exec - ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(p0) = G_PTR_MASK %0, 1 - S_ENDPGM 0, implicit %1 - -... 
- ---- -name: ptr_mask_p0_vgpr_vgpr_2 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0_vgpr1 - - ; CHECK-LABEL: name: ptr_mask_p0_vgpr_vgpr_2 - ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4, implicit $exec - ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(p0) = G_PTR_MASK %0, 2 - S_ENDPGM 0, implicit %1 - -... - ---- -name: ptr_mask_p0_vgpr_vgpr_3 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0_vgpr1 - - ; CHECK-LABEL: name: ptr_mask_p0_vgpr_vgpr_3 - ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4, implicit $exec - ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(p0) = G_PTR_MASK %0, 2 - S_ENDPGM 0, implicit %1 - -... 
- ---- -name: ptr_mask_p0_vgpr_vgpr_4 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0_vgpr1 - - ; CHECK-LABEL: name: ptr_mask_p0_vgpr_vgpr_4 - ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec - ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(p0) = G_PTR_MASK %0, 4 - S_ENDPGM 0, implicit %1 - -... - ---- -name: ptr_mask_p0_vgpr_vgpr_29 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0_vgpr1 - - ; CHECK-LABEL: name: ptr_mask_p0_vgpr_vgpr_29 - ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec - ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(p0) = G_PTR_MASK %0, 4 - S_ENDPGM 0, implicit %1 - -... 
- ---- -name: ptr_mask_p3_vgpr_sgpr_2 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0 - - ; CHECK-LABEL: name: ptr_mask_p3_vgpr_sgpr_2 - ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4, implicit $exec - ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] - %0:sgpr(p3) = COPY $sgpr0 - %1:vgpr(p3) = G_PTR_MASK %0, 2 - S_ENDPGM 0, implicit %1 - -... - ---- -name: ptr_mask_p0_vgpr_sgpr_2 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1 - - ; CHECK-LABEL: name: ptr_mask_p0_vgpr_sgpr_2 - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4, implicit $exec - ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:sgpr(p0) = COPY $sgpr0_sgpr1 - %1:vgpr(p0) = G_PTR_MASK %0, 2 - S_ENDPGM 0, implicit %1 - -... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir new file mode 100644 index 0000000000000..1f8325018af2a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir @@ -0,0 +1,800 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck %s + +--- +name: ptrmask_p3_s32_sgpr_sgpr_sgpr +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_sgpr + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK: [[PTRMASK:%[0-9]+]]:sgpr(p3) = G_PTRMASK [[COPY]], [[COPY1]](s32) + ; CHECK: S_ENDPGM 0, implicit [[PTRMASK]](p3) + %0:sgpr(p3) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(p3) = G_PTRMASK %0, %1 + S_ENDPGM 0, implicit %2 + +... + +--- +name: ptrmask_p3_s32_sgpr_sgpr_0xf0f0f0f0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_0xf0f0f0f0 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK: %const:sgpr(s32) = G_CONSTANT i32 -252645136 + ; CHECK: [[PTRMASK:%[0-9]+]]:sgpr(p3) = G_PTRMASK [[COPY]], %const(s32) + ; CHECK: S_ENDPGM 0, implicit [[PTRMASK]](p3) + %0:sgpr(p3) = COPY $sgpr0 + %const:sgpr(s32) = G_CONSTANT i32 -252645136 + %1:sgpr(p3) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... 
+ +--- +name: ptrmask_p3_s32_sgpr_sgpr_clearhi1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearhi1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 + ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]] + %0:sgpr(p3) = COPY $sgpr0 + %const:sgpr(s32) = G_CONSTANT i32 -2147483648 + %1:sgpr(p3) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... + +--- +name: ptrmask_p3_s32_sgpr_sgpr_clearhi2 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearhi2 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1073741824 + ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]] + %0:sgpr(p3) = COPY $sgpr0 + %const:sgpr(s32) = G_CONSTANT i32 -1073741824 + %1:sgpr(p3) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... + +--- +name: ptrmask_p3_s32_sgpr_sgpr_clearlo1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2 + ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]] + %0:sgpr(p3) = COPY $sgpr0 + %const:sgpr(s32) = G_CONSTANT i32 -2 + %1:sgpr(p3) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... 
+ +--- +name: ptrmask_p3_s32_sgpr_sgpr_clearlo2 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo2 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4 + ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]] + %0:sgpr(p3) = COPY $sgpr0 + %const:sgpr(s32) = G_CONSTANT i32 -4 + %1:sgpr(p3) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... + +--- +name: ptrmask_p3_s32_sgpr_sgpr_clearlo3 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo3 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8 + ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]] + %0:sgpr(p3) = COPY $sgpr0 + %const:sgpr(s32) = G_CONSTANT i32 -8 + %1:sgpr(p3) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... + +--- +name: ptrmask_p3_s32_sgpr_sgpr_clearlo4 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo4 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -16 + ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]] + %0:sgpr(p3) = COPY $sgpr0 + %const:sgpr(s32) = G_CONSTANT i32 -16 + %1:sgpr(p3) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... 
+ +--- +name: ptrmask_p3_s32_sgpr_sgpr_clearlo29 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo29 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -536870912 + ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]] + %0:sgpr(p3) = COPY $sgpr0 + %const:sgpr(s32) = G_CONSTANT i32 -536870912 + %1:sgpr(p3) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... + +--- +name: ptrmask_p0_s64_sgpr_sgpr_sgpr +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + + ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_sgpr + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK: [[PTRMASK:%[0-9]+]]:sgpr(p0) = G_PTRMASK [[COPY]], [[COPY1]](s64) + ; CHECK: S_ENDPGM 0, implicit [[PTRMASK]](p0) + %0:sgpr(p0) = COPY $sgpr0_sgpr1 + %1:sgpr(s64) = COPY $sgpr2_sgpr3 + %2:sgpr(p0) = G_PTRMASK %0, %1 + S_ENDPGM 0, implicit %2 + +... + +--- +name: ptrmask_p0_s64_sgpr_sgpr_sgpr_0xf0f0f0f0f0f0f0f0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_sgpr_0xf0f0f0f0f0f0f0f0 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 + ; CHECK: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 -1085102592571150096 + ; CHECK: [[PTRMASK:%[0-9]+]]:sgpr(p0) = G_PTRMASK [[COPY]], [[C]](s64) + ; CHECK: S_ENDPGM 0, implicit [[PTRMASK]](p0) + %0:sgpr(p0) = COPY $sgpr0_sgpr1 + %1:sgpr(s64) = G_CONSTANT i64 -1085102592571150096 + %2:sgpr(p0) = G_PTRMASK %0, %1 + S_ENDPGM 0, implicit %2 + +... 
+ +--- +name: ptrmask_p0_s32_sgpr_sgpr_sgpr +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + + ; CHECK-LABEL: name: ptrmask_p0_s32_sgpr_sgpr_sgpr + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK: [[PTRMASK:%[0-9]+]]:sgpr(p0) = G_PTRMASK [[COPY]], [[COPY1]](s32) + ; CHECK: S_ENDPGM 0, implicit [[PTRMASK]](p0) + %0:sgpr(p0) = COPY $sgpr0_sgpr1 + %1:sgpr(s32) = COPY $sgpr2 + %2:sgpr(p0) = G_PTRMASK %0, %1 + S_ENDPGM 0, implicit %2 + +... + +--- +name: ptrmask_p0_s64_sgpr_sgpr_clearhi1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearhi1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -9223372036854775808 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + %0:sgpr(p0) = COPY $sgpr0_sgpr1 + %const:sgpr(s64) = G_CONSTANT i64 -9223372036854775808 + %1:sgpr(p0) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... 
+ +--- +name: ptrmask_p0_s64_sgpr_sgpr_clearhi32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearhi32 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4294967296 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + %0:sgpr(p0) = COPY $sgpr0_sgpr1 + %const:sgpr(s64) = G_CONSTANT i64 -4294967296 + %1:sgpr(p0) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... + +--- +name: ptrmask_p0_s64_sgpr_sgpr_clear_32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clear_32 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967296 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + %0:sgpr(p0) = COPY $sgpr0_sgpr1 + %const:sgpr(s64) = G_CONSTANT i64 4294967296 + %1:sgpr(p0) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... 
+ +--- +name: ptrmask_p0_s64_sgpr_sgpr_clearlo1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearlo1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + %0:sgpr(p0) = COPY $sgpr0_sgpr1 + %const:sgpr(s64) = G_CONSTANT i64 -2 + %1:sgpr(p0) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... + +--- +name: ptrmask_p0_s64_sgpr_sgpr_clearlo2 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearlo2 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + %0:sgpr(p0) = COPY $sgpr0_sgpr1 + %const:sgpr(s64) = G_CONSTANT i64 -4 + %1:sgpr(p0) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... 
+ +--- +name: ptrmask_p0_s64_sgpr_sgpr_clearlo3 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearlo3 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + %0:sgpr(p0) = COPY $sgpr0_sgpr1 + %const:sgpr(s64) = G_CONSTANT i64 -8 + %1:sgpr(p0) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... + +--- +name: ptrmask_p0_s64_sgpr_sgpr_clearlo4 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearlo4 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -16 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + %0:sgpr(p0) = COPY $sgpr0_sgpr1 + %const:sgpr(s64) = G_CONSTANT i64 -16 + %1:sgpr(p0) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... 
+ +--- +name: ptrmask_p0_s64_sgpr_sgpr_clearlo29 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearlo29 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -536870912 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + %0:sgpr(p0) = COPY $sgpr0_sgpr1 + %const:sgpr(s64) = G_CONSTANT i64 -536870912 + %1:sgpr(p0) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... + +--- +name: ptrmask_p3_vgpr_vgpr_0xf0f0f0f0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_0xf0f0f0f0 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; CHECK: %const:vgpr(s32) = G_CONSTANT i32 -252645136 + ; CHECK: [[PTRMASK:%[0-9]+]]:vgpr(p3) = G_PTRMASK [[COPY]], %const(s32) + ; CHECK: S_ENDPGM 0, implicit [[PTRMASK]](p3) + %0:vgpr(p3) = COPY $vgpr0 + %const:vgpr(s32) = G_CONSTANT i32 -252645136 + %1:vgpr(p3) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... + +--- +name: ptrmask_p3_vgpr_vgpr_clearlo1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo1 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2, implicit $exec + ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + %0:vgpr(p3) = COPY $vgpr0 + %const:vgpr(s32) = G_CONSTANT i32 -2 + %1:vgpr(p3) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... 
+ +--- +name: ptrmask_p3_vgpr_vgpr_clearlo2 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo2 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4, implicit $exec + ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + %0:vgpr(p3) = COPY $vgpr0 + %const:vgpr(s32) = G_CONSTANT i32 -4 + %1:vgpr(p3) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... + +--- +name: ptrmask_p3_vgpr_vgpr_clearlo3 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo3 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8, implicit $exec + ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + %0:vgpr(p3) = COPY $vgpr0 + %const:vgpr(s32) = G_CONSTANT i32 -8 + %1:vgpr(p3) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... + +--- +name: ptrmask_p3_vgpr_vgpr_clearlo4 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo4 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec + ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + %0:vgpr(p3) = COPY $vgpr0 + %const:vgpr(s32) = G_CONSTANT i32 -16 + %1:vgpr(p3) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... 
+ +--- +name: ptrmask_p3_vgpr_vgpr_clearlo29 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo29 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -536870912, implicit $exec + ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + %0:vgpr(p3) = COPY $vgpr0 + %const:vgpr(s32) = G_CONSTANT i32 -536870912 + %1:vgpr(p3) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... + +--- +name: ptrmask_p0_s64_vgpr_vgpr_vgpr +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_vgpr + ; CHECK: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; CHECK: [[PTRMASK:%[0-9]+]]:vgpr(p0) = G_PTRMASK [[COPY]], [[COPY1]](s64) + ; CHECK: S_ENDPGM 0, implicit [[PTRMASK]](p0) + %0:vgpr(p0) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = COPY $vgpr2_vgpr3 + %2:vgpr(p0) = G_PTRMASK %0, %1 + S_ENDPGM 0, implicit %2 + +... + +--- +name: ptrmask_p0_s64_vgpr_vgpr_vgpr_0xf0f0f0f0f0f0f0f0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_vgpr_0xf0f0f0f0f0f0f0f0 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 -1085102592571150096 + ; CHECK: [[PTRMASK:%[0-9]+]]:vgpr(p0) = G_PTRMASK [[COPY]], [[C]](s64) + ; CHECK: S_ENDPGM 0, implicit [[PTRMASK]](p0) + %0:vgpr(p0) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 -1085102592571150096 + %2:vgpr(p0) = G_PTRMASK %0, %1 + S_ENDPGM 0, implicit %2 + +... 
+ +--- +name: ptrmask_p0_s32_vgpr_vgpr_vgpr +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; CHECK-LABEL: name: ptrmask_p0_s32_vgpr_vgpr_vgpr + ; CHECK: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK: [[PTRMASK:%[0-9]+]]:vgpr(p0) = G_PTRMASK [[COPY]], [[COPY1]](s32) + ; CHECK: S_ENDPGM 0, implicit [[PTRMASK]](p0) + %0:vgpr(p0) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = COPY $vgpr2 + %2:vgpr(p0) = G_PTRMASK %0, %1 + S_ENDPGM 0, implicit %2 + +... + +--- +name: ptrmask_p0_s64_vgpr_vgpr_clearlo1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_clearlo1 + ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2, implicit $exec + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + %0:vgpr(p0) = COPY $vgpr0_vgpr1 + %const:vgpr(s64) = G_CONSTANT i64 -2 + %1:vgpr(p0) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... 
+ +--- +name: ptrmask_p0_s64_vgpr_vgpr_clearlo2 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_clearlo2 + ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4, implicit $exec + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + %0:vgpr(p0) = COPY $vgpr0_vgpr1 + %const:vgpr(s64) = G_CONSTANT i64 -4 + %1:vgpr(p0) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... + +--- +name: ptrmask_p0_s64_vgpr_vgpr_clearlo3 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_clearlo3 + ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8, implicit $exec + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + %0:vgpr(p0) = COPY $vgpr0_vgpr1 + %const:vgpr(s64) = G_CONSTANT i64 -8 + %1:vgpr(p0) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +...
+ +--- +name: ptrmask_p0_s64_vgpr_vgpr_clearlo4 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_clearlo4 + ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + %0:vgpr(p0) = COPY $vgpr0_vgpr1 + %const:vgpr(s64) = G_CONSTANT i64 -16 + %1:vgpr(p0) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... + +--- +name: ptrmask_p0_s64_vgpr_vgpr_clearlo29 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_clearlo29 + ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -536870912, implicit $exec + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + %0:vgpr(p0) = COPY $vgpr0_vgpr1 + %const:vgpr(s64) = G_CONSTANT i64 -536870912 + %1:vgpr(p0) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... 
+ +--- +name: ptrmask_p3_vgpr_sgpr_clearlo2 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: ptrmask_p3_vgpr_sgpr_clearlo2 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4, implicit $exec + ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + %0:sgpr(p3) = COPY $sgpr0 + %const:sgpr(s32) = G_CONSTANT i32 -4 + %1:vgpr(p3) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... + +--- +name: ptrmask_p0_s64_vgpr_sgpr_clearlo2 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_sgpr_clearlo2 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4, implicit $exec + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + %0:sgpr(p0) = COPY $sgpr0_sgpr1 + %const:sgpr(s32) = G_CONSTANT i32 -4 + %1:vgpr(p0) = G_PTRMASK %0, %const + S_ENDPGM 0, implicit %1 + +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll new file mode 100644 index 0000000000000..cc1c75e404e05 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll @@ -0,0 +1,161 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -stop-after=irtranslator < %s | FileCheck %s + +define i8* @ptrmask_flat_i64(i8* %ptr, i64 %mask) { + ; CHECK-LABEL: name: ptrmask_flat_i64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[MV1]](s64) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %ptr, i64 %mask) + ret i8* %masked +} + +define i8* @ptrmask_flat_i32(i8* %ptr, i32 %mask) { + ; CHECK-LABEL: name: ptrmask_flat_i32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES 
[[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[COPY2]](s32) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0, implicit $vgpr1 + %masked = call i8* @llvm.ptrmask.p0i8.i32(i8* %ptr, i32 %mask) + ret i8* %masked +} + +define i8* @ptrmask_flat_i16(i8* %ptr, i16 %mask) { + ; CHECK-LABEL: name: ptrmask_flat_i16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s16) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0, implicit $vgpr1 + %masked = call i8* @llvm.ptrmask.p0i8.i16(i8* %ptr, i16 %mask) + ret i8* %masked +} + +define i8* @ptrmask_flat_i1(i8* %ptr, i1 %mask) { + ; CHECK-LABEL: name: ptrmask_flat_i1 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), 
[[COPY1]](s32) + ; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0, implicit $vgpr1 + %masked = call i8* @llvm.ptrmask.p0i8.i1(i8* %ptr, i1 %mask) + ret i8* %masked +} + +define i8 addrspace(3)* @ptrmask_local_i64(i8 addrspace(3)* %ptr, i64 %mask) { + ; CHECK-LABEL: name: ptrmask_local_i64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[MV]](s64) + ; CHECK: $vgpr0 = COPY [[PTRMASK]](p3) + ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)* %ptr, i64 %mask) + ret i8 addrspace(3)* %masked +} + +define i8 addrspace(3)* @ptrmask_local_i32(i8 addrspace(3)* %ptr, i32 %mask) { + ; CHECK-LABEL: name: ptrmask_local_i32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[COPY1]](s32) + ; CHECK: $vgpr0 = COPY [[PTRMASK]](p3) + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* %ptr, i32 
%mask) + ret i8 addrspace(3)* %masked +} + +define i8 addrspace(3)* @ptrmask_local_i16(i8 addrspace(3)* %ptr, i16 %mask) { + ; CHECK-LABEL: name: ptrmask_local_i16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s16) + ; CHECK: $vgpr0 = COPY [[PTRMASK]](p3) + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i16(i8 addrspace(3)* %ptr, i16 %mask) + ret i8 addrspace(3)* %masked +} + +define i8 addrspace(3)* @ptrmask_local_i1(i8 addrspace(3)* %ptr, i1 %mask) { + ; CHECK-LABEL: name: ptrmask_local_i1 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s1) + ; CHECK: $vgpr0 = COPY [[PTRMASK]](p3) + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i1(i8 addrspace(3)* %ptr, i1 %mask) + ret i8 addrspace(3)* %masked +} + +; Seems to not work +; define <2 x i8*> @ptrmask_flat_i64_v2(<2 x i8*> %ptr, <2 x i64> %mask) { +; %masked = call <2 x i8*> @llvm.ptrmask.v2p0i8.v2i64(<2 x i8*> %ptr, <2 x i64> %mask) +; ret <2 x i8*> %masked +; } + +declare i8* @llvm.ptrmask.p0i8.i64(i8*, i64) +declare i8* @llvm.ptrmask.p0i8.i32(i8*, i32) +declare i8* @llvm.ptrmask.p0i8.i16(i8*, i16) +declare i8* @llvm.ptrmask.p0i8.i1(i8*, i1) +declare 
i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)*, i64) +declare i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)*, i32) +declare i8 addrspace(3)* @llvm.ptrmask.p3i8.i16(i8 addrspace(3)*, i16) +declare i8 addrspace(3)* @llvm.ptrmask.p3i8.i1(i8 addrspace(3)*, i1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptrmask.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptrmask.mir new file mode 100644 index 0000000000000..0449b162968f9 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptrmask.mir @@ -0,0 +1,90 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s +# RUN: llc -march=amdgcn -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s + +--- +name: ptrmask_p1_s_k +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: ptrmask_p1_s_k + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 1 + ; CHECK: [[PTRMASK:%[0-9]+]]:sgpr(p1) = G_PTRMASK [[COPY]], [[C]](s64) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(s64) = G_CONSTANT i64 1 + %2:_(p1) = G_PTRMASK %0, %1 +... + +--- +name: ptrmask_p1_s_s +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + + ; CHECK-LABEL: name: ptrmask_p1_s_s + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK: [[PTRMASK:%[0-9]+]]:sgpr(p1) = G_PTRMASK [[COPY]], [[COPY1]](s64) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(p1) = G_PTRMASK %0, %1 +... 
+ +--- +name: ptrmask_p1_v_k +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: ptrmask_p1_v_k + ; CHECK: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY [[C]](s64) + ; CHECK: [[PTRMASK:%[0-9]+]]:vgpr(p1) = G_PTRMASK [[COPY]], [[COPY1]](s64) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s64) = G_CONSTANT i64 1 + %2:_(p1) = G_PTRMASK %0, %1 +... + +--- +name: ptrmask_p1_v_s +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $sgpr0_sgpr1 + + ; CHECK-LABEL: name: ptrmask_p1_v_s + ; CHECK: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK: [[PTRMASK:%[0-9]+]]:vgpr(p1) = G_PTRMASK [[COPY]], [[COPY2]](s64) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $sgpr0_sgpr1 + %2:_(p1) = G_PTRMASK %0, %1 +... + +--- +name: ptrmask_p1_v_v +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; CHECK-LABEL: name: ptrmask_p1_v_v + ; CHECK: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; CHECK: [[PTRMASK:%[0-9]+]]:vgpr(p1) = G_PTRMASK [[COPY]], [[COPY1]](s64) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(p1) = G_PTRMASK %0, %1 +... 
diff --git a/llvm/test/MachineVerifier/test_g_ptrmask.mir b/llvm/test/MachineVerifier/test_g_ptrmask.mir new file mode 100644 index 0000000000000..6edf1b1d894aa --- /dev/null +++ b/llvm/test/MachineVerifier/test_g_ptrmask.mir @@ -0,0 +1,54 @@ +# REQUIRES: aarch64-registered-target +# RUN: not --crash llc -o - -march=arm64 -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s + +--- +name: test_ptr_mask +tracksRegLiveness: true +liveins: +body: | + bb.0: + + %0:_(p0) = G_IMPLICIT_DEF + %1:_(p0) = G_IMPLICIT_DEF + %2:_(s64) = G_IMPLICIT_DEF + + ; CHECK: Bad machine code: Type mismatch in generic instruction + ; CHECK: Bad machine code: ptrmask result type must be a pointer + %3:_(s64) = G_PTRMASK %0, %2 + + ; CHECK: Bad machine code: Type mismatch in generic instruction + %4:_(p0) = G_PTRMASK %2, %2 + + ; CHECK: Bad machine code: ptrmask mask type must be an integer + %5:_(p0) = G_PTRMASK %0, %0 + + %6:_(<2 x p0>) = G_IMPLICIT_DEF + %7:_(<2 x s64>) = G_IMPLICIT_DEF + + ; CHECK: Bad machine code: Type mismatch in generic instruction + ; CHECK: Bad machine code: ptrmask result type must be a pointer + %8:_(<2 x s64>) = G_PTRMASK %6, %7 + + ; CHECK: Bad machine code: Type mismatch in generic instruction + %9:_(<2 x p0>) = G_PTRMASK %7, %7 + + ; CHECK: Bad machine code: Type mismatch in generic instruction + ; CHECK: Bad machine code: ptrmask mask type must be an integer + ; CHECK: Bad machine code: operand types must be all-vector or all-scalar + %10:_(<2 x p0>) = G_PTRMASK %0, %0 + + ; CHECK: Bad machine code: Type mismatch in generic instruction + %11:_(p0) = G_PTRMASK %6, %2 + + ; CHECK: Bad machine code: operand types must be all-vector or all-scalar + %12:_(p0) = G_PTRMASK %0, %7 + + ; CHECK: Bad machine code: operand types must be all-vector or all-scalar + %13:_(<2 x p0>) = G_PTRMASK %6, %2 + + %14:_(<4 x p0>) = G_IMPLICIT_DEF + + ; CHECK: Bad machine code: operand types must preserve number of vector elements + %15:_(<4 x p0>) = G_PTRMASK %14, %8 + +... 
From b59b3640bcbdfc6cf4b35ff3a6ad5f524a073b45 Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Fri, 22 May 2020 17:33:03 -0700 Subject: [PATCH 103/770] Debug Info: Mark os_log helper functions as artificial The os_log helper functions are linkonce_odr and supposed to be uniqued across TUs, so attaching a DW_AT_decl_line on it is highly misleading. By setting the function decl to implicit, CGDebugInfo properly marks the functions as artificial and uses a default file / line 0 location for the function. rdar://problem/63450824 Differential Revision: https://reviews.llvm.org/D80463 --- clang/lib/CodeGen/CGBuiltin.cpp | 2 ++ clang/test/CodeGen/debug-info-oslog.c | 15 +++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 clang/test/CodeGen/debug-info-oslog.c diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index ddd9a68a8edb7..bef0ad27145f3 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1271,6 +1271,8 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( FunctionDecl *FD = FunctionDecl::Create( Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II, FuncionTy, nullptr, SC_PrivateExtern, false, false); + // Avoid generating debug location info for the function.
+ FD->setImplicit(); StartFunction(FD, ReturnTy, Fn, FI, Args); diff --git a/clang/test/CodeGen/debug-info-oslog.c b/clang/test/CodeGen/debug-info-oslog.c new file mode 100644 index 0000000000000..c32c79eb8a6f4 --- /dev/null +++ b/clang/test/CodeGen/debug-info-oslog.c @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 -triple x86_64-darwin-apple -debug-info-kind=limited \ +// RUN: %s -emit-llvm -o - | FileCheck %s +void test_builtin_os_log(void *buf, int i, const char *data) { + __builtin_os_log_format(buf, "%d", i); +} + +// CHECK: define linkonce_odr {{.*}}@__os_log_helper_1_0_1_4_0( +// CHECK-SAME: !dbg ![[OS_LOG_HELPER:[0-9]+]] + +// This helper is going to be uniqued, so it should not have a line +// number between file and type. + +// CHECK: distinct !DISubprogram(name: "__os_log_helper_1_0_1_4_0", +// CHECK-SAME: file: !{{[0-9+]}}, type +// CHECK-SAME: flags: DIFlagArtificial From 6b7d51ad4a16579b0a7d41c77715be4d9e266d8c Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Tue, 26 May 2020 08:53:02 -0700 Subject: [PATCH 104/770] Add missing forward decl to unbreak the modular build --- clang/include/clang/Index/IndexingOptions.h | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/include/clang/Index/IndexingOptions.h b/clang/include/clang/Index/IndexingOptions.h index 2dd276998abf7..9f5c03d1b3b94 100644 --- a/clang/include/clang/Index/IndexingOptions.h +++ b/clang/include/clang/Index/IndexingOptions.h @@ -14,6 +14,7 @@ #include namespace clang { +class Decl; namespace index { struct IndexingOptions { From 50d4b22ca0dd8f25a2ab2cb53a04627b2504ecfe Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 20 May 2020 13:26:10 -0400 Subject: [PATCH 105/770] AMDGPU/GlobalISel: Fix assert on 16-bit G_EXTRACT results I consider this to be a hack, since we probably should not mark any 16-bit extract as legal, and require all extracts to be done on multiples of 32. 
There are quite a few more battles to fight in the legalizer for sub-dword vectors, so just select this for now so we can pass OpenCL conformance without crashing. Also fix the same assert for G_INSERTs. Unlike G_EXTRACT there's not a trivial way to select this so just fail on it. --- .../AMDGPU/AMDGPUInstructionSelector.cpp | 11 +++- .../AMDGPU/GlobalISel/inst-select-extract.mir | 57 +++++++++++++++++++ .../GlobalISel/inst-select-insert.xfail.mir | 19 +++++++ 3 files changed, 85 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.xfail.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index e47f25f7828d5..aee6c0dd8a8e0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -499,13 +499,18 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const { LLT DstTy = MRI->getType(DstReg); LLT SrcTy = MRI->getType(SrcReg); const unsigned SrcSize = SrcTy.getSizeInBits(); - const unsigned DstSize = DstTy.getSizeInBits(); + unsigned DstSize = DstTy.getSizeInBits(); // TODO: Should handle any multiple of 32 offset. unsigned Offset = I.getOperand(2).getImm(); if (Offset % 32 != 0 || DstSize > 128) return false; + // 16-bit operations really use 32-bit registers. + // FIXME: Probably should not allow 16-bit G_EXTRACT results. + if (DstSize == 16) + DstSize = 32; + const TargetRegisterClass *DstRC = TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI); if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) @@ -728,7 +733,9 @@ bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const { unsigned InsSize = Src1Ty.getSizeInBits(); int64_t Offset = I.getOperand(3).getImm(); - if (Offset % 32 != 0) + + // FIXME: These cases should have been illegal and unnecessary to check here. 
+ if (Offset % 32 != 0 || InsSize % 32 != 0) return false; unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, InsSize / 32); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir index 795ebc6a1a9a6..df16e9c1f0917 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir @@ -255,3 +255,60 @@ body: | S_ENDPGM 0, implicit %1 ... + +# FIXME: Probably should not be legal +--- +name: extract_sgpr_s16_from_v4s16_offset0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; CHECK-LABEL: name: extract_sgpr_s16_from_v4s16_offset0 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; CHECK: S_ENDPGM 0, implicit [[COPY1]] + %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 + %1:sgpr(s16) = G_EXTRACT %0, 0 + S_ENDPGM 0, implicit %1 + +... + +# FIXME: Probably should not be legal +--- +name: extract_sgpr_s16_from_v4s16_offset32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; CHECK-LABEL: name: extract_sgpr_s16_from_v4s16_offset32 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; CHECK: S_ENDPGM 0, implicit [[COPY1]] + %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 + %1:sgpr(s16) = G_EXTRACT %0, 32 + S_ENDPGM 0, implicit %1 + +... 
+ +# FIXME: Probably should not be legal +--- +name: extract_sgpr_s16_from_v6s16_offset32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2 + ; CHECK-LABEL: name: extract_sgpr_s16_from_v6s16_offset32 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; CHECK: S_ENDPGM 0, implicit [[COPY1]] + %0:sgpr(<6 x s16>) = COPY $sgpr0_sgpr1_sgpr2 + %1:sgpr(s16) = G_EXTRACT %0, 32 + S_ENDPGM 0, implicit %1 + +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.xfail.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.xfail.mir new file mode 100644 index 0000000000000..5e58e8b633ec4 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.xfail.mir @@ -0,0 +1,19 @@ +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s + +# FIXME: This should not be legal and this test should be deleted +# ERR: remark: :0:0: cannot select: %3:sgpr(<4 x s16>) = G_INSERT %0:sgpr, %2:sgpr(s16), 0 (in function: insert_sgpr_2s16_to_v4s16_offset0) +--- +name: insert_sgpr_2s16_to_v4s16_offset0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2 + %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 + %1:sgpr(s32) = COPY $sgpr2 + %2:sgpr(s16) = G_TRUNC %1 + %3:sgpr(<4 x s16>) = G_INSERT %0, %2, 0 + S_ENDPGM 0, implicit %3 + +... From 5bd97eb28aff252a3a9e8b0ef00d563b557f5580 Mon Sep 17 00:00:00 2001 From: Sanne Wouda Date: Tue, 26 May 2020 17:11:32 +0100 Subject: [PATCH 106/770] Fix MemoryLocation.h use without Instructions.h MemoryLocation.h was changed to only include Instruction.h. However, cast<> still needs the full definition, so move MemoryLocation::getOrNone to the cpp file. 
--- llvm/include/llvm/Analysis/MemoryLocation.h | 17 +---------------- llvm/lib/Analysis/MemoryLocation.cpp | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/Analysis/MemoryLocation.h b/llvm/include/llvm/Analysis/MemoryLocation.h index ce70df66ab7a8..f7bb15d256fde 100644 --- a/llvm/include/llvm/Analysis/MemoryLocation.h +++ b/llvm/include/llvm/Analysis/MemoryLocation.h @@ -221,22 +221,7 @@ class MemoryLocation { static MemoryLocation get(const Instruction *Inst) { return *MemoryLocation::getOrNone(Inst); } - static Optional getOrNone(const Instruction *Inst) { - switch (Inst->getOpcode()) { - case Instruction::Load: - return get(cast(Inst)); - case Instruction::Store: - return get(cast(Inst)); - case Instruction::VAArg: - return get(cast(Inst)); - case Instruction::AtomicCmpXchg: - return get(cast(Inst)); - case Instruction::AtomicRMW: - return get(cast(Inst)); - default: - return None; - } - } + static Optional getOrNone(const Instruction *Inst); /// Return a location representing the source of a memory transfer. 
static MemoryLocation getForSource(const MemTransferInst *MTI); diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp index 103cdea148e5e..4c31d6786ed8e 100644 --- a/llvm/lib/Analysis/MemoryLocation.cpp +++ b/llvm/lib/Analysis/MemoryLocation.cpp @@ -83,6 +83,23 @@ MemoryLocation MemoryLocation::get(const AtomicRMWInst *RMWI) { AATags); } +Optional MemoryLocation::getOrNone(const Instruction *Inst) { + switch (Inst->getOpcode()) { + case Instruction::Load: + return get(cast(Inst)); + case Instruction::Store: + return get(cast(Inst)); + case Instruction::VAArg: + return get(cast(Inst)); + case Instruction::AtomicCmpXchg: + return get(cast(Inst)); + case Instruction::AtomicRMW: + return get(cast(Inst)); + default: + return None; + } +} + MemoryLocation MemoryLocation::getForSource(const MemTransferInst *MTI) { return getForSource(cast(MTI)); } From d6c8736287371f1c9eba3629819209c5fb54e546 Mon Sep 17 00:00:00 2001 From: Sean Fertile Date: Tue, 26 May 2020 10:37:51 -0400 Subject: [PATCH 107/770] [PowerPC][AIX] Spill CSRs to the ABI specified stack offsets. Extend the CSR save/restore insertion code to support both 32-bit and 64-bit AIX. 
Differential Revision: https://reviews.llvm.org/D79252 --- llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 55 ++-- .../CodeGen/PowerPC/aix-calleesavedregs.ll | 4 +- llvm/test/CodeGen/PowerPC/aix-cc-abi.ll | 2 +- llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll | 12 +- llvm/test/CodeGen/PowerPC/aix-csr.ll | 270 ++++++++++++++++++ llvm/test/CodeGen/PowerPC/aix32-crsave.mir | 28 +- llvm/test/CodeGen/PowerPC/ppc64-crsave.mir | 35 +-- 7 files changed, 350 insertions(+), 56 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/aix-csr.ll diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index a21a9c6f50adf..7da24f03bc7a7 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -96,11 +96,6 @@ PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( unsigned &NumEntries) const { - // Early exit if not using the SVR4 ABI. - if (!Subtarget.isSVR4ABI()) { - NumEntries = 0; - return nullptr; - } // Floating-point register save area offsets. #define CALLEE_SAVED_FPRS \ @@ -123,7 +118,8 @@ const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( {PPC::F15, -136}, \ {PPC::F14, -144} -// 32-bit general purpose register save area offsets. +// 32-bit general purpose register save area offsets shared by ELF and +// AIX. AIX has an extra CSR with r13. #define CALLEE_SAVED_GPRS32 \ {PPC::R31, -4}, \ {PPC::R30, -8}, \ @@ -183,7 +179,7 @@ const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( // Note that the offsets here overlap, but this is fixed up in // processFunctionBeforeFrameFinalized. 
- static const SpillSlot Offsets[] = { + static const SpillSlot ELFOffsets32[] = { CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS32, @@ -218,25 +214,48 @@ const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( {PPC::S15, -136}, {PPC::S14, -144}}; - static const SpillSlot Offsets64[] = { + static const SpillSlot ELFOffsets64[] = { CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, // VRSAVE save area offset. {PPC::VRSAVE, -4}, - CALLEE_SAVED_VRS }; - if (Subtarget.isPPC64()) { - NumEntries = array_lengthof(Offsets64); + static const SpillSlot AIXOffsets32[] = { + CALLEE_SAVED_FPRS, + CALLEE_SAVED_GPRS32, + // Add AIX's extra CSR. + {PPC::R13, -76}, + // TODO Update when we add vector support for AIX. + }; - return Offsets64; - } else { - NumEntries = array_lengthof(Offsets); + static const SpillSlot AIXOffsets64[] = { + CALLEE_SAVED_FPRS, + CALLEE_SAVED_GPRS64, + // TODO Update when we add vector support for AIX. + }; + + if (Subtarget.is64BitELFABI()) { + NumEntries = array_lengthof(ELFOffsets64); + return ELFOffsets64; + } - return Offsets; + if (Subtarget.is32BitELFABI()) { + NumEntries = array_lengthof(ELFOffsets32); + return ELFOffsets32; } + + assert(Subtarget.isAIXABI() && "Unexpected ABI."); + + if (Subtarget.isPPC64()) { + NumEntries = array_lengthof(AIXOffsets64); + return AIXOffsets64; + } + + NumEntries = array_lengthof(AIXOffsets32); + return AIXOffsets32; } /// RemoveVRSaveCode - We have found that this function does not need any code @@ -1805,12 +1824,6 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const { - // Early exit if not using the SVR4 ABI. - if (!Subtarget.isSVR4ABI()) { - addScavengingSpillSlot(MF, RS); - return; - } - // Get callee saved register information. 
MachineFrameInfo &MFI = MF.getFrameInfo(); const std::vector &CSI = MFI.getCalleeSavedInfo(); diff --git a/llvm/test/CodeGen/PowerPC/aix-calleesavedregs.ll b/llvm/test/CodeGen/PowerPC/aix-calleesavedregs.ll index e0826b5031292..5f89222e1eb70 100644 --- a/llvm/test/CodeGen/PowerPC/aix-calleesavedregs.ll +++ b/llvm/test/CodeGen/PowerPC/aix-calleesavedregs.ll @@ -7,5 +7,5 @@ define void @usethirteen() { ret void } -; CHECK: stw 13, -4(1) -; CHECK: lwz 13, -4(1) +; CHECK: stw 13, -76(1) +; CHECK: lwz 13, -76(1) diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll index 36f070e7f162d..4e4d921bd6862 100644 --- a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll @@ -2337,7 +2337,7 @@ define void @caller_mix() { ; ASM64PWR4: mflr 0 ; ASM64PWR4-DAG: std 0, 16(1) -; ASM64PWR4-DAG: stdu 1, -240(1) +; ASM64PWR4-DAG: stdu 1, -256(1) ; ASM64PWR4-DAG: std [[REG:[0-9]+]], 112(1) ; ASM64PWR4-DAG: std [[REG:[0-9]+]], 120(1) ; ASM64PWR4-DAG: std [[REG:[0-9]+]], 128(1) diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll index 3f24a43ad6b2c..95b009c13ae50 100644 --- a/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll @@ -203,7 +203,7 @@ entry: ; CHECKASM-LABEL: .call_test_byval_mem3: -; ASM32BIT: stwu 1, -96(1) +; ASM32BIT: stwu 1, -112(1) ; ASM32BIT-DAG: lwz [[REG:[0-9]+]], LC{{[0-9]+}}(2) ; ASM32BIT-DAG: addi 3, 1, 56 ; ASM32BIT-DAG: addi 4, [[REG]], 24 @@ -216,7 +216,7 @@ entry: ; ASM32BIT-DAG: lwz 9, 16([[REG]]) ; ASM32BIT-DAG: lwz 10, 20([[REG]]) ; ASM32BIT: bl .test_byval_mem3 -; ASM32BIT: addi 1, 1, 96 +; ASM32BIT: addi 1, 1, 112 ; The memcpy call was inlined in 64-bit so MIR test is redundant and omitted. 
; ASM64BIT: stdu 1, -128(1) @@ -319,7 +319,7 @@ entry: ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1 ; 32BIT-NEXT: ADJCALLSTACKUP 316, 0, implicit-def dead $r1, implicit $r1 -; ASM32BIT: stwu 1, -320(1) +; ASM32BIT: stwu 1, -336(1) ; ASM32BIT-NEXT: stw [[REG1:[0-9]+]], {{[0-9]+}}(1) ; ASM32BIT: lwz [[REG1]], LC{{[0-9]+}}(2) ; ASM32BIT-DAG: lhz [[REG2:[0-9]+]], 28([[REG1]]) @@ -338,7 +338,7 @@ entry: ; ASM32BIT-DAG: lwz 9, 20([[REG1]]) ; ASM32BIT-DAG: lwz 10, 24([[REG1]]) ; ASM32BIT: bl .test_byval_mem4 -; ASM32BIT: addi 1, 1, 320 +; ASM32BIT: addi 1, 1, 336 ; Confirm the expected memcpy call is independent of the call to test_byval_mem4. ; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 @@ -363,7 +363,7 @@ entry: ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x2, implicit-def $r1 ; 64BIT-NEXT: ADJCALLSTACKUP 344, 0, implicit-def dead $r1, implicit $r1 -; ASM64BIT: stdu 1, -352(1) +; ASM64BIT: stdu 1, -368(1) ; ASM64BIT-DAG: ld [[REG1:[0-9]+]], LC{{[0-9]+}}(2) ; ASM64BIT-DAG: addi 3, 1, 112 ; ASM64BIT-DAG: addi 4, [[REG1]], 24 @@ -383,7 +383,7 @@ entry: ; ASM64BIT-DAG: ld 9, 8([[REG1]]) ; ASM64BIT-DAG: ld 10, 16([[REG1]]) ; ASM64BIT: bl .test_byval_mem4 -; ASM64BIT: addi 1, 1, 352 +; ASM64BIT: addi 1, 1, 368 define void @test_byval_mem4(i32, %struct_S31* byval(%struct_S31) align 1, %struct_S256* byval(%struct_S256) align 1 %s) { entry: diff --git a/llvm/test/CodeGen/PowerPC/aix-csr.ll b/llvm/test/CodeGen/PowerPC/aix-csr.ll new file mode 100644 index 0000000000000..74da61b0ad67a --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-csr.ll @@ -0,0 +1,270 @@ +; RUN: llc -mtriple=powerpc64-unknown-aix-xcoff -verify-machineinstrs \ +; RUN: 
-mcpu=pwr4 -mattr=-altivec -stop-after=prologepilog < %s | \ +; RUN: FileCheck --check-prefix=MIR64 %s + +; RUN: llc -mtriple=powerpc64-unknown-aix-xcoff -verify-machineinstrs \ +; RUN: -mcpu=pwr4 -mattr=-altivec < %s | FileCheck --check-prefix=ASM64 %s + +; RUN: llc -mtriple=powerpc-unknown-aix-xcoff -verify-machineinstrs \ +; RUN: -mcpu=pwr4 -mattr=-altivec -stop-after=prologepilog < %s | \ +; RUN: FileCheck --check-prefix=MIR32 %s + +; RUN: llc -mtriple=powerpc-unknown-aix-xcoff -verify-machineinstrs \ +; RUN: -mcpu=pwr4 -mattr=-altivec < %s | FileCheck --check-prefix=ASM32 %s + +define dso_local signext i32 @gprs_only(i32 signext %i) { +entry: + call void asm sideeffect "", "~{r16},~{r22},~{r30}"() + ret i32 %i +} + +; MIR64: name: gprs_only +; MIR64-LABEL: fixedStack: +; MIR64-NEXT: - { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16, stack-id: default, +; MIR64-NEXT: callee-saved-register: '$x30', callee-saved-restored: true, debug-info-variable: '', +; MIR64-NEXT: debug-info-expression: '', debug-info-location: '' } +; MIR64-NEXT: - { id: 1, type: spill-slot, offset: -80, size: 8, alignment: 16, stack-id: default, +; MIR64-NEXT: callee-saved-register: '$x22', callee-saved-restored: true, debug-info-variable: '', +; MIR64-NEXT: debug-info-expression: '', debug-info-location: '' } +; MIR64-NEXT: - { id: 2, type: spill-slot, offset: -128, size: 8, alignment: 16, stack-id: default, +; MIR64-NEXT: callee-saved-register: '$x16', callee-saved-restored: true, debug-info-variable: '', +; MIR64-NEXT: debug-info-expression: '', debug-info-location: '' } +; MIR64-NEXT: stack: [] + +; MIR32: name: gprs_only +; MIR32-LABEL: fixedStack: +; MIR32: - { id: 0, type: spill-slot, offset: -8, size: 4, alignment: 8, stack-id: default, +; MIR32-NEXT: callee-saved-register: '$r30', callee-saved-restored: true, debug-info-variable: '', +; MIR32-NEXT: debug-info-expression: '', debug-info-location: '' } +; MIR32-NEXT: - { id: 1, type: spill-slot, offset: -40, size: 4, 
alignment: 8, stack-id: default, +; MIR32-NEXT: callee-saved-register: '$r22', callee-saved-restored: true, debug-info-variable: '', +; MIR32-NEXT: debug-info-expression: '', debug-info-location: '' } +; MIR32-NEXT: - { id: 2, type: spill-slot, offset: -64, size: 4, alignment: 16, stack-id: default, +; MIR32-NEXT: callee-saved-register: '$r16', callee-saved-restored: true, debug-info-variable: '', +; MIR32-NEXT: debug-info-expression: '', debug-info-location: '' } +; MIR32-NEXT: stack: [] + + +; MIR64: liveins: $x3, $x16, $x22, $x30 + +; MIR64-DAG: STD killed $x16, -128, $x1 :: (store 8 into %fixed-stack.2, align 16) +; MIR64-DAG: STD killed $x22, -80, $x1 :: (store 8 into %fixed-stack.1, align 16) +; MIR64-DAG: STD killed $x30, -16, $x1 :: (store 8 into %fixed-stack.0, align 16) + +; MIR64: INLINEASM + +; MIR64-DAG: $x30 = LD -16, $x1 :: (load 8 from %fixed-stack.0, align 16) +; MIR64-DAG: $x22 = LD -80, $x1 :: (load 8 from %fixed-stack.1, align 16) +; MIR64-DAG: $x16 = LD -128, $x1 :: (load 8 from %fixed-stack.2, align 16) +; MIR64: BLR8 implicit $lr8, implicit $rm, implicit $x3 + + +; MIR32: liveins: $r3, $r16, $r22, $r30 + +; MIR32-DAG: STW killed $r16, -64, $r1 :: (store 4 into %fixed-stack.2, align 16) +; MIR32-DAG: STW killed $r22, -40, $r1 :: (store 4 into %fixed-stack.1, align 8) +; MIR32-DAG: STW killed $r30, -8, $r1 :: (store 4 into %fixed-stack.0, align 8) + +; MIR32: INLINEASM + +; MIR32-DAG: $r30 = LWZ -8, $r1 :: (load 4 from %fixed-stack.0, align 8) +; MIR32-DAG: $r22 = LWZ -40, $r1 :: (load 4 from %fixed-stack.1, align 8) +; MIR32-DAG: $r16 = LWZ -64, $r1 :: (load 4 from %fixed-stack.2, align 16) +; MIR32: BLR implicit $lr, implicit $rm, implicit $r3 + + +; ASM64-LABEL: .gprs_only: +; ASM64-DAG: std 16, -128(1) # 8-byte Folded Spill +; ASM64-DAG: std 22, -80(1) # 8-byte Folded Spill +; ASM64-DAG: std 30, -16(1) # 8-byte Folded Spill +; ASM64: #APP +; ASM64-DAG: ld 30, -16(1) # 8-byte Folded Reload +; ASM64-DAG: ld 22, -80(1) # 8-byte Folded Reload 
+; ASM64-DAG: ld 16, -128(1) # 8-byte Folded Reload +; ASM64: blr + +; ASM32-LABEL: .gprs_only: +; ASM32-DAG: stw 16, -64(1) # 4-byte Folded Spill +; ASM32-DAG: stw 22, -40(1) # 4-byte Folded Spill +; ASM32-DAG: stw 30, -8(1) # 4-byte Folded Spill +; ASM32: #APP +; ASM32-DAG: lwz 30, -8(1) # 4-byte Folded Reload +; ASM32-DAG: lwz 22, -40(1) # 4-byte Folded Reload +; ASM32-DAG: lwz 16, -64(1) # 4-byte Folded Reload +; ASM32-DAG: blr + + +declare double @dummy(i32 signext); + +define dso_local double @fprs_and_gprs(i32 signext %i) { + call void asm sideeffect "", "~{r13},~{r14},~{r25},~{r31},~{f14},~{f19},~{f21},~{f31}"() + %result = call double @dummy(i32 signext %i) + ret double %result +} + +; MIR64: name: fprs_and_gprs +; MIR64-LABEL: fixedStack: +; MIR64-NEXT: - { id: 0, type: spill-slot, offset: -8, size: 8, alignment: 8, stack-id: default, +; MIR64-NEXT: callee-saved-register: '$f31', callee-saved-restored: true, debug-info-variable: '', +; MIR64-NEXT: debug-info-expression: '', debug-info-location: '' } +; MIR64-NEXT: - { id: 1, type: spill-slot, offset: -88, size: 8, alignment: 8, stack-id: default, +; MIR64-NEXT: callee-saved-register: '$f21', callee-saved-restored: true, debug-info-variable: '', +; MIR64-NEXT: debug-info-expression: '', debug-info-location: '' } +; MIR64-NEXT: - { id: 2, type: spill-slot, offset: -104, size: 8, alignment: 8, stack-id: default, +; MIR64-NEXT: callee-saved-register: '$f19', callee-saved-restored: true, debug-info-variable: '', +; MIR64-NEXT: debug-info-expression: '', debug-info-location: '' } +; MIR64-NEXT: - { id: 3, type: spill-slot, offset: -144, size: 8, alignment: 16, stack-id: default, +; MIR64-NEXT: callee-saved-register: '$f14', callee-saved-restored: true, debug-info-variable: '', +; MIR64-NEXT: debug-info-expression: '', debug-info-location: '' } +; MIR64-NEXT: - { id: 4, type: spill-slot, offset: -152, size: 8, alignment: 8, stack-id: default, +; MIR64-NEXT: callee-saved-register: '$x31', callee-saved-restored: 
true, debug-info-variable: '', +; MIR64-NEXT: debug-info-expression: '', debug-info-location: '' } +; MIR64-NEXT: - { id: 5, type: spill-slot, offset: -200, size: 8, alignment: 8, stack-id: default, +; MIR64-NEXT: callee-saved-register: '$x25', callee-saved-restored: true, debug-info-variable: '', +; MIR64-NEXT: debug-info-expression: '', debug-info-location: '' } +; MIR64-NEXT: - { id: 6, type: spill-slot, offset: -288, size: 8, alignment: 16, stack-id: default, +; MIR64-NEXT: callee-saved-register: '$x14', callee-saved-restored: true, debug-info-variable: '', +; MIR64-NEXT: debug-info-expression: '', debug-info-location: '' } +; MIR64-NEXT: stack: [] + +; MIR32: name: fprs_and_gprs +; MIR32-LABEL: fixedStack: +; MIR32-NEXT: - { id: 0, type: spill-slot, offset: -8, size: 8, alignment: 8, stack-id: default, +; MIR32-NEXT: callee-saved-register: '$f31', callee-saved-restored: true, debug-info-variable: '', +; MIR32-NEXT: debug-info-expression: '', debug-info-location: '' } +; MIR32-NEXT: - { id: 1, type: spill-slot, offset: -88, size: 8, alignment: 8, stack-id: default, +; MIR32-NEXT: callee-saved-register: '$f21', callee-saved-restored: true, debug-info-variable: '', +; MIR32-NEXT: debug-info-expression: '', debug-info-location: '' } +; MIR32-NEXT: - { id: 2, type: spill-slot, offset: -104, size: 8, alignment: 8, stack-id: default, +; MIR32-NEXT: callee-saved-register: '$f19', callee-saved-restored: true, debug-info-variable: '', +; MIR32-NEXT: debug-info-expression: '', debug-info-location: '' } +; MIR32-NEXT: - { id: 3, type: spill-slot, offset: -144, size: 8, alignment: 16, stack-id: default, +; MIR32-NEXT: callee-saved-register: '$f14', callee-saved-restored: true, debug-info-variable: '', +; MIR32-NEXT: debug-info-expression: '', debug-info-location: '' } +; MIR32-NEXT: - { id: 4, type: spill-slot, offset: -148, size: 4, alignment: 4, stack-id: default, +; MIR32-NEXT: callee-saved-register: '$r31', callee-saved-restored: true, debug-info-variable: '', +; 
MIR32-NEXT: debug-info-expression: '', debug-info-location: '' } +; MIR32-NEXT: - { id: 5, type: spill-slot, offset: -172, size: 4, alignment: 4, stack-id: default, +; MIR32-NEXT: callee-saved-register: '$r25', callee-saved-restored: true, debug-info-variable: '', +; MIR32-NEXT: debug-info-expression: '', debug-info-location: '' } +; MIR32-NEXT: - { id: 6, type: spill-slot, offset: -216, size: 4, alignment: 8, stack-id: default, +; MIR32-NEXT: callee-saved-register: '$r14', callee-saved-restored: true, debug-info-variable: '', +; MIR32-NEXT: debug-info-expression: '', debug-info-location: '' } +; MIR32-NEXT: - { id: 7, type: spill-slot, offset: -220, size: 4, alignment: 4, stack-id: default, +; MIR32-NEXT: callee-saved-register: '$r13', callee-saved-restored: true, debug-info-variable: '', +; MIR32-NEXT: debug-info-expression: '', debug-info-location: '' } +; MIR32-NEXT: stack: [] + + +; MIR64: liveins: $x3, $x14, $x25, $x31, $f14, $f19, $f21, $f31 + +; MIR64: $x0 = MFLR8 implicit $lr8 +; MIR64-NEXT: STD killed $x0, 16, $x1 +; MIR64-NEXT: $x1 = STDU $x1, -400, $x1 +; MIR64-DAG: STD killed $x14, 112, $x1 :: (store 8 into %fixed-stack.6, align 16) +; MIR64-DAG: STD killed $x25, 200, $x1 :: (store 8 into %fixed-stack.5) +; MIR64-DAG: STD killed $x31, 248, $x1 :: (store 8 into %fixed-stack.4) +; MIR64-DAG: STFD killed $f14, 256, $x1 :: (store 8 into %fixed-stack.3, align 16) +; MIR64-DAG: STFD killed $f19, 296, $x1 :: (store 8 into %fixed-stack.2) +; MIR64-DAG: STFD killed $f21, 312, $x1 :: (store 8 into %fixed-stack.1) +; MIR64-DAG: STFD killed $f31, 392, $x1 :: (store 8 into %fixed-stack.0) + +; MIR64: INLINEASM +; MIR64-NEXT: BL8_NOP + +; MIR64-DAG: $f31 = LFD 392, $x1 :: (load 8 from %fixed-stack.0) +; MIR64-DAG: $f21 = LFD 312, $x1 :: (load 8 from %fixed-stack.1) +; MIR64-DAG: $f19 = LFD 296, $x1 :: (load 8 from %fixed-stack.2) +; MIR64-DAG: $f14 = LFD 256, $x1 :: (load 8 from %fixed-stack.3, align 16) +; MIR64-DAG: $x31 = LD 248, $x1 :: (load 8 from 
%fixed-stack.4) +; MIR64-DAG: $x25 = LD 200, $x1 :: (load 8 from %fixed-stack.5) +; MIR64-DAG: $x14 = LD 112, $x1 :: (load 8 from %fixed-stack.6, align 16) +; MIR64: $x1 = ADDI8 $x1, 400 +; MIR64-NEXT: $x0 = LD 16, $x1 +; MIR64-NEXT: MTLR8 $x0, implicit-def $lr8 +; MIR64-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $f1 + + +; MIR32: liveins: $r3, $r13, $r14, $r25, $r31, $f14, $f19, $f21, $f31 + +; MIR32: $r0 = MFLR implicit $lr +; MIR32-NEXT: STW killed $r0, 8, $r1 +; MIR32-NEXT: $r1 = STWU $r1, -288, $r1 +; MIR32-DAG: STW killed $r13, 68, $r1 :: (store 4 into %fixed-stack.7) +; MIR32-DAG: STW killed $r14, 72, $r1 :: (store 4 into %fixed-stack.6, align 8) +; MIR32-DAG: STW killed $r25, 116, $r1 :: (store 4 into %fixed-stack.5) +; MIR32-DAG: STW killed $r31, 140, $r1 :: (store 4 into %fixed-stack.4) +; MIR32-DAG: STFD killed $f14, 144, $r1 :: (store 8 into %fixed-stack.3, align 16) +; MIR32-DAG: STFD killed $f19, 184, $r1 :: (store 8 into %fixed-stack.2) +; MIR32-DAG: STFD killed $f21, 200, $r1 :: (store 8 into %fixed-stack.1) +; MIR32-DAG: STFD killed $f31, 280, $r1 :: (store 8 into %fixed-stack.0) + +; MIR32: INLINEASM +; MIR32: BL_NOP + +; MIR32-DAG: $f31 = LFD 280, $r1 :: (load 8 from %fixed-stack.0) +; MIR32-DAG: $f21 = LFD 200, $r1 :: (load 8 from %fixed-stack.1) +; MIR32-DAG: $f19 = LFD 184, $r1 :: (load 8 from %fixed-stack.2) +; MIR32-DAG: $f14 = LFD 144, $r1 :: (load 8 from %fixed-stack.3, align 16) +; MIR32-DAG: $r31 = LWZ 140, $r1 :: (load 4 from %fixed-stack.4) +; MIR32-DAG: $r25 = LWZ 116, $r1 :: (load 4 from %fixed-stack.5) +; MIR32-DAG: $r14 = LWZ 72, $r1 :: (load 4 from %fixed-stack.6, align 8) +; MIR32-DAG: $r13 = LWZ 68, $r1 :: (load 4 from %fixed-stack.7) +; MIR32: $r1 = ADDI $r1, 288 +; MIR32-NEXT: $r0 = LWZ 8, $r1 +; MIR32-NEXT: MTLR $r0, implicit-def $lr +; MIR32-NEXT: BLR implicit $lr, implicit $rm, implicit $f1 + +; ASM64-LABEL: .fprs_and_gprs: +; ASM64: mflr 0 +; ASM64-NEXT: std 0, 16(1) +; ASM64-NEXT: stdu 1, -400(1) +; ASM64-DAG: std 
14, 112(1) # 8-byte Folded Spill +; ASM64-DAG: std 25, 200(1) # 8-byte Folded Spill +; ASM64-DAG: std 31, 248(1) # 8-byte Folded Spill +; ASM64-DAG: stfd 14, 256(1) # 8-byte Folded Spill +; ASM64-DAG: stfd 19, 296(1) # 8-byte Folded Spill +; ASM64-DAG: stfd 21, 312(1) # 8-byte Folded Spill +; ASM64-DAG: stfd 31, 392(1) # 8-byte Folded Spill + +; ASM64: bl .dummy + +; ASM64-DAG: lfd 31, 392(1) # 8-byte Folded Reload +; ASM64-DAG: lfd 21, 312(1) # 8-byte Folded Reload +; ASM64-DAG: lfd 19, 296(1) # 8-byte Folded Reload +; ASM64-DAG: lfd 14, 256(1) # 8-byte Folded Reload +; ASM64-DAG: ld 31, 248(1) # 8-byte Folded Reload +; ASM64-DAG: ld 25, 200(1) # 8-byte Folded Reload +; ASM64-DAG: ld 14, 112(1) # 8-byte Folded Reload +; ASM64: addi 1, 1, 400 +; ASM64-NEXT: ld 0, 16(1) +; ASM64-NEXT: mtlr 0 +; ASM64-NEXT: blr + +; ASM32-LABEL: .fprs_and_gprs: +; ASM32: mflr 0 +; ASM32-NEXT: stw 0, 8(1) +; ASM32-NEXT: stwu 1, -288(1) +; ASM32-DAG: stw 13, 68(1) # 4-byte Folded Spill +; ASM32-DAG: stw 14, 72(1) # 4-byte Folded Spill +; ASM32-DAG: stw 25, 116(1) # 4-byte Folded Spill +; ASM32-DAG: stw 31, 140(1) # 4-byte Folded Spill +; ASM32-DAG: stfd 14, 144(1) # 8-byte Folded Spill +; ASM32-DAG: stfd 19, 184(1) # 8-byte Folded Spill +; ASM32-DAG: stfd 21, 200(1) # 8-byte Folded Spill +; ASM32-DAG: stfd 31, 280(1) # 8-byte Folded Spill + +; ASM32-DAG: bl .dummy + +; ASM32-DAG: lfd 31, 280(1) # 8-byte Folded Reload +; ASM32-DAG: lfd 21, 200(1) # 8-byte Folded Reload +; ASM32-DAG: lfd 19, 184(1) # 8-byte Folded Reload +; ASM32-DAG: lfd 14, 144(1) # 8-byte Folded Reload +; ASM32-DAG: lwz 31, 140(1) # 4-byte Folded Reload +; ASM32-DAG: lwz 25, 116(1) # 4-byte Folded Reload +; ASM32-DAG: lwz 14, 72(1) # 4-byte Folded Reload +; ASM32-DAG: lwz 13, 68(1) # 4-byte Folded Reload +; ASM32: addi 1, 1, 288 +; ASM32-NEXT: lwz 0, 8(1) +; ASM32-NEXT: mtlr 0 +; ASM32-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/aix32-crsave.mir b/llvm/test/CodeGen/PowerPC/aix32-crsave.mir index 
5a82bff33e973..8faf10233f7db 100644 --- a/llvm/test/CodeGen/PowerPC/aix32-crsave.mir +++ b/llvm/test/CodeGen/PowerPC/aix32-crsave.mir @@ -17,20 +17,25 @@ body: | renamable $r3 = COPY $r29 BLR implicit $lr, implicit $rm, implicit $r3 - ; CHECK-LABEL: fixedStack: - ; CHECK: - { id: 0, type: default, offset: 4, size: 4, alignment: 4, stack-id: default, - ; CHECK-NEXT: isImmutable: true, isAliased: false, callee-saved-register: '$cr4', - ; CHECK-NEXT: callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '', - ; CHECK-NEXT: debug-info-location: '' } - ; CHECK-LABEL: stack: + ; CHECK-LABEL: fixedStack: + ; CHECK-NEXT: - { id: 0, type: spill-slot, offset: -12, size: 4, alignment: 4, stack-id: default, + ; CHECK-NEXT: callee-saved-register: '$r29', callee-saved-restored: true, debug-info-variable: '', + ; CHECK-NEXT: debug-info-expression: '', debug-info-location: '' } + ; CHECK-NEXT: - { id: 1, type: default, offset: 4, size: 4, alignment: 4, stack-id: default, + ; CHECK-NEXT: isImmutable: true, isAliased: false, callee-saved-register: '$cr4', + ; CHECK-NEXT: callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '', + ; CHECK-NEXT: debug-info-location: '' } + ; CHECK-LABEL: stack: ; CHECK: bb.0.entry: ; CHECK-NEXT: liveins: $r3, $r29, $cr2, $cr4 ; CHECK: $r12 = MFCR implicit killed $cr2, implicit killed $cr4 ; CHECK-NEXT: STW killed $r12, 4, $r1 + ; CHECK-NEXT: STW killed $r29, -12, $r1 :: (store 4 into %fixed-stack.0) - ; CHECK: $r12 = LWZ 4, $r1 + ; CHECK: $r29 = LWZ -12, $r1 :: (load 4 from %fixed-stack.0) + ; CHECK-NEXT: $r12 = LWZ 4, $r1 ; CHECK-NEXT: $cr2 = MTOCRF $r12 ; CHECK-NEXT: $cr4 = MTOCRF killed $r12 @@ -50,7 +55,10 @@ body: | BLR implicit $lr, implicit $rm, implicit $r3 ; CHECK-LABEL: fixedStack: - ; CHECK: - { id: 0, type: default, offset: 4, size: 4, alignment: 4, stack-id: default, + ; CHECK-NEXT: - { id: 0, type: spill-slot, offset: -72, size: 4, alignment: 8, stack-id: default, + ; CHECK-NEXT: 
callee-saved-register: '$r14', callee-saved-restored: true, debug-info-variable: '', + ; CHECK-NEXT: debug-info-expression: '', debug-info-location: '' } + ; CHECK-NEXT: - { id: 1, type: default, offset: 4, size: 4, alignment: 4, stack-id: default, ; CHECK-NEXT: isImmutable: true, isAliased: false, callee-saved-register: '$cr3', ; CHECK-NEXT: callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '', ; CHECK-NEXT: debug-info-location: '' } @@ -61,6 +69,8 @@ body: | ; CHECK: $r12 = MFCR implicit killed $cr3 ; CHECK-NEXT: STW killed $r12, 4, $r1 + ; CHECK-NEXT: STW killed $r14, -72, $r1 :: (store 4 into %fixed-stack.0, align 8) - ; CHECK: $r12 = LWZ 4, $r1 + ; CHECK: $r14 = LWZ -72, $r1 :: (load 4 from %fixed-stack.0, align 8) + ; CHECK-NEXT: $r12 = LWZ 4, $r1 ; CHECK-NEXT: $cr3 = MTOCRF killed $r12 diff --git a/llvm/test/CodeGen/PowerPC/ppc64-crsave.mir b/llvm/test/CodeGen/PowerPC/ppc64-crsave.mir index b6a8748c8e3ee..b7c0b7ef8b66b 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64-crsave.mir +++ b/llvm/test/CodeGen/PowerPC/ppc64-crsave.mir @@ -1,19 +1,16 @@ # RUN: llc -mtriple powerpc64le-unknown-linux-gnu -x mir -mcpu=pwr8 \ # RUN: -run-pass=prologepilog --verify-machineinstrs < %s | \ -# RUN: FileCheck %s --check-prefixes=CHECK,SAVEONE,ELF +# RUN: FileCheck %s --check-prefixes=CHECK,SAVEONE # RUN: llc -mtriple powerpc64-unknown-linux-gnu -x mir -mcpu=pwr7 \ # RUN: -run-pass=prologepilog --verify-machineinstrs < %s | \ -# RUN: FileCheck %s --check-prefixes=CHECK,SAVEALL,ELF +# RUN: FileCheck %s --check-prefixes=CHECK,SAVEALL # RUN: llc -mtriple powerpc64-unknown-aix-xcoff -x mir -mcpu=pwr4 \ # RUN: -run-pass=prologepilog --verify-machineinstrs < %s | \ # RUN: FileCheck %s --check-prefixes=CHECK,SAVEALL -# TODO FIXME: We only check the save and restores of the callee saved gpr for -# ELF becuase AIX callee saved registers haven't been properly implemented yet. 
- --- name: CRAllSave alignment: 16 @@ -30,21 +27,23 @@ body: | BLR8 implicit $lr8, implicit $rm, implicit $x3 ; CHECK-LABEL: fixedStack: - ; ELF: - { id: 1, type: default, offset: 8, size: 4, alignment: 8, stack-id: default, - ; AIX: - { id: 0, type: default, offset: 8, size: 4, alignment: 8, stack-id: default, - ; CHECK: isImmutable: true, isAliased: false, callee-saved-register: '$cr4', - ; CHECK-NEXT: callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '', - ; CHECK-NEXT: debug-info-location: '' } + ; CHECK-NEXT: - { id: 0, type: spill-slot, offset: -24, size: 8, alignment: 8, stack-id: default, + ; CHECK-NEXT: callee-saved-register: '$x29', callee-saved-restored: true, debug-info-variable: '', + ; CHECK-NEXT: debug-info-expression: '', debug-info-location: '' } + ; CHECK-NEXT: - { id: 1, type: default, offset: 8, size: 4, alignment: 8, stack-id: default, + ; CHECK-NEXT: isImmutable: true, isAliased: false, callee-saved-register: '$cr4', + ; CHECK-NEXT: callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '', + ; CHECK-NEXT: debug-info-location: '' } ; CHECK-LABEL: stack: ; Verify the proper live-ins have been added in the prologue. 
; CHECK: liveins: $x3, $x29, $cr2, $cr4 ; CHECK: $x12 = MFCR8 implicit killed $cr2, implicit killed $cr4 - ; ELF-DAG: STD killed $x29, -24, $x1 :: (store 8 into %fixed-stack.0) + ; CHECK-DAG: STD killed $x29, -24, $x1 :: (store 8 into %fixed-stack.0) ; CHECK-DAG: STW8 killed $x12, 8, $x1 - ; ELF: $x29 = LD -24, $x1 :: (load 8 from %fixed-stack.0) + ; CHECK: $x29 = LD -24, $x1 :: (load 8 from %fixed-stack.0) ; CHECK: $x12 = LWZ8 8, $x1 ; CHECK: $cr2 = MTOCRF8 $x12 ; CHECK: $cr4 = MTOCRF8 killed $x12 @@ -67,9 +66,11 @@ body: | ; CHECK-LABEL: CR2Save ; CHECK-LABEL: fixedStack: - ; ELF: - { id: 1, type: default, offset: 8, size: 4, alignment: 8, stack-id: default, - ; AIX: - { id: 0, type: default, offset: 8, size: 4, alignment: 8, stack-id: default, - ; CHECK: isImmutable: true, isAliased: false, callee-saved-register: '$cr2', + ; CHECK-NEXT: - { id: 0, type: spill-slot, offset: -144, size: 8, alignment: 16, stack-id: default, + ; CHECK-NEXT: callee-saved-register: '$x14', callee-saved-restored: true, debug-info-variable: '', + ; CHECK-NEXT: debug-info-expression: '', debug-info-location: '' } + ; CHECK-NEXT: - { id: 1, type: default, offset: 8, size: 4, alignment: 8, stack-id: default, + ; CHECK-NEXT: isImmutable: true, isAliased: false, callee-saved-register: '$cr2', ; CHECK-NEXT: callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '', ; CHECK-NEXT: debug-info-location: '' } ; CHECK-LABEL: stack: @@ -82,10 +83,10 @@ body: | ; SAVEONE: $x12 = MFOCRF8 killed $cr2 ; SAVEALL: $x12 = MFCR8 implicit killed $cr2 - ; ELF-DAG: STD killed $x14, -144, $x1 :: (store 8 into %fixed-stack.0, align 16) + ; CHECK-DAG: STD killed $x14, -144, $x1 :: (store 8 into %fixed-stack.0, align 16) ; CHECK-DAG: STW8 killed $x12, 8, $x1 - ; ELF: $x14 = LD -144, $x1 :: (load 8 from %fixed-stack.0, align 16) + ; CHECK: $x14 = LD -144, $x1 :: (load 8 from %fixed-stack.0, align 16) ; CHECK: $x12 = LWZ8 8, $x1 ; CHECK: $cr2 = MTOCRF8 killed $x12 From 
bae7cf674621b5892a036fabe77692a59e2b115b Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 14 May 2020 13:55:20 -0700 Subject: [PATCH 108/770] [ELF][PPC64] Synthesize _savegpr[01]_{14..31} and _restgpr[01]_{14..31} In the 64-bit ELF V2 ABI Specification: Power Architecture, 2.3.3.1. GPR Save and Restore Functions defines some special functions which may be referenced by GCC produced assembly (LLVM does not reference them). With GCC -Os, when the number of call-saved registers exceeds a certain threshold, GCC generates `_savegpr0_* _restgpr0_*` calls and expects the linker to define them. See https://sourceware.org/pipermail/binutils/2002-February/017444.html and https://sourceware.org/pipermail/binutils/2004-August/036765.html . This is weird because libgcc.a would be the natural place. However, the linker generation approach has the advantage that the linker can generate multiple copies to avoid long branch thunks. We don't consider the advantage significant enough to complicate our trunk implementation, so we take a simple approach. * Check whether `_savegpr0_{14..31}` are used * If yes, define needed symbols and add an InputSection with the code sequence. `_savegpr1_*` `_restgpr0_*` and `_restgpr1_*` are similar.
Reviewed By: sfertile Differential Revision: https://reviews.llvm.org/D79977 --- lld/ELF/Arch/PPC64.cpp | 80 +++++++++++++++++++++++++++++++++++ lld/ELF/Target.h | 1 + lld/ELF/Writer.cpp | 2 + lld/test/ELF/ppc64-restgpr0.s | 38 +++++++++++++++++ lld/test/ELF/ppc64-restgpr1.s | 34 +++++++++++++++ lld/test/ELF/ppc64-savegpr0.s | 36 ++++++++++++++++ lld/test/ELF/ppc64-savegpr1.s | 34 +++++++++++++++ lld/test/ELF/ppc64-saveres.s | 31 ++++++++++++++ 8 files changed, 256 insertions(+) create mode 100644 lld/test/ELF/ppc64-restgpr0.s create mode 100644 lld/test/ELF/ppc64-restgpr1.s create mode 100644 lld/test/ELF/ppc64-savegpr0.s create mode 100644 lld/test/ELF/ppc64-savegpr1.s create mode 100644 lld/test/ELF/ppc64-saveres.s diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp index b80f96f28b46b..a182c77209aef 100644 --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -6,11 +6,13 @@ // //===----------------------------------------------------------------------===// +#include "SymbolTable.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" #include "Thunks.h" #include "lld/Common/ErrorHandler.h" +#include "lld/Common/Memory.h" #include "llvm/Support/Endian.h" using namespace llvm; @@ -104,6 +106,84 @@ bool elf::isPPC64SmallCodeModelTocReloc(RelType type) { return type == R_PPC64_TOC16 || type == R_PPC64_TOC16_DS; } +static bool addOptional(StringRef name, uint64_t value, + std::vector &defined) { + Symbol *sym = symtab->find(name); + if (!sym || sym->isDefined()) + return false; + sym->resolve(Defined{/*file=*/nullptr, saver.save(name), STB_GLOBAL, + STV_HIDDEN, STT_FUNC, value, + /*size=*/0, /*section=*/nullptr}); + defined.push_back(cast(sym)); + return true; +} + +// If from is 14, write ${prefix}14: firstInsn; ${prefix}15: +// firstInsn+0x200008; ...; ${prefix}31: firstInsn+(31-14)*0x200008; $tail +// The labels are defined only if they exist in the symbol table. 
+static void writeSequence(MutableArrayRef buf, const char *prefix, + int from, uint32_t firstInsn, + ArrayRef tail) { + std::vector defined; + char name[16]; + int first; + uint32_t *ptr = buf.data(); + for (int r = from; r < 32; ++r) { + format("%s%d", prefix, r).snprint(name, sizeof(name)); + if (addOptional(name, 4 * (r - from), defined) && defined.size() == 1) + first = r - from; + write32(ptr++, firstInsn + 0x200008 * (r - from)); + } + for (uint32_t insn : tail) + write32(ptr++, insn); + assert(ptr == &*buf.end()); + + if (defined.empty()) + return; + // The full section content has the extent of [begin, end). We drop unused + // instructions and write [first,end). + auto *sec = make( + nullptr, SHF_ALLOC, SHT_PROGBITS, 4, + makeArrayRef(reinterpret_cast(buf.data() + first), + 4 * (buf.size() - first)), + ".text"); + inputSections.push_back(sec); + for (Defined *sym : defined) { + sym->section = sec; + sym->value -= 4 * first; + } +} + +// Implements some save and restore functions as described by ELF V2 ABI to be +// compatible with GCC. With GCC -Os, when the number of call-saved registers +// exceeds a certain threshold, GCC generates _savegpr0_* _restgpr0_* calls and +// expects the linker to define them. See +// https://sourceware.org/pipermail/binutils/2002-February/017444.html and +// https://sourceware.org/pipermail/binutils/2004-August/036765.html . This is +// weird because libgcc.a would be the natural place. The linker generation +// approach has the advantage that the linker can generate multiple copies to +// avoid long branch thunks. However, we don't consider the advantage +// significant enough to complicate our trunk implementation, so we take the +// simple approach and synthesize .text sections providing the implementation. 
+void elf::addPPC64SaveRestore() { + static uint32_t savegpr0[20], restgpr0[21], savegpr1[19], restgpr1[19]; + constexpr uint32_t blr = 0x4e800020, mtlr_0 = 0x7c0803a6; + + // _restgpr0_14: ld 14, -144(1); _restgpr0_15: ld 15, -136(1); ... + // Tail: ld 0, 16(1); mtlr 0; blr + writeSequence(restgpr0, "_restgpr0_", 14, 0xe9c1ff70, + {0xe8010010, mtlr_0, blr}); + // _restgpr1_14: ld 14, -144(12); _restgpr1_15: ld 15, -136(12); ... + // Tail: blr + writeSequence(restgpr1, "_restgpr1_", 14, 0xe9ccff70, {blr}); + // _savegpr0_14: std 14, -144(1); _savegpr0_15: std 15, -136(1); ... + // Tail: std 0, 16(1); blr + writeSequence(savegpr0, "_savegpr0_", 14, 0xf9c1ff70, {0xf8010010, blr}); + // _savegpr1_14: std 14, -144(12); _savegpr1_15: std 15, -136(12); ... + // Tail: blr + writeSequence(savegpr1, "_savegpr1_", 14, 0xf9ccff70, {blr}); +} + // Find the R_PPC64_ADDR64 in .rela.toc with matching offset. template static std::pair diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index a308a41ff4b92..47905ae64a47d 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -213,6 +213,7 @@ unsigned getPPC64GlobalEntryToLocalEntryOffset(uint8_t stOther); // the .toc section. bool isPPC64SmallCodeModelTocReloc(RelType type); +void addPPC64SaveRestore(); uint64_t getPPC64TocBase(); uint64_t getAArch64Page(uint64_t expr); diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index f782cd3cbc45a..9a6be7931a286 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -264,6 +264,8 @@ void elf::addReservedSymbols() { // glibc *crt1.o has a undefined reference to _SDA_BASE_. Since we don't // support Small Data Area, define it arbitrarily as 0. 
addOptionalRegular("_SDA_BASE_", nullptr, 0, STV_HIDDEN); + } else if (config->emachine == EM_PPC64) { + addPPC64SaveRestore(); } // The Power Architecture 64-bit v2 ABI defines a TableOfContents (TOC) which diff --git a/lld/test/ELF/ppc64-restgpr0.s b/lld/test/ELF/ppc64-restgpr0.s new file mode 100644 index 0000000000000..3627272dab55f --- /dev/null +++ b/lld/test/ELF/ppc64-restgpr0.s @@ -0,0 +1,38 @@ +# REQUIRES: ppc +## Test code sequences of synthesized _restgpr0_{14..31} + +# RUN: llvm-mc -filetype=obj -triple=ppc64le %s -o %t14.o +# RUN: ld.lld %t14.o -o %t14 +# RUN: llvm-objdump -d %t14 | FileCheck --check-prefix=R14 %s + +# R14-LABEL: <_restgpr0_14>: +# R14-NEXT: ld 14, -144(1) +# R14-NEXT: ld 15, -136(1) +# R14-EMPTY: +# R14-NEXT: <_restgpr0_16>: +# R14-NEXT: ld 16, -128(1) +# R14: ld 31, -8(1) +# R14-NEXT: ld 0, 16(1) +# R14-NEXT: mtlr 0 +# R14-NEXT: blr + +## Don't synthesize _restgpr0_{14..30} because they are unused. +# RUN: echo 'bl _restgpr0_31' | llvm-mc -filetype=obj -triple=ppc64 - -o %t31.o +# RUN: ld.lld %t31.o -o %t31 +# RUN: llvm-objdump -d %t31 | FileCheck --check-prefix=R31 %s + +# R31-LABEL: Disassembly of section .text: +# R31-EMPTY: +# R31-NEXT: <_restgpr0_31>: +# R31-NEXT: ld 31, -8(1) +# R31-NEXT: ld 0, 16(1) +# R31-NEXT: mtlr 0 +# R31-NEXT: blr + +# RUN: echo 'bl _restgpr0_32' | llvm-mc -filetype=obj -triple=ppc64 - -o %t32.o +# RUN: not ld.lld %t32.o -o /dev/null + +.globl _start +_start: + bl _restgpr0_14 + bl _restgpr0_16 diff --git a/lld/test/ELF/ppc64-restgpr1.s b/lld/test/ELF/ppc64-restgpr1.s new file mode 100644 index 0000000000000..e4b97daf06d75 --- /dev/null +++ b/lld/test/ELF/ppc64-restgpr1.s @@ -0,0 +1,34 @@ +# REQUIRES: ppc +## Test code sequences of synthesized _restgpr1_{14..31} + +# RUN: llvm-mc -filetype=obj -triple=ppc64le %s -o %t14.o +# RUN: ld.lld %t14.o -o %t14 +# RUN: llvm-objdump -d %t14 | FileCheck --check-prefix=R14 %s + +# R14: <_restgpr1_14>: +# R14-NEXT: ld 14, -144(12) +# R14-NEXT: ld 15, -136(12) +# 
R14-EMPTY: +# R14-NEXT: <_restgpr1_16>: +# R14-NEXT: ld 16, -128(12) +# R14: ld 31, -8(12) +# R14-NEXT: blr + +## Don't synthesize _restgpr1_{14..30} because they are unused. +# RUN: echo 'bl _restgpr1_31' | llvm-mc -filetype=obj -triple=ppc64 - -o %t31.o +# RUN: ld.lld %t31.o -o %t31 +# RUN: llvm-objdump -d %t31 | FileCheck --check-prefix=R31 %s + +# R31-LABEL: Disassembly of section .text: +# R31-EMPTY: +# R31-NEXT: <_restgpr1_31>: +# R31-NEXT: ld 31, -8(12) +# R31-NEXT: blr + +# RUN: echo 'bl _restgpr1_32' | llvm-mc -filetype=obj -triple=ppc64le - -o %t32.o +# RUN: not ld.lld %t32.o -o /dev/null + +.globl _start +_start: + bl _restgpr1_14 + bl _restgpr1_16 diff --git a/lld/test/ELF/ppc64-savegpr0.s b/lld/test/ELF/ppc64-savegpr0.s new file mode 100644 index 0000000000000..1e85340b99dfa --- /dev/null +++ b/lld/test/ELF/ppc64-savegpr0.s @@ -0,0 +1,36 @@ +# REQUIRES: ppc +## Test code sequences of synthesized _savegpr0_{14..31} + +# RUN: llvm-mc -filetype=obj -triple=ppc64le %s -o %t14.o +# RUN: ld.lld %t14.o -o %t14 +# RUN: llvm-objdump -d %t14 | FileCheck --check-prefix=R14 %s + +# R14-LABEL: <_savegpr0_14>: +# R14-NEXT: std 14, -144(1) +# R14-NEXT: std 15, -136(1) +# R14-EMPTY: +# R14-NEXT: <_savegpr0_16>: +# R14-NEXT: std 16, -128(1) +# R14: std 31, -8(1) +# R14-NEXT: std 0, 16(1) +# R14-NEXT: blr + +## Don't synthesize _savegpr0_{14..30} because they are unused. 
+# RUN: echo 'bl _savegpr0_31' | llvm-mc -filetype=obj -triple=ppc64 - -o %t31.o +# RUN: ld.lld %t31.o -o %t31 +# RUN: llvm-objdump -d %t31 | FileCheck --check-prefix=R31 %s + +# R31-LABEL: Disassembly of section .text: +# R31-EMPTY: +# R31-NEXT: <_savegpr0_31>: +# R31-NEXT: std 31, -8(1) +# R31-NEXT: std 0, 16(1) +# R31-NEXT: blr + +# RUN: echo 'bl _savegpr0_32' | llvm-mc -filetype=obj -triple=ppc64 - -o %t32.o +# RUN: not ld.lld %t32.o -o /dev/null + +.globl _start +_start: + bl _savegpr0_14 + bl _savegpr0_16 diff --git a/lld/test/ELF/ppc64-savegpr1.s b/lld/test/ELF/ppc64-savegpr1.s new file mode 100644 index 0000000000000..abb878285f823 --- /dev/null +++ b/lld/test/ELF/ppc64-savegpr1.s @@ -0,0 +1,34 @@ +# REQUIRES: ppc +## Test code sequences of synthesized _savegpr1_{14..31} + +# RUN: llvm-mc -filetype=obj -triple=ppc64le %s -o %t14.o +# RUN: ld.lld %t14.o -o %t14 +# RUN: llvm-objdump -d %t14 | FileCheck --check-prefix=R14 %s + +# R14-LABEL: <_savegpr1_14>: +# R14-NEXT: std 14, -144(12) +# R14-NEXT: std 15, -136(12) +# R14-EMPTY: +# R14-NEXT: <_savegpr1_16>: +# R14-NEXT: std 16, -128(12) +# R14: std 31, -8(12) +# R14-NEXT: blr + +## Don't synthesize _savegpr1_{14..30} because they are unused. +# RUN: echo 'bl _savegpr1_31' | llvm-mc -filetype=obj -triple=ppc64 - -o %t31.o +# RUN: ld.lld %t31.o -o %t31 +# RUN: llvm-objdump -d %t31 | FileCheck --check-prefix=R31 %s + +# R31-LABEL: Disassembly of section .text: +# R31-EMPTY: +# R31-NEXT: <_savegpr1_31>: +# R31-NEXT: std 31, -8(12) +# R31-NEXT: blr + +# RUN: echo 'bl _savegpr1_32' | llvm-mc -filetype=obj -triple=ppc64le - -o %t32.o +# RUN: not ld.lld %t32.o -o /dev/null + +.globl _start +_start: + bl _savegpr1_14 + bl _savegpr1_16 diff --git a/lld/test/ELF/ppc64-saveres.s b/lld/test/ELF/ppc64-saveres.s new file mode 100644 index 0000000000000..70ef71779952a --- /dev/null +++ b/lld/test/ELF/ppc64-saveres.s @@ -0,0 +1,31 @@ +# REQUIRES: ppc +## Test that some save and restore functions can be synthesized. 
+## The code sequences are tested by ppc64-restgpr*.s and ppc64-savegpr*.s + +# RUN: llvm-mc -filetype=obj -triple=ppc64le %s -o %t.o +# RUN: ld.lld -shared %t.o -o %t.so +# RUN: llvm-readelf -s %t.so | FileCheck --check-prefix=NM %s +# RUN: llvm-objdump -d %t.so | FileCheck %s + +## The synthesized symbols are not exported. +# NM: FUNC LOCAL HIDDEN {{.*}} _restgpr0_30 +# NM-NEXT: FUNC LOCAL HIDDEN {{.*}} _restgpr1_30 +# NM-NEXT: FUNC LOCAL HIDDEN {{.*}} _savegpr0_30 +# NM-NEXT: FUNC LOCAL HIDDEN {{.*}} _savegpr1_30 + +# CHECK: 00000000000[[#%x,RESTGPR0:]] <_restgpr0_30>: +# CHECK: 00000000000[[#%x,RESTGPR1:]] <_restgpr1_30>: +# CHECK: 00000000000[[#%x,SAVEGPR0:]] <_savegpr0_30>: +# CHECK: 00000000000[[#%x,SAVEGPR1:]] <_savegpr1_30>: +# CHECK-LABEL: <_start>: +# CHECK-NEXT: bl 0x[[#RESTGPR0]] +# CHECK-NEXT: bl 0x[[#RESTGPR1]] +# CHECK-NEXT: bl 0x[[#SAVEGPR0]] +# CHECK-NEXT: bl 0x[[#SAVEGPR1]] + +.globl _start +_start: + bl _restgpr0_30 + bl _restgpr1_30 + bl _savegpr0_30 + bl _savegpr1_30 From d4086213c6d76fcaa5fa620ad680eaaf886cc66e Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 26 May 2020 09:37:14 -0700 Subject: [PATCH 109/770] [dsymutil] Escape CFBundleIdentifier in plist. Revision 333565 started escaping HTML special characters in the plist written by dsymutil, but didn't include the updated CFBundleIdentifier. 
--- llvm/test/tools/dsymutil/Inputs/Info.plist | 2 +- llvm/test/tools/dsymutil/X86/darwin-bundle.test | 2 +- llvm/tools/dsymutil/dsymutil.cpp | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/llvm/test/tools/dsymutil/Inputs/Info.plist b/llvm/test/tools/dsymutil/Inputs/Info.plist index 97c0ae261f35f..e330c951d68a9 100644 --- a/llvm/test/tools/dsymutil/Inputs/Info.plist +++ b/llvm/test/tools/dsymutil/Inputs/Info.plist @@ -5,7 +5,7 @@ CFBundleDevelopmentRegion English CFBundleIdentifier - custom + Foo&Bar CFBundleInfoDictionaryVersion 6.0 CFBundlePackageType diff --git a/llvm/test/tools/dsymutil/X86/darwin-bundle.test b/llvm/test/tools/dsymutil/X86/darwin-bundle.test index 7f1224f30a1ef..d44b25e487054 100644 --- a/llvm/test/tools/dsymutil/X86/darwin-bundle.test +++ b/llvm/test/tools/dsymutil/X86/darwin-bundle.test @@ -18,7 +18,7 @@ CHECK-NEXT: CHECK-NEXT: CFBundleDevelopmentRegion CHECK-NEXT: English CHECK-NEXT: CFBundleIdentifier -CHECK-NEXT: com.apple.xcode.dsym.custom +CHECK-NEXT: com.apple.xcode.dsym.Foo&Bar CHECK-NEXT: CFBundleInfoDictionaryVersion CHECK-NEXT: 6.0 CHECK-NEXT: CFBundlePackageType diff --git a/llvm/tools/dsymutil/dsymutil.cpp b/llvm/tools/dsymutil/dsymutil.cpp index 32df55611f070..3a32acbec06f9 100644 --- a/llvm/tools/dsymutil/dsymutil.cpp +++ b/llvm/tools/dsymutil/dsymutil.cpp @@ -338,7 +338,9 @@ static Error createPlistFile(StringRef Bin, StringRef BundleRoot, << "\t\tCFBundleDevelopmentRegion\n" << "\t\tEnglish\n" << "\t\tCFBundleIdentifier\n" - << "\t\tcom.apple.xcode.dsym." 
<< BI.IDStr << "\n" + << "\t\tcom.apple.xcode.dsym."; + printHTMLEscaped(BI.IDStr, PL); + PL << "\n" << "\t\tCFBundleInfoDictionaryVersion\n" << "\t\t6.0\n" << "\t\tCFBundlePackageType\n" From fb38b98338cc87442e3451665e82bf1c8ef9388f Mon Sep 17 00:00:00 2001 From: alex-t Date: Tue, 26 May 2020 19:47:29 +0300 Subject: [PATCH 110/770] [AMDGPU] NFC target dependent requiresUniformRegister refactored out Summary: Target specific method encapsulated into the Target Lowering Info. Reviewers: rampitec, vpykhtin Reviewed By: rampitec Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70085 --- llvm/include/llvm/CodeGen/TargetLowering.h | 13 +++++++------ .../CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 3 +-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 6 ++++++ llvm/lib/Target/AMDGPU/SIISelLowering.h | 5 +++-- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 2689838b3e7cc..70bc6b986d3c4 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -28,6 +28,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/RuntimeLibcalls.h" @@ -821,12 +822,12 @@ class TargetLoweringBase { return RC; } - /// Allows target to decide about the register class of the - /// specific value that is live outside the defining block. - /// Returns true if the value needs uniform register class. - virtual bool requiresUniformRegister(MachineFunction &MF, - const Value *) const { - return false; + /// Allows target to decide about the divergence of the + /// specific value. 
Base class implementation returns true + /// if the Divergece Analysis exists and reports value as divergent. + virtual bool isDivergent(const LegacyDivergenceAnalysis *DA, + MachineFunction &MF, const Value *V) const { + return DA && DA->isDivergent(V); } /// Return the 'representative' register class for the specified value diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 7a5fd7d24c681..36e9ea538b6b0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -398,8 +398,7 @@ Register FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) { } Register FunctionLoweringInfo::CreateRegs(const Value *V) { - return CreateRegs(V->getType(), DA && DA->isDivergent(V) && - !TLI->requiresUniformRegister(*MF, V)); + return CreateRegs(V->getType(), TLI->isDivergent(DA, *MF, V)); } /// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 2c147fa8947c1..722275e00a137 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -11226,6 +11226,12 @@ SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const { return RC; } +bool SITargetLowering::isDivergent(const LegacyDivergenceAnalysis *DA, + MachineFunction &MF, const Value *V) const { + return !requiresUniformRegister(MF, V) && + TargetLoweringBase::isDivergent(DA, MF, V); +} + // FIXME: This is a workaround for DivergenceAnalysis not understanding always // uniform values (as produced by the mask results of control flow intrinsics) // used outside of divergent blocks. 
The phi users need to also be treated as diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 7ef11eba4f9ce..80f3a87ce0fa9 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -416,8 +416,9 @@ class SITargetLowering final : public AMDGPUTargetLowering { virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent) const override; - virtual bool requiresUniformRegister(MachineFunction &MF, - const Value *V) const override; + virtual bool isDivergent(const LegacyDivergenceAnalysis *DA, + MachineFunction &MF, const Value *V) const override; + bool requiresUniformRegister(MachineFunction &MF, const Value *V) const; Align getPrefLoopAlignment(MachineLoop *ML) const override; void allocateHSAUserSGPRs(CCState &CCInfo, From 9786e7552d5564268484357866088d0a054bccaf Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 26 May 2020 12:58:18 -0400 Subject: [PATCH 111/770] Revert "[AMDGPU] NFC target dependent requiresUniformRegister refactored out" This reverts commit fb38b98338cc87442e3451665e82bf1c8ef9388f. This will regress compile time. 
--- llvm/include/llvm/CodeGen/TargetLowering.h | 13 ++++++------- .../CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 3 ++- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 6 ------ llvm/lib/Target/AMDGPU/SIISelLowering.h | 5 ++--- 4 files changed, 10 insertions(+), 17 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 70bc6b986d3c4..2689838b3e7cc 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -28,7 +28,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/RuntimeLibcalls.h" @@ -822,12 +821,12 @@ class TargetLoweringBase { return RC; } - /// Allows target to decide about the divergence of the - /// specific value. Base class implementation returns true - /// if the Divergece Analysis exists and reports value as divergent. - virtual bool isDivergent(const LegacyDivergenceAnalysis *DA, - MachineFunction &MF, const Value *V) const { - return DA && DA->isDivergent(V); + /// Allows target to decide about the register class of the + /// specific value that is live outside the defining block. + /// Returns true if the value needs uniform register class. 
+ virtual bool requiresUniformRegister(MachineFunction &MF, + const Value *) const { + return false; } /// Return the 'representative' register class for the specified value diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 36e9ea538b6b0..7a5fd7d24c681 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -398,7 +398,8 @@ Register FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) { } Register FunctionLoweringInfo::CreateRegs(const Value *V) { - return CreateRegs(V->getType(), TLI->isDivergent(DA, *MF, V)); + return CreateRegs(V->getType(), DA && DA->isDivergent(V) && + !TLI->requiresUniformRegister(*MF, V)); } /// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 722275e00a137..2c147fa8947c1 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -11226,12 +11226,6 @@ SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const { return RC; } -bool SITargetLowering::isDivergent(const LegacyDivergenceAnalysis *DA, - MachineFunction &MF, const Value *V) const { - return !requiresUniformRegister(MF, V) && - TargetLoweringBase::isDivergent(DA, MF, V); -} - // FIXME: This is a workaround for DivergenceAnalysis not understanding always // uniform values (as produced by the mask results of control flow intrinsics) // used outside of divergent blocks. 
The phi users need to also be treated as diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 80f3a87ce0fa9..7ef11eba4f9ce 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -416,9 +416,8 @@ class SITargetLowering final : public AMDGPUTargetLowering { virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent) const override; - virtual bool isDivergent(const LegacyDivergenceAnalysis *DA, - MachineFunction &MF, const Value *V) const override; - bool requiresUniformRegister(MachineFunction &MF, const Value *V) const; + virtual bool requiresUniformRegister(MachineFunction &MF, + const Value *V) const override; Align getPrefLoopAlignment(MachineLoop *ML) const override; void allocateHSAUserSGPRs(CCState &CCInfo, From a0ce2338a0838ccb04e10bd4f8e9ec9d7136e1d2 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 26 May 2020 12:48:22 -0400 Subject: [PATCH 112/770] [InstCombine] reassociate fsub+fadd with FMF to increase adds and throughput The -reassociate pass tends to transform this kind of pattern into something that is worse for vectorization and codegen. 
See PR43953: https://bugs.llvm.org/show_bug.cgi?id=43953 --- .../InstCombine/InstCombineAddSub.cpp | 11 +++++++ llvm/test/Transforms/InstCombine/fsub.ll | 32 +++++++++++++------ 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 85d6f47b205b7..233e0c7b5de72 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -2195,6 +2195,17 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) { return BinaryOperator::CreateFMulFMF(Op0, OneSubC, &I); } + // Reassociate fsub/fadd sequences to create more fadd instructions and + // reduce dependency chains: + // ((X - Y) + Z) - Op1 --> (X + Z) - (Y + Op1) + Value *Z; + if (match(Op0, m_OneUse(m_c_FAdd(m_OneUse(m_FSub(m_Value(X), m_Value(Y))), + m_Value(Z))))) { + Value *XZ = Builder.CreateFAddFMF(X, Z, &I); + Value *YW = Builder.CreateFAddFMF(Y, Op1, &I); + return BinaryOperator::CreateFSubFMF(XZ, YW, &I); + } + if (Instruction *F = factorizeFAddFSub(I, Builder)) return F; diff --git a/llvm/test/Transforms/InstCombine/fsub.ll b/llvm/test/Transforms/InstCombine/fsub.ll index 68e49c21b3b42..a0f02dee2909a 100644 --- a/llvm/test/Transforms/InstCombine/fsub.ll +++ b/llvm/test/Transforms/InstCombine/fsub.ll @@ -785,11 +785,13 @@ define float @fneg_fsub_constant(float %x) { ret float %sub } +; ((w-x) + y) - z --> (w+y) - (x+z) + define float @fsub_fadd_fsub_reassoc(float %w, float %x, float %y, float %z) { ; CHECK-LABEL: @fsub_fadd_fsub_reassoc( -; CHECK-NEXT: [[S1:%.*]] = fsub reassoc nsz float [[W:%.*]], [[X:%.*]] -; CHECK-NEXT: [[A:%.*]] = fadd reassoc nsz float [[S1]], [[Y:%.*]] -; CHECK-NEXT: [[S2:%.*]] = fsub reassoc nsz float [[A]], [[Z:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz float [[W:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc nsz float [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[S2:%.*]] = fsub 
reassoc nsz float [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret float [[S2]] ; %s1 = fsub reassoc nsz float %w, %x @@ -798,12 +800,14 @@ define float @fsub_fadd_fsub_reassoc(float %w, float %x, float %y, float %z) { ret float %s2 } +; FMF on the last op is enough to do the transform; vectors work too. + define <2 x float> @fsub_fadd_fsub_reassoc_commute(<2 x float> %w, <2 x float> %x, <2 x float> %y, <2 x float> %z) { ; CHECK-LABEL: @fsub_fadd_fsub_reassoc_commute( ; CHECK-NEXT: [[D:%.*]] = fdiv <2 x float> [[Y:%.*]], -; CHECK-NEXT: [[S1:%.*]] = fsub <2 x float> [[W:%.*]], [[X:%.*]] -; CHECK-NEXT: [[A:%.*]] = fadd <2 x float> [[D]], [[S1]] -; CHECK-NEXT: [[S2:%.*]] = fsub fast <2 x float> [[A]], [[Z:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <2 x float> [[D]], [[W:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <2 x float> [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[S2:%.*]] = fsub fast <2 x float> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x float> [[S2]] ; %d = fdiv <2 x float> %y, ; thwart complexity-based canonicalization @@ -813,12 +817,14 @@ define <2 x float> @fsub_fadd_fsub_reassoc_commute(<2 x float> %w, <2 x float> % ret <2 x float> %s2 } +; (v-w) + (x-y) - z --> (v+x) - (w+y+z) + define float @fsub_fadd_fsub_reassoc_twice(float %v, float %w, float %x, float %y, float %z) { ; CHECK-LABEL: @fsub_fadd_fsub_reassoc_twice( -; CHECK-NEXT: [[S1:%.*]] = fsub reassoc nsz float [[V:%.*]], [[W:%.*]] -; CHECK-NEXT: [[S2:%.*]] = fsub reassoc nsz float [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[A:%.*]] = fadd reassoc nsz float [[S1]], [[S2]] -; CHECK-NEXT: [[S3:%.*]] = fsub reassoc nsz float [[A]], [[Z:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz float [[W:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc nsz float [[X:%.*]], [[V:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd reassoc nsz float [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: [[S3:%.*]] = fsub reassoc nsz float [[TMP2]], [[TMP3]] ; CHECK-NEXT: ret float [[S3]] ; %s1 = fsub reassoc nsz float %v, %w @@ -828,6 +834,8 @@ define 
float @fsub_fadd_fsub_reassoc_twice(float %v, float %w, float %x, float % ret float %s3 } +; negative test - FMF + define float @fsub_fadd_fsub_not_reassoc(float %w, float %x, float %y, float %z) { ; CHECK-LABEL: @fsub_fadd_fsub_not_reassoc( ; CHECK-NEXT: [[S1:%.*]] = fsub fast float [[W:%.*]], [[X:%.*]] @@ -841,6 +849,8 @@ define float @fsub_fadd_fsub_not_reassoc(float %w, float %x, float %y, float %z) ret float %s2 } +; negative test - uses + define float @fsub_fadd_fsub_reassoc_use1(float %w, float %x, float %y, float %z) { ; CHECK-LABEL: @fsub_fadd_fsub_reassoc_use1( ; CHECK-NEXT: [[S1:%.*]] = fsub fast float [[W:%.*]], [[X:%.*]] @@ -856,6 +866,8 @@ define float @fsub_fadd_fsub_reassoc_use1(float %w, float %x, float %y, float %z ret float %s2 } +; negative test - uses + define float @fsub_fadd_fsub_reassoc_use2(float %w, float %x, float %y, float %z) { ; CHECK-LABEL: @fsub_fadd_fsub_reassoc_use2( ; CHECK-NEXT: [[S1:%.*]] = fsub fast float [[W:%.*]], [[X:%.*]] From 106ec64fbc7fb5ef28d0368fb1dca18e67e75adf Mon Sep 17 00:00:00 2001 From: Hiroshi Yamauchi Date: Tue, 4 Feb 2020 15:19:33 -0800 Subject: [PATCH 113/770] [PGO] Add memcmp/bcmp size value profiling. Summary: This adds support for memcmp/bcmp to the existing memcpy/memset value profiling. 
Reviewers: davidxl Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D79751 --- .../Instrumentation/PGOInstrumentation.cpp | 52 +++-- .../Instrumentation/PGOMemOPSizeOpt.cpp | 190 +++++++++++++----- .../Instrumentation/ValueProfileCollector.cpp | 10 +- .../Instrumentation/ValueProfileCollector.h | 3 +- .../Instrumentation/ValueProfilePlugins.inc | 22 +- .../Inputs/memop_size_annotation.proftext | 22 +- .../PGOProfile/memop_size_annotation.ll | 9 + .../Transforms/PGOProfile/memop_size_opt.ll | 130 ++++++++++-- 8 files changed, 354 insertions(+), 84 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 5fcb0b27d46fe..72eb5cd61b003 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -377,6 +377,7 @@ class PGOInstrumentationGenLegacyPass : public ModulePass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addRequired(); } }; @@ -405,6 +406,7 @@ class PGOInstrumentationUseLegacyPass : public ModulePass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); + AU.addRequired(); } }; @@ -437,6 +439,7 @@ INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", "PGO instrumentation.", false, false) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", "PGO instrumentation.", false, false) @@ -566,11 +569,11 @@ template class FuncPGOInstrumentation { } FuncPGOInstrumentation( - Function &Func, + Function &Func, TargetLibraryInfo &TLI, std::unordered_multimap &ComdatMembers, bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = 
nullptr, bool IsCS = false) - : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func), + : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI), ValueSites(IPVK_Last + 1), SIVisitor(Func), MST(F, BPI, BFI) { // This should be done before CFG hash computation. SIVisitor.countSelects(Func); @@ -834,15 +837,16 @@ populateEHOperandBundle(VPCandidateInfo &Cand, // Visit all edge and instrument the edges not in MST, and do value profiling. // Critical edges will be split. static void instrumentOneFunc( - Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, + Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, + BlockFrequencyInfo *BFI, std::unordered_multimap &ComdatMembers, bool IsCS) { // Split indirectbr critical edges here before computing the MST rather than // later in getInstrBB() to avoid invalidating it. SplitIndirectBrCriticalEdges(F, BPI, BFI); - FuncPGOInstrumentation FuncInfo(F, ComdatMembers, true, BPI, - BFI, IsCS); + FuncPGOInstrumentation FuncInfo(F, TLI, ComdatMembers, true, + BPI, BFI, IsCS); std::vector InstrumentBBs; FuncInfo.getInstrumentBBs(InstrumentBBs); unsigned NumCounters = @@ -997,12 +1001,12 @@ namespace { class PGOUseFunc { public: - PGOUseFunc(Function &Func, Module *Modu, + PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI, std::unordered_multimap &ComdatMembers, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, ProfileSummaryInfo *PSI, bool IsCS) : F(Func), M(Modu), BFI(BFIin), PSI(PSI), - FuncInfo(Func, ComdatMembers, false, BPI, BFIin, IsCS), + FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS), FreqAttr(FFA_Normal), IsCS(IsCS) {} // Read counts for the instrumented BB from profile. 
@@ -1504,7 +1508,8 @@ static void collectComdatMembers( } static bool InstrumentAllFunctions( - Module &M, function_ref LookupBPI, + Module &M, function_ref LookupTLI, + function_ref LookupBPI, function_ref LookupBFI, bool IsCS) { // For the context-sensitve instrumentation, we should have a separated pass // (before LTO/ThinLTO linking) to create these variables. @@ -1516,9 +1521,10 @@ static bool InstrumentAllFunctions( for (auto &F : M) { if (F.isDeclaration()) continue; + auto &TLI = LookupTLI(F); auto *BPI = LookupBPI(F); auto *BFI = LookupBFI(F); - instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers, IsCS); + instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS); } return true; } @@ -1534,27 +1540,32 @@ bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) { if (skipModule(M)) return false; + auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis().getTLI(F); + }; auto LookupBPI = [this](Function &F) { return &this->getAnalysis(F).getBPI(); }; auto LookupBFI = [this](Function &F) { return &this->getAnalysis(F).getBFI(); }; - return InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS); + return InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS); } PreservedAnalyses PGOInstrumentationGen::run(Module &M, ModuleAnalysisManager &AM) { auto &FAM = AM.getResult(M).getManager(); + auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult(F); + }; auto LookupBPI = [&FAM](Function &F) { return &FAM.getResult(F); }; - auto LookupBFI = [&FAM](Function &F) { return &FAM.getResult(F); }; - if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS)) + if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); @@ -1562,6 +1573,7 @@ PreservedAnalyses PGOInstrumentationGen::run(Module &M, static bool annotateAllFunctions( Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, + function_ref 
LookupTLI, function_ref LookupBPI, function_ref LookupBFI, ProfileSummaryInfo *PSI, bool IsCS) { @@ -1609,12 +1621,13 @@ static bool annotateAllFunctions( for (auto &F : M) { if (F.isDeclaration()) continue; + auto &TLI = LookupTLI(F); auto *BPI = LookupBPI(F); auto *BFI = LookupBFI(F); // Split indirectbr critical edges here before computing the MST rather than // later in getInstrBB() to avoid invalidating it. SplitIndirectBrCriticalEdges(F, BPI, BFI); - PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, PSI, IsCS); + PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS); bool AllZeros = false; if (!Func.readCounters(PGOReader.get(), AllZeros)) continue; @@ -1695,10 +1708,12 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M, ModuleAnalysisManager &AM) { auto &FAM = AM.getResult(M).getManager(); + auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult(F); + }; auto LookupBPI = [&FAM](Function &F) { return &FAM.getResult(F); }; - auto LookupBFI = [&FAM](Function &F) { return &FAM.getResult(F); }; @@ -1706,7 +1721,7 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M, auto *PSI = &AM.getResult(M); if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, - LookupBPI, LookupBFI, PSI, IsCS)) + LookupTLI, LookupBPI, LookupBFI, PSI, IsCS)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); @@ -1716,6 +1731,9 @@ bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { if (skipModule(M)) return false; + auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis().getTLI(F); + }; auto LookupBPI = [this](Function &F) { return &this->getAnalysis(F).getBPI(); }; @@ -1724,8 +1742,8 @@ bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { }; auto *PSI = &getAnalysis().getPSI(); - return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI, PSI, - IsCS); + return annotateAllFunctions(M, ProfileFileName, "", LookupTLI, LookupBPI, + LookupBFI, 
PSI, IsCS); } static std::string getSimpleNodeName(const BasicBlock *Node) { diff --git a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp index 9767fda82f3da..bef0e0257f029 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp @@ -95,6 +95,11 @@ extern cl::opt MemOPSizeRange; // This option sets the value that groups large memop sizes extern cl::opt MemOPSizeLarge; +static cl::opt + MemOPOptMemcmpBcmp("pgo-memop-optimize-memcmp-bcmp", cl::init(false), + cl::Hidden, + cl::desc("Size-specialize memcmp and bcmp calls")); + namespace { class PGOMemOPSizeOptLegacyPass : public FunctionPass { public: @@ -113,6 +118,7 @@ class PGOMemOPSizeOptLegacyPass : public FunctionPass { AU.addRequired(); AU.addPreserved(); AU.addPreserved(); + AU.addRequired(); } }; } // end anonymous namespace @@ -122,6 +128,7 @@ INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", "Optimize memory intrinsic using its size value profile", false, false) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", "Optimize memory intrinsic using its size value profile", false, false) @@ -131,11 +138,90 @@ FunctionPass *llvm::createPGOMemOPSizeOptLegacyPass() { } namespace { + +static const char *getMIName(const MemIntrinsic *MI) { + switch (MI->getIntrinsicID()) { + case Intrinsic::memcpy: + return "memcpy"; + case Intrinsic::memmove: + return "memmove"; + case Intrinsic::memset: + return "memset"; + default: + return "unknown"; + } +} + +// A class that abstracts a memop (memcpy, memmove, memset, memcmp and bcmp). 
+struct MemOp { + Instruction *I; + MemOp(MemIntrinsic *MI) : I(MI) {} + MemOp(CallInst *CI) : I(CI) {} + MemIntrinsic *asMI() { return dyn_cast(I); } + CallInst *asCI() { return cast(I); } + MemOp clone() { + if (auto MI = asMI()) + return MemOp(cast(MI->clone())); + return MemOp(cast(asCI()->clone())); + } + Value *getLength() { + if (auto MI = asMI()) + return MI->getLength(); + return asCI()->getArgOperand(2); + } + void setLength(Value *Length) { + if (auto MI = asMI()) + return MI->setLength(Length); + asCI()->setArgOperand(2, Length); + } + StringRef getFuncName() { + if (auto MI = asMI()) + return MI->getCalledFunction()->getName(); + return asCI()->getCalledFunction()->getName(); + } + bool isMemmove() { + if (auto MI = asMI()) + if (MI->getIntrinsicID() == Intrinsic::memmove) + return true; + return false; + } + bool isMemcmp(TargetLibraryInfo &TLI) { + LibFunc Func; + if (asMI() == nullptr && TLI.getLibFunc(*asCI(), Func) && + Func == LibFunc_memcmp) { + return true; + } + return false; + } + bool isBcmp(TargetLibraryInfo &TLI) { + LibFunc Func; + if (asMI() == nullptr && TLI.getLibFunc(*asCI(), Func) && + Func == LibFunc_bcmp) { + return true; + } + return false; + } + const char *getName(TargetLibraryInfo &TLI) { + if (auto MI = asMI()) + return getMIName(MI); + LibFunc Func; + if (TLI.getLibFunc(*asCI(), Func)) { + if (Func == LibFunc_memcmp) + return "memcmp"; + if (Func == LibFunc_bcmp) + return "bcmp"; + } + llvm_unreachable("Must be MemIntrinsic or memcmp/bcmp CallInst"); + return nullptr; + } +}; + class MemOPSizeOpt : public InstVisitor { public: MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI, - OptimizationRemarkEmitter &ORE, DominatorTree *DT) - : Func(Func), BFI(BFI), ORE(ORE), DT(DT), Changed(false) { + OptimizationRemarkEmitter &ORE, DominatorTree *DT, + TargetLibraryInfo &TLI) + : Func(Func), BFI(BFI), ORE(ORE), DT(DT), TLI(TLI), Changed(false) { ValueDataArray = std::make_unique(MemOPMaxVersion + 2); // Get the MemOPSize range 
information from option MemOPSizeRange, @@ -147,13 +233,12 @@ class MemOPSizeOpt : public InstVisitor { WorkList.clear(); visit(Func); - for (auto &MI : WorkList) { + for (auto &MO : WorkList) { ++NumOfPGOMemOPAnnotate; - if (perform(MI)) { + if (perform(MO)) { Changed = true; ++NumOfPGOMemOPOpt; - LLVM_DEBUG(dbgs() << "MemOP call: " - << MI->getCalledFunction()->getName() + LLVM_DEBUG(dbgs() << "MemOP call: " << MO.getFuncName() << "is Transformed.\n"); } } @@ -164,7 +249,16 @@ class MemOPSizeOpt : public InstVisitor { // Not perform on constant length calls. if (dyn_cast(Length)) return; - WorkList.push_back(&MI); + WorkList.push_back(MemOp(&MI)); + } + + void visitCallInst(CallInst &CI) { + LibFunc Func; + if (TLI.getLibFunc(CI, Func) && + (Func == LibFunc_memcmp || Func == LibFunc_bcmp) && + !dyn_cast(CI.getArgOperand(2))) { + WorkList.push_back(MemOp(&CI)); + } } private: @@ -172,15 +266,16 @@ class MemOPSizeOpt : public InstVisitor { BlockFrequencyInfo &BFI; OptimizationRemarkEmitter &ORE; DominatorTree *DT; + TargetLibraryInfo &TLI; bool Changed; - std::vector WorkList; + std::vector WorkList; // Start of the previse range. int64_t PreciseRangeStart; // Last value of the previse range. int64_t PreciseRangeLast; // The space to read the profile annotation. std::unique_ptr ValueDataArray; - bool perform(MemIntrinsic *MI); + bool perform(MemOp MO); // This kind shows which group the value falls in. For PreciseValue, we have // the profile count for that value. 
LargeGroup groups the values that are in @@ -196,19 +291,6 @@ class MemOPSizeOpt : public InstVisitor { } }; -static const char *getMIName(const MemIntrinsic *MI) { - switch (MI->getIntrinsicID()) { - case Intrinsic::memcpy: - return "memcpy"; - case Intrinsic::memmove: - return "memmove"; - case Intrinsic::memset: - return "memset"; - default: - return "unknown"; - } -} - static bool isProfitable(uint64_t Count, uint64_t TotalCount) { assert(Count <= TotalCount); if (Count < MemOPCountThreshold) @@ -227,21 +309,23 @@ static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num, return ScaleCount / Denom; } -bool MemOPSizeOpt::perform(MemIntrinsic *MI) { - assert(MI); - if (MI->getIntrinsicID() == Intrinsic::memmove) +bool MemOPSizeOpt::perform(MemOp MO) { + assert(MO.I); + if (MO.isMemmove()) + return false; + if (!MemOPOptMemcmpBcmp && (MO.isMemcmp(TLI) || MO.isBcmp(TLI))) return false; uint32_t NumVals, MaxNumPromotions = MemOPMaxVersion + 2; uint64_t TotalCount; - if (!getValueProfDataFromInst(*MI, IPVK_MemOPSize, MaxNumPromotions, + if (!getValueProfDataFromInst(*MO.I, IPVK_MemOPSize, MaxNumPromotions, ValueDataArray.get(), NumVals, TotalCount)) return false; uint64_t ActualCount = TotalCount; uint64_t SavedTotalCount = TotalCount; if (MemOPScaleCount) { - auto BBEdgeCount = BFI.getBlockProfileCount(MI->getParent()); + auto BBEdgeCount = BFI.getBlockProfileCount(MO.I->getParent()); if (!BBEdgeCount) return false; ActualCount = *BBEdgeCount; @@ -333,13 +417,13 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { // } // merge_bb: - BasicBlock *BB = MI->getParent(); + BasicBlock *BB = MO.I->getParent(); LLVM_DEBUG(dbgs() << "\n\n== Basic Block Before ==\n"); LLVM_DEBUG(dbgs() << *BB << "\n"); auto OrigBBFreq = BFI.getBlockFreq(BB); - BasicBlock *DefaultBB = SplitBlock(BB, MI, DT); - BasicBlock::iterator It(*MI); + BasicBlock *DefaultBB = SplitBlock(BB, MO.I, DT); + BasicBlock::iterator It(*MO.I); ++It; assert(It != DefaultBB->end()); BasicBlock *MergeBB = 
SplitBlock(DefaultBB, &(*It), DT); @@ -351,15 +435,24 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { auto &Ctx = Func.getContext(); IRBuilder<> IRB(BB); BB->getTerminator()->eraseFromParent(); - Value *SizeVar = MI->getLength(); + Value *SizeVar = MO.getLength(); SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size()); + Type *MemOpTy = MO.I->getType(); + PHINode *PHI = nullptr; + if (!MemOpTy->isVoidTy()) { + // Insert a phi for the return values at the merge block. + IRBuilder<> IRBM(MergeBB->getFirstNonPHI()); + PHI = IRBM.CreatePHI(MemOpTy, SizeIds.size() + 1, "MemOP.RVMerge"); + MO.I->replaceAllUsesWith(PHI); + PHI->addIncoming(MO.I, DefaultBB); + } // Clear the value profile data. - MI->setMetadata(LLVMContext::MD_prof, nullptr); + MO.I->setMetadata(LLVMContext::MD_prof, nullptr); // If all promoted, we don't need the MD.prof metadata. if (SavedRemainCount > 0 || Version != NumVals) // Otherwise we need update with the un-promoted records back. - annotateValueSite(*Func.getParent(), *MI, VDs.slice(Version), + annotateValueSite(*Func.getParent(), *MO.I, VDs.slice(Version), SavedRemainCount, IPVK_MemOPSize, NumVals); LLVM_DEBUG(dbgs() << "\n\n== Basic Block After==\n"); @@ -371,17 +464,18 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { for (uint64_t SizeId : SizeIds) { BasicBlock *CaseBB = BasicBlock::Create( Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB); - Instruction *NewInst = MI->clone(); + MemOp NewMO = MO.clone(); // Fix the argument. 
- auto *MemI = cast(NewInst); - auto *SizeType = dyn_cast(MemI->getLength()->getType()); + auto *SizeType = dyn_cast(NewMO.getLength()->getType()); assert(SizeType && "Expected integer type size argument."); ConstantInt *CaseSizeId = ConstantInt::get(SizeType, SizeId); - MemI->setLength(CaseSizeId); - CaseBB->getInstList().push_back(NewInst); + NewMO.setLength(CaseSizeId); + CaseBB->getInstList().push_back(NewMO.I); IRBuilder<> IRBCase(CaseBB); IRBCase.CreateBr(MergeBB); SI->addCase(CaseSizeId, CaseBB); + if (!MemOpTy->isVoidTy()) + PHI->addIncoming(NewMO.I, CaseBB); if (DT) { Updates.push_back({DominatorTree::Insert, CaseBB, MergeBB}); Updates.push_back({DominatorTree::Insert, BB, CaseBB}); @@ -399,11 +493,10 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { ORE.emit([&]() { using namespace ore; - return OptimizationRemark(DEBUG_TYPE, "memopt-opt", MI) - << "optimized " << NV("Intrinsic", StringRef(getMIName(MI))) - << " with count " << NV("Count", SumForOpt) << " out of " - << NV("Total", TotalCount) << " for " << NV("Versions", Version) - << " versions"; + return OptimizationRemark(DEBUG_TYPE, "memopt-opt", MO.I) + << "optimized " << NV("Memop", MO.getName(TLI)) << " with count " + << NV("Count", SumForOpt) << " out of " << NV("Total", TotalCount) + << " for " << NV("Versions", Version) << " versions"; }); return true; @@ -412,13 +505,13 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI, OptimizationRemarkEmitter &ORE, - DominatorTree *DT) { + DominatorTree *DT, TargetLibraryInfo &TLI) { if (DisableMemOPOPT) return false; if (F.hasFnAttribute(Attribute::OptimizeForSize)) return false; - MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE, DT); + MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE, DT, TLI); MemOPSizeOpt.perform(); return MemOPSizeOpt.isChanged(); } @@ -429,7 +522,9 @@ bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) { auto &ORE = getAnalysis().getORE(); auto *DTWP = getAnalysisIfAvailable(); 
DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; - return PGOMemOPSizeOptImpl(F, BFI, ORE, DT); + TargetLibraryInfo &TLI = + getAnalysis().getTLI(F); + return PGOMemOPSizeOptImpl(F, BFI, ORE, DT, TLI); } namespace llvm { @@ -440,7 +535,8 @@ PreservedAnalyses PGOMemOPSizeOpt::run(Function &F, auto &BFI = FAM.getResult(F); auto &ORE = FAM.getResult(F); auto *DT = FAM.getCachedResult(F); - bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE, DT); + auto &TLI = FAM.getResult(F); + bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE, DT, TLI); if (!Changed) return PreservedAnalyses::all(); auto PA = PreservedAnalyses(); diff --git a/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp b/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp index 604726d4f40fc..cd4f636ff1320 100644 --- a/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp +++ b/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp @@ -38,7 +38,7 @@ using PluginChainFinal = PluginChain; template <> class PluginChain<> { public: - PluginChain(Function &F) {} + PluginChain(Function &F, TargetLibraryInfo &TLI) {} void get(InstrProfValueKind K, std::vector &Candidates) {} }; @@ -48,7 +48,8 @@ class PluginChain : public PluginChain { using Base = PluginChain; public: - PluginChain(Function &F) : PluginChain(F), Plugin(F) {} + PluginChain(Function &F, TargetLibraryInfo &TLI) + : PluginChain(F, TLI), Plugin(F, TLI) {} void get(InstrProfValueKind K, std::vector &Candidates) { if (K == PluginT::Kind) @@ -65,8 +66,9 @@ class ValueProfileCollector::ValueProfileCollectorImpl : public PluginChainFinal using PluginChainFinal::PluginChainFinal; }; -ValueProfileCollector::ValueProfileCollector(Function &F) - : PImpl(new ValueProfileCollectorImpl(F)) {} +ValueProfileCollector::ValueProfileCollector(Function &F, + TargetLibraryInfo &TLI) + : PImpl(new ValueProfileCollectorImpl(F, TLI)) {} ValueProfileCollector::~ValueProfileCollector() = default; diff --git 
a/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h b/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h index ff883c8d0c779..c3f549c2e7cc5 100644 --- a/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h +++ b/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h @@ -16,6 +16,7 @@ #ifndef LLVM_ANALYSIS_PROFILE_GEN_ANALYSIS_H #define LLVM_ANALYSIS_PROFILE_GEN_ANALYSIS_H +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" @@ -58,7 +59,7 @@ class ValueProfileCollector { Instruction *AnnotatedInst; // Where metadata is attached. }; - ValueProfileCollector(Function &Fn); + ValueProfileCollector(Function &Fn, TargetLibraryInfo &TLI); ValueProfileCollector(ValueProfileCollector &&) = delete; ValueProfileCollector &operator=(ValueProfileCollector &&) = delete; diff --git a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc index 361035b178c85..b5dd9fab24a54 100644 --- a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc +++ b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc @@ -23,12 +23,14 @@ using CandidateInfo = ValueProfileCollector::CandidateInfo; ///--------------------------- MemIntrinsicPlugin ------------------------------ class MemIntrinsicPlugin : public InstVisitor { Function &F; + TargetLibraryInfo &TLI; std::vector *Candidates; public: static constexpr InstrProfValueKind Kind = IPVK_MemOPSize; - MemIntrinsicPlugin(Function &Fn) : F(Fn), Candidates(nullptr) {} + MemIntrinsicPlugin(Function &Fn, TargetLibraryInfo &TLI) + : F(Fn), TLI(TLI), Candidates(nullptr) {} void run(std::vector &Cs) { Candidates = &Cs; @@ -45,6 +47,22 @@ public: Instruction *AnnotatedInst = &MI; Candidates->emplace_back(CandidateInfo{Length, InsertPt, AnnotatedInst}); } + void visitCallInst(CallInst &CI) { + auto *F = CI.getCalledFunction(); + if (!F) + return; + LibFunc Func; + if 
(TLI.getLibFunc(CI, Func) && + (Func == LibFunc_memcmp || Func == LibFunc_bcmp)) { + Value *Length = CI.getArgOperand(2); + // Not instrument constant length calls. + if (dyn_cast(Length)) + return; + Instruction *InsertPt = &CI; + Instruction *AnnotatedInst = &CI; + Candidates->emplace_back(CandidateInfo{Length, InsertPt, AnnotatedInst}); + } + } }; ///------------------------ IndirectCallPromotionPlugin ------------------------ @@ -54,7 +72,7 @@ class IndirectCallPromotionPlugin { public: static constexpr InstrProfValueKind Kind = IPVK_IndirectCallTarget; - IndirectCallPromotionPlugin(Function &Fn) : F(Fn) {} + IndirectCallPromotionPlugin(Function &Fn, TargetLibraryInfo &TLI) : F(Fn) {} void run(std::vector &Candidates) { std::vector Result = findIndirectCalls(F); diff --git a/llvm/test/Transforms/PGOProfile/Inputs/memop_size_annotation.proftext b/llvm/test/Transforms/PGOProfile/Inputs/memop_size_annotation.proftext index 400b29df30365..cce1a67a94574 100644 --- a/llvm/test/Transforms/PGOProfile/Inputs/memop_size_annotation.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/memop_size_annotation.proftext @@ -14,7 +14,27 @@ foo # ValueKind = IPVK_MemOPSize: 1 # NumValueSites: -1 +3 +9 +7:33 +2:88 +9:72 +4:66 +1:99 +5:55 +6:44 +3:77 +8:22 +9 +7:33 +2:88 +9:72 +4:66 +1:99 +5:55 +6:44 +3:77 +8:22 9 7:33 2:88 diff --git a/llvm/test/Transforms/PGOProfile/memop_size_annotation.ll b/llvm/test/Transforms/PGOProfile/memop_size_annotation.ll index a59988462ae64..5884a6ebbb25d 100644 --- a/llvm/test/Transforms/PGOProfile/memop_size_annotation.ll +++ b/llvm/test/Transforms/PGOProfile/memop_size_annotation.ll @@ -33,6 +33,12 @@ for.body3: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %conv, i1 false) ; MEMOP_ANNOTATION: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %conv, i1 false) ; MEMOP_ANNOTATION-SAME: !prof ![[MEMOP_VALUESITE:[0-9]+]] + %memcmp = call i32 @memcmp(i8* %dst, i8* %src, i64 %conv) +; MEMOP_ANNOTATION: call i32 @memcmp(i8* %dst, 
i8* %src, i64 %conv) +; MEMOP_ANNOTATION-SAME: !prof ![[MEMOP_VALUESITE]] + %bcmp = call i32 @bcmp(i8* %dst, i8* %src, i64 %conv) +; MEMOP_ANNOTATION: call i32 @bcmp(i8* %dst, i8* %src, i64 %conv) +; MEMOP_ANNOTATION-SAME: !prof ![[MEMOP_VALUESITE]] ; MEMOP_ANNOTATION9: ![[MEMOP_VALUESITE]] = !{!"VP", i32 1, i64 556, i64 1, i64 99, i64 2, i64 88, i64 3, i64 77, i64 9, i64 72, i64 4, i64 66, i64 5, i64 55, i64 6, i64 44, i64 7, i64 33, i64 8, i64 22} ; MEMOP_ANNOTATION4: ![[MEMOP_VALUESITE]] = !{!"VP", i32 1, i64 556, i64 1, i64 99, i64 2, i64 88, i64 3, i64 77, i64 9, i64 72} br label %for.inc @@ -56,4 +62,7 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) +declare i32 @memcmp(i8*, i8*, i64) +declare i32 @bcmp(i8*, i8*, i64) + declare void @llvm.lifetime.end(i64, i8* nocapture) diff --git a/llvm/test/Transforms/PGOProfile/memop_size_opt.ll b/llvm/test/Transforms/PGOProfile/memop_size_opt.ll index 8d6215cf9252e..bc79fbc3e37e7 100644 --- a/llvm/test/Transforms/PGOProfile/memop_size_opt.ll +++ b/llvm/test/Transforms/PGOProfile/memop_size_opt.ll @@ -1,8 +1,8 @@ -; RUN: opt < %s -pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT -; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT -; RUN: opt < %s -pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT +; RUN: opt < %s -pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pgo-memop-optimize-memcmp-bcmp -S | FileCheck %s --check-prefix=MEMOP_OPT +; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 
-pgo-memop-percent-threshold=15 --pgo-memop-optimize-memcmp-bcmp -S | FileCheck %s --check-prefix=MEMOP_OPT +; RUN: opt < %s -pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pgo-memop-optimize-memcmp-bcmp -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT ; RUN: FileCheck %s -input-file=%t.opt.yaml --check-prefix=YAML -; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT +; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pgo-memop-optimize-memcmp-bcmp -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT ; RUN: FileCheck %s -input-file=%t.opt.yaml --check-prefix=YAML @@ -57,12 +57,6 @@ for.body3: ; MEMOP_OPT: br label %[[MERGE_LABEL2]] ; MEMOP_OPT: [[MERGE_LABEL2]]: ; MEMOP_OPT: br label %for.inc -; MEMOP_OPT: [[SWITCH_BW]] = !{!"branch_weights", i32 457, i32 99} -; Should be 457 total left (original total count 556, minus 99 from specialized -; value 1, which is removed from VP array. Also, we only end up with 5 total -; values, since the default max number of promotions is 5 and therefore -; the rest of the values are ignored when extracting the VP metadata. 
-; MEMOP_OPT: [[NEWVP]] = !{!"VP", i32 1, i64 457, i64 2, i64 88, i64 3, i64 77, i64 9, i64 72, i64 4, i64 66} for.inc: %inc = add nsw i32 %j.0, 1 @@ -79,6 +73,83 @@ for.end6: ret void } +declare void @consume(i32 %v1, i32 %v2) + +define void @foo_memcmp_bcmp(i8* %dst, i8* %src, i8* %dst2, i8* %src2, i32* %a, i32 %n) !prof !27 { +entry: + br label %for.cond + +for.cond: + %i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ] + %cmp = icmp slt i32 %i.0, %n + br i1 %cmp, label %for.body, label %for.end6, !prof !28 + +for.body: + br label %for.cond1 + +for.cond1: + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %idx.ext = sext i32 %i.0 to i64 + %add.ptr = getelementptr inbounds i32, i32* %a, i64 %idx.ext + %0 = load i32, i32* %add.ptr, align 4 + %cmp2 = icmp slt i32 %j.0, %0 + br i1 %cmp2, label %for.body3, label %for.end, !prof !29 + +for.body3: + %add = add nsw i32 %i.0, 1 + %conv = sext i32 %add to i64 + %memcmp = call i32 @memcmp(i8* %dst, i8* %src, i64 %conv), !prof !30 + %bcmp = call i32 @bcmp(i8* %dst2, i8* %src2, i64 %conv), !prof !31 + call void @consume(i32 %memcmp, i32 %bcmp) + br label %for.inc + +; MEMOP_OPT: switch i64 %conv, label %[[DEFAULT_LABEL:.*]] [ +; MEMOP_OPT: i64 1, label %[[CASE_1_LABEL:.*]] +; MEMOP_OPT: ], !prof [[SWITCH_BW:![0-9]+]] +; MEMOP_OPT: [[CASE_1_LABEL]]: +; MEMOP_OPT: %[[RV:.*]] = call i32 @memcmp(i8* %dst, i8* %src, i64 1) +; MEMOP_OPT: br label %[[MERGE_LABEL:.*]] +; MEMOP_OPT: [[DEFAULT_LABEL]]: +; MEMOP_OPT: %[[RVD:.*]] = call i32 @memcmp(i8* %dst, i8* %src, i64 %conv), !prof [[NEWVP:![0-9]+]] +; MEMOP_OPT: br label %[[MERGE_LABEL]] +; MEMOP_OPT: [[MERGE_LABEL]]: +; MEMOP_OPT: %[[PHI:.*]] = phi i32 [ %[[RVD]], %[[DEFAULT_LABEL]] ], [ %[[RV]], %[[CASE_1_LABEL]] ] +; MEMOP_OPT: switch i64 %conv, label %[[DEFAULT_LABEL2:.*]] [ +; MEMOP_OPT: i64 1, label %[[CASE_1_LABEL2:.*]] +; MEMOP_OPT: ], !prof [[SWITCH_BW:![0-9]+]] +; MEMOP_OPT: [[CASE_1_LABEL2]]: +; MEMOP_OPT: %[[RV2:.*]] = call i32 @bcmp(i8* %dst2, i8* %src2, i64 1) +; 
MEMOP_OPT: br label %[[MERGE_LABEL2:.*]] +; MEMOP_OPT: [[DEFAULT_LABEL2]]: +; MEMOP_OPT: %[[RVD2:.*]] = call i32 @bcmp(i8* %dst2, i8* %src2, i64 %conv), !prof [[NEWVP]] +; MEMOP_OPT: br label %[[MERGE_LABEL2]] +; MEMOP_OPT: [[MERGE_LABEL2]]: +; MEMOP_OPT: %[[PHI2:.*]] = phi i32 [ %[[RVD2]], %[[DEFAULT_LABEL2]] ], [ %[[RV2]], %[[CASE_1_LABEL2]] ] +; MEMOP_OPT: call void @consume(i32 %[[PHI]], i32 %[[PHI2]]) +; MEMOP_OPT: br label %for.inc + +for.inc: + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: + br label %for.inc4 + +for.inc4: + %inc5 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end6: + ret void +} + +; MEMOP_OPT: [[SWITCH_BW]] = !{!"branch_weights", i32 457, i32 99} +; Should be 457 total left (original total count 556, minus 99 from specialized +; value 1, which is removed from VP array. Also, we only end up with 5 total +; values, since the default max number of promotions is 5 and therefore +; the rest of the values are ignored when extracting the VP metadata. 
+; MEMOP_OPT: [[NEWVP]] = !{!"VP", i32 1, i64 457, i64 2, i64 88, i64 3, i64 77, i64 9, i64 72, i64 4, i64 66} + !llvm.module.flags = !{!0} !0 = !{i32 1, !"ProfileSummary", !1} @@ -118,6 +189,9 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) +declare i32 @memcmp(i8*, i8*, i64) +declare i32 @bcmp(i8*, i8*, i64) + declare void @llvm.lifetime.end(i64, i8* nocapture) ; YAML: --- !Passed @@ -127,7 +201,7 @@ declare void @llvm.lifetime.end(i64, i8* nocapture) ; YAML-NEXT: Hotness: 0 ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'optimized ' -; YAML-NEXT: - Intrinsic: memcpy +; YAML-NEXT: - Memop: memcpy ; YAML-NEXT: - String: ' with count ' ; YAML-NEXT: - Count: '99' ; YAML-NEXT: - String: ' out of ' @@ -143,7 +217,39 @@ declare void @llvm.lifetime.end(i64, i8* nocapture) ; YAML-NEXT: Hotness: 0 ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'optimized ' -; YAML-NEXT: - Intrinsic: memcpy +; YAML-NEXT: - Memop: memcpy +; YAML-NEXT: - String: ' with count ' +; YAML-NEXT: - Count: '99' +; YAML-NEXT: - String: ' out of ' +; YAML-NEXT: - Total: '556' +; YAML-NEXT: - String: ' for ' +; YAML-NEXT: - Versions: '1' +; YAML-NEXT: - String: ' versions' +; YAML-NEXT: ... +; YAML-NEXT: --- !Passed +; YAML-NEXT: Pass: pgo-memop-opt +; YAML-NEXT: Name: memopt-opt +; YAML-NEXT: Function: foo_memcmp_bcmp +; YAML-NEXT: Hotness: 0 +; YAML-NEXT: Args: +; YAML-NEXT: - String: 'optimized ' +; YAML-NEXT: - Memop: memcmp +; YAML-NEXT: - String: ' with count ' +; YAML-NEXT: - Count: '99' +; YAML-NEXT: - String: ' out of ' +; YAML-NEXT: - Total: '556' +; YAML-NEXT: - String: ' for ' +; YAML-NEXT: - Versions: '1' +; YAML-NEXT: - String: ' versions' +; YAML-NEXT: ... 
+; YAML-NEXT: --- !Passed +; YAML-NEXT: Pass: pgo-memop-opt +; YAML-NEXT: Name: memopt-opt +; YAML-NEXT: Function: foo_memcmp_bcmp +; YAML-NEXT: Hotness: 0 +; YAML-NEXT: Args: +; YAML-NEXT: - String: 'optimized ' +; YAML-NEXT: - Memop: bcmp ; YAML-NEXT: - String: ' with count ' ; YAML-NEXT: - Count: '99' ; YAML-NEXT: - String: ' out of ' From 3e62289f42d21e7e1f9a8b1d6f970740b22f5d47 Mon Sep 17 00:00:00 2001 From: Sean Fertile Date: Tue, 26 May 2020 13:06:50 -0400 Subject: [PATCH 114/770] [PowerPC][NFC] Add colon to TODO's and fix indentation. --- llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 7da24f03bc7a7..3ec6788d077b5 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -224,17 +224,17 @@ const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( }; static const SpillSlot AIXOffsets32[] = { - CALLEE_SAVED_FPRS, - CALLEE_SAVED_GPRS32, - // Add AIX's extra CSR. - {PPC::R13, -76}, - // TODO Update when we add vector support for AIX. + CALLEE_SAVED_FPRS, + CALLEE_SAVED_GPRS32, + // Add AIX's extra CSR. + {PPC::R13, -76}, + // TODO: Update when we add vector support for AIX. }; static const SpillSlot AIXOffsets64[] = { - CALLEE_SAVED_FPRS, - CALLEE_SAVED_GPRS64, - // TODO Update when we add vector support for AIX. + CALLEE_SAVED_FPRS, + CALLEE_SAVED_GPRS64, + // TODO: Update when we add vector support for AIX. }; if (Subtarget.is64BitELFABI()) { From 2c7d63257d8e33ff721af78045d2be6bac54da05 Mon Sep 17 00:00:00 2001 From: Stefanos Baziotis Date: Tue, 26 May 2020 20:40:45 +0300 Subject: [PATCH 115/770] [MSSA][Doc] Clobbers, more info on Defs / Def chain - Added more info about what we refer as a clobber in MSSA. - Added more info about MemoryDefs and how there is a single Def chain. 
- The doc portrayed MSSA as modeling the heap while it is modeling the whole memory, so I changed the wording to not be heap-specific. Differential Revision: https://reviews.llvm.org/D80000 --- llvm/docs/MemorySSA.rst | 81 ++++++++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 25 deletions(-) diff --git a/llvm/docs/MemorySSA.rst b/llvm/docs/MemorySSA.rst index 1669117fcf560..4f96c83a032d2 100644 --- a/llvm/docs/MemorySSA.rst +++ b/llvm/docs/MemorySSA.rst @@ -14,20 +14,22 @@ interactions between various memory operations. Its goal is to replace unless you're very careful, use of ``MemoryDependenceAnalysis`` can easily result in quadratic-time algorithms in LLVM. Additionally, ``MemorySSA`` doesn't have as many arbitrary limits as ``MemoryDependenceAnalysis``, so you should get -better results, too. +better results, too. One common use of ``MemorySSA`` is to quickly find out +that something definitely cannot happen (for example, reason that a hoist +out of a loop can't happen). At a high level, one of the goals of ``MemorySSA`` is to provide an SSA based form for memory, complete with def-use and use-def chains, which enables users to quickly find may-def and may-uses of memory operations. It can also be thought of as a way to cheaply give versions to the complete -state of heap memory, and associate memory operations with those versions. +state of memory, and associate memory operations with those versions. This document goes over how ``MemorySSA`` is structured, and some basic intuition on how ``MemorySSA`` works. A paper on MemorySSA (with notes about how it's implemented in GCC) `can be found here `_. Though, it's -relatively out-of-date; the paper references multiple heap partitions, but GCC +relatively out-of-date; the paper references multiple memory partitions, but GCC eventually swapped to just using one, like we now have in LLVM. Like GCC's, LLVM's MemorySSA is intraprocedural. 
@@ -41,9 +43,29 @@ structure that maps ``Instruction``\ s to ``MemoryAccess``\ es, which are Each ``MemoryAccess`` can be one of three types: +- ``MemoryDef`` - ``MemoryPhi`` - ``MemoryUse`` -- ``MemoryDef`` + +``MemoryDef``\ s are operations which may either modify memory, or which +introduce some kind of ordering constraints. Examples of ``MemoryDef``\ s +include ``store``\ s, function calls, ``load``\ s with ``acquire`` (or higher) +ordering, volatile operations, memory fences, etc. A ``MemoryDef`` +always introduces a new version of the entire memory and is linked with a single +``MemoryDef/MemoryPhi`` which is the version of memory that the new +version is based on. This implies that there is a *single* +``Def`` chain that connects all the ``Def``\ s, either directly +or indirectly. For example in: + +.. code-block:: llvm + b = MemoryDef(a) + c = MemoryDef(b) + d = MemoryDef(c) + +``d`` is connected directly with ``c`` and indirectly with ``b``. +This means that ``d`` potentially clobbers (see below) ``c`` *or* +``b`` *or* both. This in turn implies that without the use of `The walker`_, +initially every ``MemoryDef`` clobbers every other ``MemoryDef``. ``MemoryPhi``\ s are ``PhiNode``\ s, but for memory operations. If at any point we have two (or more) ``MemoryDef``\ s that could flow into a @@ -61,11 +83,6 @@ reach a phi node may or may not clobber a given variable). ``MemoryUse``\ s are operations which use but don't modify memory. An example of a ``MemoryUse`` is a ``load``, or a ``readonly`` function call. -``MemoryDef``\ s are operations which may either modify memory, or which -introduce some kind of ordering constraints. Examples of ``MemoryDef``\ s -include ``store``\ s, function calls, ``load``\ s with ``acquire`` (or higher) -ordering, volatile operations, memory fences, etc. - Every function that exists has a special ``MemoryDef`` called ``liveOnEntry``. 
It dominates every ``MemoryAccess`` in the function that ``MemorySSA`` is being run on, and implies that we've hit the top of the function. It's the only @@ -75,14 +92,28 @@ defined before the function begins. An example of all of this overlaid on LLVM IR (obtained by running ``opt -passes='print' -disable-output`` on an ``.ll`` file) is below. When -viewing this example, it may be helpful to view it in terms of clobbers. The -operands of a given ``MemoryAccess`` are all (potential) clobbers of said -MemoryAccess, and the value produced by a ``MemoryAccess`` can act as a clobber -for other ``MemoryAccess``\ es. Another useful way of looking at it is in -terms of heap versions. In that view, operands of a given -``MemoryAccess`` are the version of the heap before the operation, and -if the access produces a value, the value is the new version of the heap -after the operation. +viewing this example, it may be helpful to view it in terms of clobbers. +The operands of a given ``MemoryAccess`` are all (potential) clobbers of said +``MemoryAccess``, and the value produced by a ``MemoryAccess`` can act as a clobber +for other ``MemoryAccess``\ es. + +If a ``MemoryAccess`` is a *clobber* of another, it means that these two +``MemoryAccess``\ es may access the same memory. For example, ``x = MemoryDef(y)`` +means that ``x`` potentially modifies memory that ``y`` modifies/constrains +(or has modified / constrained). +In the same manner, ``a = MemoryPhi({BB1,b},{BB2,c})`` means that +anyone that uses ``a`` is accessing memory potentially modified / constrained +by either ``b`` or ``c`` (or both). And finally, ``MemoryUse(x)`` means +that this use accesses memory that ``x`` has modified / constrained +(as an example, think that if ``x = MemoryDef(...)`` +and ``MemoryUse(x)`` are in the same loop, the use can't +be hoisted outside alone). + +Another useful way of looking at it is in terms of memory versions. 
+In that view, operands of a given ``MemoryAccess`` are the version +of the entire memory before the operation, and if the access produces +a value (i.e. ``MemoryDef/MemoryPhi``), +the value is the new version of the memory after the operation. .. code-block:: llvm @@ -96,7 +127,7 @@ after the operation. br label %while.cond while.cond: - ; 6 = MemoryPhi({%0,1},{if.end,4}) + ; 6 = MemoryPhi({entry,1},{if.end,4}) br i1 undef, label %if.then, label %if.else if.then: @@ -148,8 +179,8 @@ Going from the top down: reaching definition is ``5``. - ``MemoryUse(1)`` notes that ``load i8, i8* %p3`` is just a user of memory, and the last thing that could clobber this use is above ``while.cond`` (e.g. - the store to ``%p3``). In heap versioning parlance, it really only depends on - the heap version 1, and is unaffected by the new heap versions generated since + the store to ``%p3``). In memory versioning parlance, it really only depends on + the memory version 1, and is unaffected by the new memory versions generated since then. As an aside, ``MemoryAccess`` is a ``Value`` mostly for convenience; it's not @@ -222,7 +253,7 @@ second ``MemoryUse`` in ``if.end`` has an operand of ``1``, which is a value numbering, etc, faster and easier. It is not possible to optimize ``MemoryDef`` in the same way, as we -restrict ``MemorySSA`` to one heap variable and, thus, one Phi node +restrict ``MemorySSA`` to one memory variable and, thus, one Phi node per block. @@ -320,14 +351,14 @@ Precision ``MemorySSA`` in LLVM deliberately trades off precision for speed. 
Let us think about memory variables as if they were disjoint partitions of the -heap (that is, if you have one variable, as above, it represents the entire -heap, and if you have multiple variables, each one represents some -disjoint portion of the heap) +memory (that is, if you have one variable, as above, it represents the entire +memory, and if you have multiple variables, each one represents some +disjoint portion of the memory) First, because alias analysis results conflict with each other, and each result may be what an analysis wants (IE TBAA may say no-alias, and something else may say must-alias), it is -not possible to partition the heap the way every optimization wants. +not possible to partition the memory the way every optimization wants. Second, some alias analysis results are not transitive (IE A noalias B, and B noalias C, does not mean A noalias C), so it is not possible to come up with a precise partitioning in all cases without variables to From 8f1156a7d004d97e9f75484a00dc4278698fd8ea Mon Sep 17 00:00:00 2001 From: mydeveloperday Date: Tue, 26 May 2020 18:47:56 +0100 Subject: [PATCH 116/770] [clang-format] Fix an ObjC regression introduced with new [[likely]][[unlikely]] support in if/else clauses Summary: {D80144} introduce an ObjC regression Only parse the `[]` if what follows is really an attribute Reviewers: krasimir, JakeMerdichAMD Reviewed By: krasimir Subscribers: rdwampler, aaron.ballman, curdeius, cfe-commits Tags: #clang, #clang-format Differential Revision: https://reviews.llvm.org/D80547 --- clang/lib/Format/UnwrappedLineParser.cpp | 49 ++++++++++++++++++++++- clang/lib/Format/UnwrappedLineParser.h | 1 + clang/unittests/Format/FormatTest.cpp | 5 +++ clang/unittests/Format/FormatTestObjC.cpp | 19 +++++++++ 4 files changed, 72 insertions(+), 2 deletions(-) diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 03b6e0c9ef744..b8da2c23b55ac 100644 --- 
a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -1962,7 +1962,7 @@ void UnwrappedLineParser::parseIfThenElse() { if (FormatTok->Tok.is(tok::l_paren)) parseParens(); // handle [[likely]] / [[unlikely]] - if (FormatTok->is(tok::l_square)) + if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) parseSquare(); bool NeedsUnwrappedLine = false; if (FormatTok->Tok.is(tok::l_brace)) { @@ -1981,7 +1981,7 @@ void UnwrappedLineParser::parseIfThenElse() { if (FormatTok->Tok.is(tok::kw_else)) { nextToken(); // handle [[likely]] / [[unlikely]] - if (FormatTok->is(tok::l_square)) + if (FormatTok->Tok.is(tok::l_square) && tryToParseSimpleAttribute()) parseSquare(); if (FormatTok->Tok.is(tok::l_brace)) { CompoundStatementIndenter Indenter(this, Style, Line->Level); @@ -2343,6 +2343,51 @@ bool UnwrappedLineParser::parseEnum() { // "} n, m;" will end up in one unwrapped line. } +namespace { +// A class used to set and restore the Token position when peeking +// ahead in the token source. +class ScopedTokenPosition { + unsigned StoredPosition; + FormatTokenSource *Tokens; + +public: + ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) { + assert(Tokens && "Tokens expected to not be null"); + StoredPosition = Tokens->getPosition(); + } + + ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); } +}; +} // namespace + +// Look to see if we have [[ by looking ahead, if +// its not then rewind to the original position. +bool UnwrappedLineParser::tryToParseSimpleAttribute() { + ScopedTokenPosition AutoPosition(Tokens); + FormatToken *Tok = Tokens->getNextToken(); + // We already read the first [ check for the second. + if (Tok && !Tok->is(tok::l_square)) { + return false; + } + // Double check that the attribute is just something + // fairly simple. 
+ while (Tok) { + if (Tok->is(tok::r_square)) { + break; + } + Tok = Tokens->getNextToken(); + } + Tok = Tokens->getNextToken(); + if (Tok && !Tok->is(tok::r_square)) { + return false; + } + Tok = Tokens->getNextToken(); + if (Tok && Tok->is(tok::semi)) { + return false; + } + return true; +} + void UnwrappedLineParser::parseJavaEnumBody() { // Determine whether the enum is simple, i.e. does not have a semicolon or // constants with class bodies. Simple enums can be formatted like braced diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h index 8d4118ab6dc7d..8b3aa4c84edba 100644 --- a/clang/lib/Format/UnwrappedLineParser.h +++ b/clang/lib/Format/UnwrappedLineParser.h @@ -134,6 +134,7 @@ class UnwrappedLineParser { bool tryToParseLambdaIntroducer(); bool tryToParsePropertyAccessor(); void tryToParseJSFunction(); + bool tryToParseSimpleAttribute(); void addUnwrappedLine(); bool eof() const; // LevelDifference is the difference of levels after and before the current diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index a3b70bfd28245..eea0b364d97c5 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -16513,6 +16513,11 @@ TEST_F(FormatTest, LikelyUnlikely) { " return 42;\n" "}\n", Style); + + verifyFormat("if (argc > 5) [[gnu::unused]] {\n" + " return 29;\n" + "}", + Style); } TEST_F(FormatTest, LLVMDefaultStyle) { diff --git a/clang/unittests/Format/FormatTestObjC.cpp b/clang/unittests/Format/FormatTestObjC.cpp index d73d090a8ba37..28d33dcdaa541 100644 --- a/clang/unittests/Format/FormatTestObjC.cpp +++ b/clang/unittests/Format/FormatTestObjC.cpp @@ -1434,6 +1434,25 @@ TEST_F(FormatTestObjC, BreakLineBeforeNestedBlockParam) { " }]"); } +TEST_F(FormatTestObjC, IfNotUnlikely) { + Style = getGoogleStyle(FormatStyle::LK_ObjC); + + verifyFormat("if (argc < 5) [obj func:arg];"); + verifyFormat("if (argc < 5) [[obj1 method1:arg1] method2:arg2];"); 
+ verifyFormat("if (argc < 5) [[foo bar] baz:i[0]];"); + verifyFormat("if (argc < 5) [[foo bar] baz:i[0]][1];"); + + verifyFormat("if (argc < 5)\n" + " [obj func:arg];\n" + "else\n" + " [obj func:arg2];"); + + verifyFormat("if (argc < 5) [[unlikely]]\n" + " [obj func:arg];\n" + "else [[likely]]\n" + " [obj func:arg2];"); +} + } // end namespace } // end namespace format } // end namespace clang From d70ec366c91b2a5fc6334e6f6ca9c4d9a6785c5e Mon Sep 17 00:00:00 2001 From: Adam Balogh Date: Tue, 26 May 2020 13:48:20 +0200 Subject: [PATCH 117/770] [Analyzer][NFC] Remove the SubEngine interface The `SubEngine` interface is an interface with only one implementation `ExprEngine`. Adding other implementations is difficult and very unlikely in the near future. Currently, if anything from `ExprEngine` is to be exposed to other classes it is moved to `SubEngine` which restricts the alternative implementations. The virtual methods have a slight performance impact. Furthermore, instead of the `LLVM`-style inheritance a native inheritance is used here, which renders `LLVM` functions like e.g. `cast()` unusable here. This patch removes this interface and allows usage of `ExprEngine` directly. 
Differential Revision: https://reviews.llvm.org/D80548 --- .../Core/PathSensitive/ConstraintManager.h | 7 +- .../Core/PathSensitive/CoreEngine.h | 6 +- .../Core/PathSensitive/ExprEngine.h | 95 ++++++---- .../Core/PathSensitive/ProgramState.h | 10 +- .../PathSensitive/RangedConstraintManager.h | 4 +- .../Core/PathSensitive/SMTConstraintManager.h | 5 +- .../PathSensitive/SimpleConstraintManager.h | 6 +- .../Core/PathSensitive/SubEngine.h | 178 ------------------ .../Core/BugReporterVisitors.cpp | 1 - clang/lib/StaticAnalyzer/Core/CMakeLists.txt | 1 - clang/lib/StaticAnalyzer/Core/CallEvent.cpp | 2 +- clang/lib/StaticAnalyzer/Core/CoreEngine.cpp | 41 ++-- clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 2 + .../lib/StaticAnalyzer/Core/ProgramState.cpp | 10 +- .../Core/RangeConstraintManager.cpp | 7 +- clang/lib/StaticAnalyzer/Core/RegionStore.cpp | 4 +- .../Core/SMTConstraintManager.cpp | 2 +- clang/lib/StaticAnalyzer/Core/SValBuilder.cpp | 2 +- .../Core/SimpleConstraintManager.cpp | 4 +- .../StaticAnalyzer/Core/SimpleSValBuilder.cpp | 2 +- clang/lib/StaticAnalyzer/Core/SubEngine.cpp | 13 -- 21 files changed, 118 insertions(+), 284 deletions(-) delete mode 100644 clang/include/clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h delete mode 100644 clang/lib/StaticAnalyzer/Core/SubEngine.cpp diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h index 935b2bb7b937d..335536b6a3106 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h @@ -32,7 +32,7 @@ namespace clang { namespace ento { class ProgramStateManager; -class SubEngine; +class ExprEngine; class SymbolReaper; class ConditionTruthVal { @@ -193,10 +193,11 @@ class ConstraintManager { std::unique_ptr CreateRangeConstraintManager(ProgramStateManager &statemgr, - SubEngine *subengine); + ExprEngine 
*exprengine); std::unique_ptr -CreateZ3ConstraintManager(ProgramStateManager &statemgr, SubEngine *subengine); +CreateZ3ConstraintManager(ProgramStateManager &statemgr, + ExprEngine *exprengine); } // namespace ento } // namespace clang diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h index 278193ef99ede..2aca2c99ef4fd 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h @@ -41,7 +41,7 @@ class LabelDecl; namespace ento { class FunctionSummariesTy; -class SubEngine; +class ExprEngine; //===----------------------------------------------------------------------===// /// CoreEngine - Implements the core logic of the graph-reachability @@ -69,7 +69,7 @@ class CoreEngine { std::vector>; private: - SubEngine &SubEng; + ExprEngine &ExprEng; /// G - The simulation graph. Each node is a (location,state) pair. mutable ExplodedGraph G; @@ -129,7 +129,7 @@ class CoreEngine { public: /// Construct a CoreEngine object to analyze the provided CFG. 
- CoreEngine(SubEngine &subengine, + CoreEngine(ExprEngine &exprengine, FunctionSummariesTy *FS, AnalyzerOptions &Opts); diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h index c66c54116a0c6..a94c847f35ee1 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h @@ -21,6 +21,7 @@ #include "clang/Analysis/DomainSpecific/ObjCNoReturn.h" #include "clang/Analysis/ProgramPoint.h" #include "clang/Basic/LLVM.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h" #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" @@ -29,9 +30,9 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/Store.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h" #include "clang/StaticAnalyzer/Core/PathSensitive/WorkList.h" #include "llvm/ADT/ArrayRef.h" #include @@ -42,6 +43,8 @@ namespace clang { class AnalysisDeclContextManager; class AnalyzerOptions; class ASTContext; +class CFGBlock; +class CFGElement; class ConstructionContext; class CXXBindTemporaryExpr; class CXXCatchStmt; @@ -72,16 +75,29 @@ class CrossTranslationUnitContext; namespace ento { +class AnalysisManager; class BasicValueFactory; +class BlockCounter; +class BranchNodeBuilder; class CallEvent; class CheckerManager; class ConstraintManager; class CXXTempObjectRegion; +class EndOfFunctionNodeBuilder; +class ExplodedNodeSet; +class ExplodedNode; 
+class IndirectGotoNodeBuilder; class MemRegion; +struct NodeBuilderContext; +class NodeBuilderWithSinks; +class ProgramState; +class ProgramStateManager; class RegionAndSymbolInvalidationTraits; class SymbolManager; +class SwitchNodeBuilder; -class ExprEngine : public SubEngine { +class ExprEngine { + virtual void anchor(); public: /// The modes of inlining, which override the default analysis-wide settings. enum InliningModes { @@ -161,7 +177,7 @@ class ExprEngine : public SubEngine { SetOfConstDecls *VisitedCalleesIn, FunctionSummariesTy *FS, InliningModes HowToInlineIn); - ~ExprEngine() override = default; + ~ExprEngine() = default; /// Returns true if there is still simulation state on the worklist. bool ExecuteWorkList(const LocationContext *L, unsigned Steps = 150000) { @@ -181,7 +197,7 @@ class ExprEngine : public SubEngine { /// getContext - Return the ASTContext associated with this analysis. ASTContext &getContext() const { return AMgr.getASTContext(); } - AnalysisManager &getAnalysisManager() override { return AMgr; } + AnalysisManager &getAnalysisManager() { return AMgr; } AnalysisDeclContextManager &getAnalysisDeclContextManager() { return AMgr.getAnalysisDeclContextManager(); @@ -196,7 +212,7 @@ class ExprEngine : public SubEngine { BugReporter &getBugReporter() { return BR; } cross_tu::CrossTranslationUnitContext * - getCrossTranslationUnitContext() override { + getCrossTranslationUnitContext() { return &CTU; } @@ -232,7 +248,7 @@ class ExprEngine : public SubEngine { /// getInitialState - Return the initial state used for the root vertex /// in the ExplodedGraph. - ProgramStateRef getInitialState(const LocationContext *InitLoc) override; + ProgramStateRef getInitialState(const LocationContext *InitLoc); ExplodedGraph &getGraph() { return G; } const ExplodedGraph &getGraph() const { return G; } @@ -270,7 +286,7 @@ class ExprEngine : public SubEngine { /// processCFGElement - Called by CoreEngine. 
Used to generate new successor /// nodes by processing the 'effects' of a CFG element. void processCFGElement(const CFGElement E, ExplodedNode *Pred, - unsigned StmtIdx, NodeBuilderContext *Ctx) override; + unsigned StmtIdx, NodeBuilderContext *Ctx); void ProcessStmt(const Stmt *S, ExplodedNode *Pred); @@ -296,7 +312,7 @@ class ExprEngine : public SubEngine { /// Called by CoreEngine when processing the entrance of a CFGBlock. void processCFGBlockEntrance(const BlockEdge &L, NodeBuilderWithSinks &nodeBuilder, - ExplodedNode *Pred) override; + ExplodedNode *Pred); /// ProcessBranch - Called by CoreEngine. Used to generate successor /// nodes by processing the 'effects' of a branch condition. @@ -305,7 +321,7 @@ class ExprEngine : public SubEngine { ExplodedNode *Pred, ExplodedNodeSet &Dst, const CFGBlock *DstT, - const CFGBlock *DstF) override; + const CFGBlock *DstF); /// Called by CoreEngine. /// Used to generate successor nodes for temporary destructors depending @@ -314,7 +330,7 @@ class ExprEngine : public SubEngine { NodeBuilderContext &BldCtx, ExplodedNode *Pred, ExplodedNodeSet &Dst, const CFGBlock *DstT, - const CFGBlock *DstF) override; + const CFGBlock *DstF); /// Called by CoreEngine. Used to processing branching behavior /// at static initializers. @@ -323,27 +339,27 @@ class ExprEngine : public SubEngine { ExplodedNode *Pred, ExplodedNodeSet &Dst, const CFGBlock *DstT, - const CFGBlock *DstF) override; + const CFGBlock *DstF); /// processIndirectGoto - Called by CoreEngine. Used to generate successor /// nodes by processing the 'effects' of a computed goto jump. - void processIndirectGoto(IndirectGotoNodeBuilder& builder) override; + void processIndirectGoto(IndirectGotoNodeBuilder& builder); /// ProcessSwitch - Called by CoreEngine. Used to generate successor /// nodes by processing the 'effects' of a switch statement. - void processSwitch(SwitchNodeBuilder& builder) override; + void processSwitch(SwitchNodeBuilder& builder); /// Called by CoreEngine. 
Used to notify checkers that processing a /// function has begun. Called for both inlined and and top-level functions. void processBeginOfFunction(NodeBuilderContext &BC, ExplodedNode *Pred, ExplodedNodeSet &Dst, - const BlockEdge &L) override; + const BlockEdge &L); /// Called by CoreEngine. Used to notify checkers that processing a /// function has ended. Called for both inlined and and top-level functions. void processEndOfFunction(NodeBuilderContext& BC, ExplodedNode *Pred, - const ReturnStmt *RS = nullptr) override; + const ReturnStmt *RS = nullptr); /// Remove dead bindings/symbols before exiting a function. void removeDeadOnEndOfFunction(NodeBuilderContext& BC, @@ -352,19 +368,19 @@ class ExprEngine : public SubEngine { /// Generate the entry node of the callee. void processCallEnter(NodeBuilderContext& BC, CallEnter CE, - ExplodedNode *Pred) override; + ExplodedNode *Pred); /// Generate the sequence of nodes that simulate the call exit and the post /// visit for CallExpr. - void processCallExit(ExplodedNode *Pred) override; + void processCallExit(ExplodedNode *Pred); /// Called by CoreEngine when the analysis worklist has terminated. - void processEndWorklist() override; + void processEndWorklist(); /// evalAssume - Callback function invoked by the ConstraintManager when /// making assumptions about state values. ProgramStateRef processAssume(ProgramStateRef state, SVal cond, - bool assumption) override; + bool assumption); /// processRegionChanges - Called by ProgramStateManager whenever a change is made /// to the store. Used to update checkers that track region values. 
@@ -374,14 +390,21 @@ class ExprEngine : public SubEngine { ArrayRef ExplicitRegions, ArrayRef Regions, const LocationContext *LCtx, - const CallEvent *Call) override; + const CallEvent *Call); + + inline ProgramStateRef + processRegionChange(ProgramStateRef state, + const MemRegion* MR, + const LocationContext *LCtx) { + return processRegionChanges(state, nullptr, MR, MR, LCtx, nullptr); + } /// printJson - Called by ProgramStateManager to print checker-specific data. void printJson(raw_ostream &Out, ProgramStateRef State, const LocationContext *LCtx, const char *NL, - unsigned int Space, bool IsDot) const override; + unsigned int Space, bool IsDot) const; - ProgramStateManager &getStateManager() override { return StateMgr; } + ProgramStateManager &getStateManager() { return StateMgr; } StoreManager &getStoreManager() { return StateMgr.getStoreManager(); } @@ -608,23 +631,11 @@ class ExprEngine : public SubEngine { const ConstructionContextItem &Item, const LocationContext *LC); -protected: - /// evalBind - Handle the semantics of binding a value to a specific location. - /// This method is used by evalStore, VisitDeclStmt, and others. - void evalBind(ExplodedNodeSet &Dst, const Stmt *StoreE, ExplodedNode *Pred, - SVal location, SVal Val, bool atDeclInit = false, - const ProgramPoint *PP = nullptr); - /// Call PointerEscape callback when a value escapes as a result of bind. ProgramStateRef processPointerEscapedOnBind( ProgramStateRef State, ArrayRef> LocAndVals, const LocationContext *LCtx, PointerEscapeKind Kind, - const CallEvent *Call) override; - - ProgramStateRef - processPointerEscapedOnBind(ProgramStateRef State, - SVal Loc, SVal Val, - const LocationContext *LCtx); + const CallEvent *Call); /// Call PointerEscape callback when a value escapes as a result of /// region invalidation. 
@@ -634,7 +645,19 @@ class ExprEngine : public SubEngine { const InvalidatedSymbols *Invalidated, ArrayRef ExplicitRegions, const CallEvent *Call, - RegionAndSymbolInvalidationTraits &ITraits) override; + RegionAndSymbolInvalidationTraits &ITraits); + +private: + /// evalBind - Handle the semantics of binding a value to a specific location. + /// This method is used by evalStore, VisitDeclStmt, and others. + void evalBind(ExplodedNodeSet &Dst, const Stmt *StoreE, ExplodedNode *Pred, + SVal location, SVal Val, bool atDeclInit = false, + const ProgramPoint *PP = nullptr); + + ProgramStateRef + processPointerEscapedOnBind(ProgramStateRef State, + SVal Loc, SVal Val, + const LocationContext *LCtx); /// A simple wrapper when you only need to notify checkers of pointer-escape /// of some values. diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h index ecb61bffe3d95..a0d7db6dd860c 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h @@ -39,7 +39,7 @@ class CallEvent; class CallEventManager; typedef std::unique_ptr(*ConstraintManagerCreator)( - ProgramStateManager &, SubEngine *); + ProgramStateManager &, ExprEngine *); typedef std::unique_ptr(*StoreManagerCreator)( ProgramStateManager &); @@ -460,8 +460,8 @@ class ProgramStateManager { friend class ProgramState; friend void ProgramStateRelease(const ProgramState *state); private: - /// Eng - The SubEngine that owns this state manager. - SubEngine *Eng; /* Can be null. */ + /// Eng - The ExprEngine that owns this state manager. + ExprEngine *Eng; /* Can be null. 
*/ EnvironmentManager EnvMgr; std::unique_ptr StoreMgr; @@ -493,7 +493,7 @@ class ProgramStateManager { StoreManagerCreator CreateStoreManager, ConstraintManagerCreator CreateConstraintManager, llvm::BumpPtrAllocator& alloc, - SubEngine *subeng); + ExprEngine *expreng); ~ProgramStateManager(); @@ -534,7 +534,7 @@ class ProgramStateManager { StoreManager &getStoreManager() { return *StoreMgr; } ConstraintManager &getConstraintManager() { return *ConstraintMgr; } - SubEngine &getOwningEngine() { return *Eng; } + ExprEngine &getOwningEngine() { return *Eng; } ProgramStateRef removeDeadBindingsFromEnvironmentAndStore(ProgramStateRef St, diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h index a9ca3451d8f3e..c72f8292647dc 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h @@ -137,8 +137,8 @@ struct ProgramStateTrait class RangedConstraintManager : public SimpleConstraintManager { public: - RangedConstraintManager(SubEngine *SE, SValBuilder &SB) - : SimpleConstraintManager(SE, SB) {} + RangedConstraintManager(ExprEngine *EE, SValBuilder &SB) + : SimpleConstraintManager(EE, SB) {} ~RangedConstraintManager() override; diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConstraintManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConstraintManager.h index 294a45b214d7e..6a0f5f10874e3 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConstraintManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConstraintManager.h @@ -31,8 +31,9 @@ class SMTConstraintManager : public clang::ento::SimpleConstraintManager { mutable llvm::SMTSolverRef Solver = llvm::CreateZ3Solver(); public: - SMTConstraintManager(clang::ento::SubEngine *SE, clang::ento::SValBuilder &SB) 
- : SimpleConstraintManager(SE, SB) {} + SMTConstraintManager(clang::ento::ExprEngine *EE, + clang::ento::SValBuilder &SB) + : SimpleConstraintManager(EE, SB) {} virtual ~SMTConstraintManager() = default; //===------------------------------------------------------------------===// diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SimpleConstraintManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SimpleConstraintManager.h index 6bf5e94afdbb6..87e927f5b4800 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SimpleConstraintManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SimpleConstraintManager.h @@ -21,12 +21,12 @@ namespace clang { namespace ento { class SimpleConstraintManager : public ConstraintManager { - SubEngine *SU; + ExprEngine *EE; SValBuilder &SVB; public: - SimpleConstraintManager(SubEngine *subengine, SValBuilder &SB) - : SU(subengine), SVB(SB) {} + SimpleConstraintManager(ExprEngine *exprengine, SValBuilder &SB) + : EE(exprengine), SVB(SB) {} ~SimpleConstraintManager() override; diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h deleted file mode 100644 index a7f3c28d4373a..0000000000000 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h +++ /dev/null @@ -1,178 +0,0 @@ -//== SubEngine.h - Interface of the subengine of CoreEngine --------*- C++ -*-// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the interface of a subengine of the CoreEngine. 
-// -//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SUBENGINE_H -#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SUBENGINE_H - -#include "clang/Analysis/ProgramPoint.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/Store.h" -#include "clang/StaticAnalyzer/Core/CheckerManager.h" - -namespace clang { - -class CFGBlock; -class CFGElement; -class LocationContext; -class Stmt; - -namespace cross_tu { -class CrossTranslationUnitContext; -} - -namespace ento { - -struct NodeBuilderContext; -class AnalysisManager; -class ExplodedNodeSet; -class ExplodedNode; -class ProgramState; -class ProgramStateManager; -class BlockCounter; -class BranchNodeBuilder; -class IndirectGotoNodeBuilder; -class SwitchNodeBuilder; -class EndOfFunctionNodeBuilder; -class NodeBuilderWithSinks; -class MemRegion; - -class SubEngine { - virtual void anchor(); -public: - virtual ~SubEngine() {} - - virtual ProgramStateRef getInitialState(const LocationContext *InitLoc) = 0; - - virtual AnalysisManager &getAnalysisManager() = 0; - - virtual cross_tu::CrossTranslationUnitContext * - getCrossTranslationUnitContext() = 0; - - virtual ProgramStateManager &getStateManager() = 0; - - /// Called by CoreEngine. Used to generate new successor - /// nodes by processing the 'effects' of a block-level statement. - virtual void processCFGElement(const CFGElement E, ExplodedNode* Pred, - unsigned StmtIdx, NodeBuilderContext *Ctx)=0; - - /// Called by CoreEngine when it starts processing a CFGBlock. The - /// SubEngine is expected to populate dstNodes with new nodes representing - /// updated analysis state, or generate no nodes at all if it doesn't. - virtual void processCFGBlockEntrance(const BlockEdge &L, - NodeBuilderWithSinks &nodeBuilder, - ExplodedNode *Pred) = 0; - - /// Called by CoreEngine. 
Used to generate successor - /// nodes by processing the 'effects' of a branch condition. - virtual void processBranch(const Stmt *Condition, - NodeBuilderContext& BuilderCtx, - ExplodedNode *Pred, - ExplodedNodeSet &Dst, - const CFGBlock *DstT, - const CFGBlock *DstF) = 0; - - /// Called by CoreEngine. - /// Used to generate successor nodes for temporary destructors depending - /// on whether the corresponding constructor was visited. - virtual void processCleanupTemporaryBranch(const CXXBindTemporaryExpr *BTE, - NodeBuilderContext &BldCtx, - ExplodedNode *Pred, - ExplodedNodeSet &Dst, - const CFGBlock *DstT, - const CFGBlock *DstF) = 0; - - /// Called by CoreEngine. Used to processing branching behavior - /// at static initializers. - virtual void processStaticInitializer(const DeclStmt *DS, - NodeBuilderContext& BuilderCtx, - ExplodedNode *Pred, - ExplodedNodeSet &Dst, - const CFGBlock *DstT, - const CFGBlock *DstF) = 0; - - /// Called by CoreEngine. Used to generate successor - /// nodes by processing the 'effects' of a computed goto jump. - virtual void processIndirectGoto(IndirectGotoNodeBuilder& builder) = 0; - - /// Called by CoreEngine. Used to generate successor - /// nodes by processing the 'effects' of a switch statement. - virtual void processSwitch(SwitchNodeBuilder& builder) = 0; - - /// Called by CoreEngine. Used to notify checkers that processing a - /// function has begun. Called for both inlined and and top-level functions. - virtual void processBeginOfFunction(NodeBuilderContext &BC, - ExplodedNode *Pred, - ExplodedNodeSet &Dst, - const BlockEdge &L) = 0; - - /// Called by CoreEngine. Used to notify checkers that processing a - /// function has ended. Called for both inlined and and top-level functions. - virtual void processEndOfFunction(NodeBuilderContext& BC, - ExplodedNode *Pred, - const ReturnStmt *RS = nullptr) = 0; - - // Generate the entry node of the callee. 
- virtual void processCallEnter(NodeBuilderContext& BC, CallEnter CE, - ExplodedNode *Pred) = 0; - - // Generate the first post callsite node. - virtual void processCallExit(ExplodedNode *Pred) = 0; - - /// Called by ConstraintManager. Used to call checker-specific - /// logic for handling assumptions on symbolic values. - virtual ProgramStateRef processAssume(ProgramStateRef state, - SVal cond, bool assumption) = 0; - - /// processRegionChanges - Called by ProgramStateManager whenever a change is - /// made to the store. Used to update checkers that track region values. - virtual ProgramStateRef - processRegionChanges(ProgramStateRef state, - const InvalidatedSymbols *invalidated, - ArrayRef ExplicitRegions, - ArrayRef Regions, - const LocationContext *LCtx, - const CallEvent *Call) = 0; - - - inline ProgramStateRef - processRegionChange(ProgramStateRef state, - const MemRegion* MR, - const LocationContext *LCtx) { - return processRegionChanges(state, nullptr, MR, MR, LCtx, nullptr); - } - - virtual ProgramStateRef processPointerEscapedOnBind( - ProgramStateRef State, ArrayRef> LocAndVals, - const LocationContext *LCtx, PointerEscapeKind Kind, - const CallEvent *Call) = 0; - - virtual ProgramStateRef - notifyCheckersOfPointerEscape(ProgramStateRef State, - const InvalidatedSymbols *Invalidated, - ArrayRef ExplicitRegions, - const CallEvent *Call, - RegionAndSymbolInvalidationTraits &HTraits) = 0; - - /// printJson - Called by ProgramStateManager to print checker-specific data. - virtual void printJson(raw_ostream &Out, ProgramStateRef State, - const LocationContext *LCtx, const char *NL, - unsigned int Space, bool IsDot) const = 0; - - /// Called by CoreEngine when the analysis worklist is either empty or the - // maximum number of analysis steps have been reached. 
- virtual void processEndWorklist() = 0; -}; - -} // end GR namespace - -} // end clang namespace - -#endif diff --git a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp index 3b2e5cd28e437..ad79f7cb9359f 100644 --- a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp +++ b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp @@ -45,7 +45,6 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/SMTConv.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" diff --git a/clang/lib/StaticAnalyzer/Core/CMakeLists.txt b/clang/lib/StaticAnalyzer/Core/CMakeLists.txt index 057cdd4bb18ab..233ffaf799568 100644 --- a/clang/lib/StaticAnalyzer/Core/CMakeLists.txt +++ b/clang/lib/StaticAnalyzer/Core/CMakeLists.txt @@ -44,7 +44,6 @@ add_clang_library(clangStaticAnalyzerCore SimpleSValBuilder.cpp SMTConstraintManager.cpp Store.cpp - SubEngine.cpp SValBuilder.cpp SVals.cpp SymbolManager.cpp diff --git a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp index 1ea7c26dc76b0..fb728ac9e4f5a 100644 --- a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp +++ b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp @@ -564,7 +564,7 @@ RuntimeDefinition AnyFunctionCall::getRuntimeDefinition() const { return RuntimeDefinition(Decl); } - SubEngine &Engine = getState()->getStateManager().getOwningEngine(); + ExprEngine &Engine = getState()->getStateManager().getOwningEngine(); AnalyzerOptions &Opts = Engine.getAnalysisManager().options; // Try to get CTU definition only if CTUDir is provided. 
diff --git a/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp index 5a49b18aecf12..70deb13a8e1ae 100644 --- a/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp @@ -23,8 +23,8 @@ #include "clang/StaticAnalyzer/Core/AnalyzerOptions.h" #include "clang/StaticAnalyzer/Core/PathSensitive/BlockCounter.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" #include "clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h" #include "clang/StaticAnalyzer/Core/PathSensitive/WorkList.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" @@ -52,8 +52,7 @@ STATISTIC(NumPathsExplored, // Core analysis engine. //===----------------------------------------------------------------------===// -static std::unique_ptr generateWorkList(AnalyzerOptions &Opts, - SubEngine &subengine) { +static std::unique_ptr generateWorkList(AnalyzerOptions &Opts) { switch (Opts.getExplorationStrategy()) { case ExplorationStrategyKind::DFS: return WorkList::makeDFS(); @@ -71,9 +70,9 @@ static std::unique_ptr generateWorkList(AnalyzerOptions &Opts, llvm_unreachable("Unknown AnalyzerOptions::ExplorationStrategyKind"); } -CoreEngine::CoreEngine(SubEngine &subengine, FunctionSummariesTy *FS, +CoreEngine::CoreEngine(ExprEngine &exprengine, FunctionSummariesTy *FS, AnalyzerOptions &Opts) - : SubEng(subengine), WList(generateWorkList(Opts, subengine)), + : ExprEng(exprengine), WList(generateWorkList(Opts)), BCounterFactory(G.getAllocator()), FunctionSummaries(FS) {} /// ExecuteWorkList - Run the worklist algorithm for a maximum number of steps. 
@@ -104,7 +103,7 @@ bool CoreEngine::ExecuteWorkList(const LocationContext *L, unsigned Steps, WList->setBlockCounter(BCounterFactory.GetEmptyCounter()); if (!InitState) - InitState = SubEng.getInitialState(L); + InitState = ExprEng.getInitialState(L); bool IsNew; ExplodedNode *Node = G.getNode(StartLoc, InitState, false, &IsNew); @@ -113,7 +112,7 @@ bool CoreEngine::ExecuteWorkList(const LocationContext *L, unsigned Steps, NodeBuilderContext BuilderCtx(*this, StartLoc.getDst(), Node); ExplodedNodeSet DstBegin; - SubEng.processBeginOfFunction(BuilderCtx, Node, DstBegin, StartLoc); + ExprEng.processBeginOfFunction(BuilderCtx, Node, DstBegin, StartLoc); enqueue(DstBegin); } @@ -147,7 +146,7 @@ bool CoreEngine::ExecuteWorkList(const LocationContext *L, unsigned Steps, dispatchWorkItem(Node, Node->getLocation(), WU); } - SubEng.processEndWorklist(); + ExprEng.processEndWorklist(); return WList->hasWork(); } @@ -172,7 +171,7 @@ void CoreEngine::dispatchWorkItem(ExplodedNode* Pred, ProgramPoint Loc, break; case ProgramPoint::CallExitBeginKind: - SubEng.processCallExit(Pred); + ExprEng.processCallExit(Pred); break; case ProgramPoint::EpsilonKind: { @@ -253,17 +252,17 @@ void CoreEngine::HandleBlockEdge(const BlockEdge &L, ExplodedNode *Pred) { } // Process the final state transition. - SubEng.processEndOfFunction(BuilderCtx, Pred, RS); + ExprEng.processEndOfFunction(BuilderCtx, Pred, RS); // This path is done. Don't enqueue any more nodes. return; } - // Call into the SubEngine to process entering the CFGBlock. + // Call into the ExprEngine to process entering the CFGBlock. ExplodedNodeSet dstNodes; BlockEntrance BE(Blk, Pred->getLocationContext()); NodeBuilderWithSinks nodeBuilder(Pred, dstNodes, BuilderCtx, BE); - SubEng.processCFGBlockEntrance(L, nodeBuilder, Pred); + ExprEng.processCFGBlockEntrance(L, nodeBuilder, Pred); // Auto-generate a node. 
if (!nodeBuilder.hasGeneratedNodes()) { @@ -287,7 +286,7 @@ void CoreEngine::HandleBlockEntrance(const BlockEntrance &L, // Process the entrance of the block. if (Optional E = L.getFirstElement()) { NodeBuilderContext Ctx(*this, L.getBlock(), Pred); - SubEng.processCFGElement(*E, Pred, 0, &Ctx); + ExprEng.processCFGElement(*E, Pred, 0, &Ctx); } else HandleBlockExit(L.getBlock(), Pred); @@ -367,7 +366,7 @@ void CoreEngine::HandleBlockExit(const CFGBlock * B, ExplodedNode *Pred) { builder(Pred, B, cast(Term)->getTarget(), *(B->succ_begin()), this); - SubEng.processIndirectGoto(builder); + ExprEng.processIndirectGoto(builder); return; } @@ -378,7 +377,7 @@ void CoreEngine::HandleBlockExit(const CFGBlock * B, ExplodedNode *Pred) { // 'element' variable to a value. // (2) in a terminator, which represents the branch. // - // For (1), subengines will bind a value (i.e., 0 or 1) indicating + // For (1), ExprEngine will bind a value (i.e., 0 or 1) indicating // whether or not collection contains any more elements. We cannot // just test to see if the element is nil because a container can // contain nil elements. 
@@ -389,7 +388,7 @@ void CoreEngine::HandleBlockExit(const CFGBlock * B, ExplodedNode *Pred) { SwitchNodeBuilder builder(Pred, B, cast(Term)->getCond(), this); - SubEng.processSwitch(builder); + ExprEng.processSwitch(builder); return; } @@ -418,7 +417,7 @@ void CoreEngine::HandleBlockExit(const CFGBlock * B, ExplodedNode *Pred) { void CoreEngine::HandleCallEnter(const CallEnter &CE, ExplodedNode *Pred) { NodeBuilderContext BuilderCtx(*this, CE.getEntry(), Pred); - SubEng.processCallEnter(BuilderCtx, CE, Pred); + ExprEng.processCallEnter(BuilderCtx, CE, Pred); } void CoreEngine::HandleBranch(const Stmt *Cond, const Stmt *Term, @@ -426,7 +425,7 @@ void CoreEngine::HandleBranch(const Stmt *Cond, const Stmt *Term, assert(B->succ_size() == 2); NodeBuilderContext Ctx(*this, B, Pred); ExplodedNodeSet Dst; - SubEng.processBranch(Cond, Ctx, Pred, Dst, *(B->succ_begin()), + ExprEng.processBranch(Cond, Ctx, Pred, Dst, *(B->succ_begin()), *(B->succ_begin() + 1)); // Enqueue the new frontier onto the worklist. enqueue(Dst); @@ -438,7 +437,7 @@ void CoreEngine::HandleCleanupTemporaryBranch(const CXXBindTemporaryExpr *BTE, assert(B->succ_size() == 2); NodeBuilderContext Ctx(*this, B, Pred); ExplodedNodeSet Dst; - SubEng.processCleanupTemporaryBranch(BTE, Ctx, Pred, Dst, *(B->succ_begin()), + ExprEng.processCleanupTemporaryBranch(BTE, Ctx, Pred, Dst, *(B->succ_begin()), *(B->succ_begin() + 1)); // Enqueue the new frontier onto the worklist. enqueue(Dst); @@ -449,7 +448,7 @@ void CoreEngine::HandleStaticInit(const DeclStmt *DS, const CFGBlock *B, assert(B->succ_size() == 2); NodeBuilderContext Ctx(*this, B, Pred); ExplodedNodeSet Dst; - SubEng.processStaticInitializer(DS, Ctx, Pred, Dst, + ExprEng.processStaticInitializer(DS, Ctx, Pred, Dst, *(B->succ_begin()), *(B->succ_begin()+1)); // Enqueue the new frontier onto the worklist. 
enqueue(Dst); @@ -464,7 +463,7 @@ void CoreEngine::HandlePostStmt(const CFGBlock *B, unsigned StmtIdx, HandleBlockExit(B, Pred); else { NodeBuilderContext Ctx(*this, B, Pred); - SubEng.processCFGElement((*B)[StmtIdx], Pred, StmtIdx, &Ctx); + ExprEng.processCFGElement((*B)[StmtIdx], Pred, StmtIdx, &Ctx); } } diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 36b930faf2d02..6fce27bc95569 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -3206,3 +3206,5 @@ void *ProgramStateTrait::GDMIndex() { static int index = 0; return &index; } + +void ExprEngine::anchor() { } diff --git a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp index 3ecee758c676c..006a4006b7fc9 100644 --- a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp +++ b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp @@ -16,8 +16,8 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h" #include "llvm/Support/raw_ostream.h" using namespace clang; @@ -76,12 +76,12 @@ ProgramStateManager::ProgramStateManager(ASTContext &Ctx, StoreManagerCreator CreateSMgr, ConstraintManagerCreator CreateCMgr, llvm::BumpPtrAllocator &alloc, - SubEngine *SubEng) - : Eng(SubEng), EnvMgr(alloc), GDMFactory(alloc), + ExprEngine *ExprEng) + : Eng(ExprEng), EnvMgr(alloc), GDMFactory(alloc), svalBuilder(createSimpleSValBuilder(alloc, Ctx, *this)), CallEventMgr(new CallEventManager(alloc)), Alloc(alloc) { StoreMgr = (*CreateSMgr)(*this); - ConstraintMgr = (*CreateCMgr)(*this, SubEng); + ConstraintMgr = (*CreateCMgr)(*this, ExprEng); } @@ 
-189,7 +189,7 @@ ProgramState::invalidateRegionsImpl(ValueList Values, RegionAndSymbolInvalidationTraits *ITraits, const CallEvent *Call) const { ProgramStateManager &Mgr = getStateManager(); - SubEngine &Eng = Mgr.getOwningEngine(); + ExprEngine &Eng = Mgr.getOwningEngine(); InvalidatedSymbols InvalidatedSyms; if (!IS) diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index 137e2cefe5a04..a3ea7d4c013b9 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -285,8 +285,8 @@ void RangeSet::print(raw_ostream &os) const { namespace { class RangeConstraintManager : public RangedConstraintManager { public: - RangeConstraintManager(SubEngine *SE, SValBuilder &SVB) - : RangedConstraintManager(SE, SVB) {} + RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB) + : RangedConstraintManager(EE, SVB) {} //===------------------------------------------------------------------===// // Implementation for interface from ConstraintManager. 
@@ -374,7 +374,8 @@ class RangeConstraintManager : public RangedConstraintManager { } // end anonymous namespace std::unique_ptr -ento::CreateRangeConstraintManager(ProgramStateManager &StMgr, SubEngine *Eng) { +ento::CreateRangeConstraintManager(ProgramStateManager &StMgr, + ExprEngine *Eng) { return std::make_unique(Eng, StMgr.getSValBuilder()); } diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp index 6cca0f5f57d10..2a55c99647124 100644 --- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -24,10 +24,10 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicSize.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h" #include "llvm/ADT/ImmutableMap.h" #include "llvm/ADT/Optional.h" #include "llvm/Support/raw_ostream.h" @@ -382,7 +382,7 @@ class RegionStoreManager : public StoreManager { : StoreManager(mgr), Features(f), RBFactory(mgr.getAllocator()), CBFactory(mgr.getAllocator()), SmallStructLimit(0) { - SubEngine &Eng = StateMgr.getOwningEngine(); + ExprEngine &Eng = StateMgr.getOwningEngine(); AnalyzerOptions &Options = Eng.getAnalysisManager().options; SmallStructLimit = Options.RegionStoreSmallStructLimit; } diff --git a/clang/lib/StaticAnalyzer/Core/SMTConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/SMTConstraintManager.cpp index 6ad12ca0a688f..7395622a659ca 100644 --- a/clang/lib/StaticAnalyzer/Core/SMTConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/SMTConstraintManager.cpp @@ -13,6 +13,6 @@ using namespace clang; using 
namespace ento; std::unique_ptr -ento::CreateZ3ConstraintManager(ProgramStateManager &StMgr, SubEngine *Eng) { +ento::CreateZ3ConstraintManager(ProgramStateManager &StMgr, ExprEngine *Eng) { return std::make_unique(Eng, StMgr.getSValBuilder()); } diff --git a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp index 3a5841137e1a7..c00a2c8ba8a2c 100644 --- a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp +++ b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp @@ -24,12 +24,12 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h" #include "clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h" #include "clang/StaticAnalyzer/Core/PathSensitive/Store.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h" #include "llvm/ADT/APSInt.h" diff --git a/clang/lib/StaticAnalyzer/Core/SimpleConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/SimpleConstraintManager.cpp index 85f60231a2769..3709106ad44ce 100644 --- a/clang/lib/StaticAnalyzer/Core/SimpleConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/SimpleConstraintManager.cpp @@ -44,8 +44,8 @@ ProgramStateRef SimpleConstraintManager::assume(ProgramStateRef State, ProgramStateRef SimpleConstraintManager::assume(ProgramStateRef State, NonLoc Cond, bool Assumption) { State = assumeAux(State, Cond, Assumption); - if (NotifyAssumeClients && SU) - return SU->processAssume(State, Cond, Assumption); + if (NotifyAssumeClients && EE) + return 
EE->processAssume(State, Cond, Assumption); return State; } diff --git a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp index 84c52f53ca5e7..d9fe3af3c0000 100644 --- a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp +++ b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp @@ -13,8 +13,8 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h" #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h" using namespace clang; diff --git a/clang/lib/StaticAnalyzer/Core/SubEngine.cpp b/clang/lib/StaticAnalyzer/Core/SubEngine.cpp deleted file mode 100644 index d7ddd9cf46105..0000000000000 --- a/clang/lib/StaticAnalyzer/Core/SubEngine.cpp +++ /dev/null @@ -1,13 +0,0 @@ -//== SubEngine.cpp - Interface of the subengine of CoreEngine ------*- C++ -*-// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h" - -using namespace clang::ento; - -void SubEngine::anchor() { } From 10f0b18ed950545d10574f5b30d234bd3789d7b2 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 26 May 2020 17:56:17 +0000 Subject: [PATCH 118/770] [gn build] Port d70ec366c91 --- llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Core/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Core/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Core/BUILD.gn index f37cc42c481e8..4c3d175a31808 100644 --- a/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Core/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Core/BUILD.gn @@ -55,7 +55,6 @@ static_library("Core") { "SimpleConstraintManager.cpp", "SimpleSValBuilder.cpp", "Store.cpp", - "SubEngine.cpp", "SymbolManager.cpp", "TextDiagnostics.cpp", "WorkList.cpp", From d1f0a76b21975ba66ec2427c2d3ddb7ed1e63949 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 26 May 2020 10:54:12 -0700 Subject: [PATCH 119/770] [YAMLTraits] Remove char trait and serialize as uint8_t in lldb. 
As discussed in https://reviews.llvm.org/D79745 --- lldb/include/lldb/Utility/Args.h | 2 +- llvm/include/llvm/Support/YAMLTraits.h | 6 ------ llvm/lib/Support/YAMLTraits.cpp | 11 ----------- llvm/unittests/Support/YAMLIOTest.cpp | 8 +------- 4 files changed, 2 insertions(+), 25 deletions(-) diff --git a/lldb/include/lldb/Utility/Args.h b/lldb/include/lldb/Utility/Args.h index 560f25795d3b7..2cce7d0c697c7 100644 --- a/lldb/include/lldb/Utility/Args.h +++ b/lldb/include/lldb/Utility/Args.h @@ -391,7 +391,7 @@ template <> struct MappingTraits { return lldb_private::Args::ArgEntry(value, quote); } StringRef value; - char quote; + uint8_t quote; }; static void mapping(IO &io, lldb_private::Args::ArgEntry &v); }; diff --git a/llvm/include/llvm/Support/YAMLTraits.h b/llvm/include/llvm/Support/YAMLTraits.h index 9aa96401ae179..f93f36037679a 100644 --- a/llvm/include/llvm/Support/YAMLTraits.h +++ b/llvm/include/llvm/Support/YAMLTraits.h @@ -1159,12 +1159,6 @@ struct ScalarTraits { static QuotingType mustQuote(StringRef) { return QuotingType::None; } }; -template <> struct ScalarTraits { - static void output(const char &, void *, raw_ostream &); - static StringRef input(StringRef, void *, char &); - static QuotingType mustQuote(StringRef S) { return needsQuotes(S); } -}; - template<> struct ScalarTraits { static void output(const StringRef &, void *, raw_ostream &); diff --git a/llvm/lib/Support/YAMLTraits.cpp b/llvm/lib/Support/YAMLTraits.cpp index a4b782db0a96e..f27be3e974306 100644 --- a/llvm/lib/Support/YAMLTraits.cpp +++ b/llvm/lib/Support/YAMLTraits.cpp @@ -864,17 +864,6 @@ StringRef ScalarTraits::input(StringRef Scalar, void *, bool &Val) { return "invalid boolean"; } -void ScalarTraits::output(const char &Val, void *, raw_ostream &Out) { - Out << Val; -} - -StringRef ScalarTraits::input(StringRef Scalar, void *, char &Val) { - if (Scalar.size() != 1) - return "invalid character"; - Val = Scalar[0]; - return StringRef(); -} - void ScalarTraits::output(const StringRef 
&Val, void *, raw_ostream &Out) { Out << Val; diff --git a/llvm/unittests/Support/YAMLIOTest.cpp b/llvm/unittests/Support/YAMLIOTest.cpp index b2ea5aab59356..d86489cf75604 100644 --- a/llvm/unittests/Support/YAMLIOTest.cpp +++ b/llvm/unittests/Support/YAMLIOTest.cpp @@ -333,7 +333,6 @@ struct BuiltInTypes { uint16_t u16; uint8_t u8; bool b; - char c; int64_t s64; int32_t s32; int16_t s16; @@ -358,7 +357,6 @@ namespace yaml { io.mapRequired("u16", bt.u16); io.mapRequired("u8", bt.u8); io.mapRequired("b", bt.b); - io.mapRequired("c", bt.c); io.mapRequired("s64", bt.s64); io.mapRequired("s32", bt.s32); io.mapRequired("s16", bt.s16); @@ -388,7 +386,6 @@ TEST(YAMLIO, TestReadBuiltInTypes) { "u16: 65000\n" "u8: 255\n" "b: false\n" - "c: 'c'\n" "s64: -5000000000\n" "s32: -2000000000\n" "s16: -32000\n" @@ -399,7 +396,7 @@ TEST(YAMLIO, TestReadBuiltInTypes) { "h16: 0x8765\n" "h32: 0xFEDCBA98\n" "h64: 0xFEDCBA9876543210\n" - "...\n"); + "...\n"); yin >> map; EXPECT_FALSE(yin.error()); @@ -410,7 +407,6 @@ TEST(YAMLIO, TestReadBuiltInTypes) { EXPECT_EQ(map.u16, 65000); EXPECT_EQ(map.u8, 255); EXPECT_EQ(map.b, false); - EXPECT_EQ(map.c, 'c'); EXPECT_EQ(map.s64, -5000000000LL); EXPECT_EQ(map.s32, -2000000000L); EXPECT_EQ(map.s16, -32000); @@ -438,7 +434,6 @@ TEST(YAMLIO, TestReadWriteBuiltInTypes) { map.u16 = 50000; map.u8 = 254; map.b = true; - map.c = 'd'; map.s64 = -6000000000LL; map.s32 = -2000000000; map.s16 = -32000; @@ -468,7 +463,6 @@ TEST(YAMLIO, TestReadWriteBuiltInTypes) { EXPECT_EQ(map.u16, 50000); EXPECT_EQ(map.u8, 254); EXPECT_EQ(map.b, true); - EXPECT_EQ(map.c, 'd'); EXPECT_EQ(map.s64, -6000000000LL); EXPECT_EQ(map.s32, -2000000000L); EXPECT_EQ(map.s16, -32000); From b8a3c618d6c5df081cad69b5ffb386a7a7b0361f Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 26 May 2020 11:06:07 -0700 Subject: [PATCH 120/770] [ELF] Allow misaligned SHT_GNU_verneed Bazel created interface shared objects (.ifso) may be misaligned. 
We use llvm::support::detail::packed_endian_specific_integral under the hood which allows reading of misaligned values, so there is not a need to diagnose (in LLD we don't intend to support sophisticated parsing for SHT_GNU_*). --- lld/ELF/InputFiles.cpp | 6 ++---- .../{verneed-shared.yaml => verneed-shared.test} | 10 ++++++---- 2 files changed, 8 insertions(+), 8 deletions(-) rename lld/test/ELF/invalid/{verneed-shared.yaml => verneed-shared.test} (89%) diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 9fdd0547ddca4..c451aee1f921a 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -1224,14 +1224,12 @@ std::vector SharedFile::parseVerneed(const ELFFile &obj, ArrayRef data = CHECK(obj.getSectionContents(sec), this); const uint8_t *verneedBuf = data.begin(); for (unsigned i = 0; i != sec->sh_info; ++i) { - if (verneedBuf + sizeof(typename ELFT::Verneed) > data.end() || - uintptr_t(verneedBuf) % sizeof(uint32_t) != 0) + if (verneedBuf + sizeof(typename ELFT::Verneed) > data.end()) fatal(toString(this) + " has an invalid Verneed"); auto *vn = reinterpret_cast(verneedBuf); const uint8_t *vernauxBuf = verneedBuf + vn->vn_aux; for (unsigned j = 0; j != vn->vn_cnt; ++j) { - if (vernauxBuf + sizeof(typename ELFT::Vernaux) > data.end() || - uintptr_t(vernauxBuf) % sizeof(uint32_t) != 0) + if (vernauxBuf + sizeof(typename ELFT::Vernaux) > data.end()) fatal(toString(this) + " has an invalid Vernaux"); auto *aux = reinterpret_cast(vernauxBuf); if (aux->vna_name >= this->stringTable.size()) diff --git a/lld/test/ELF/invalid/verneed-shared.yaml b/lld/test/ELF/invalid/verneed-shared.test similarity index 89% rename from lld/test/ELF/invalid/verneed-shared.yaml rename to lld/test/ELF/invalid/verneed-shared.test index 18315fe8a2df9..916b8c1a5d950 100644 --- a/lld/test/ELF/invalid/verneed-shared.yaml +++ b/lld/test/ELF/invalid/verneed-shared.test @@ -6,7 +6,7 @@ ## sh_offset(SHT_GNU_verneed) is out of bounds. 
# RUN: yaml2obj --docnum=1 %s -o %t1.so # RUN: not ld.lld %t.o %t1.so -o /dev/null 2>&1 | FileCheck --check-prefix=SHOFFSET %s -# SHOFFSET: error: {{.*}}.so: section [index 1] has a sh_offset (0xffffffff) + sh_size (0x0) that is greater than the file size (0x228) +# SHOFFSET: error: {{.*}}.so: section [index 1] has a sh_offset (0xffffffff) + sh_size (0x0) that is greater than the file size (0x168) --- !ELF FileHeader: Class: ELFCLASS64 @@ -17,12 +17,14 @@ Sections: - Name: .gnu.version_r Type: SHT_GNU_verneed Flags: [ SHF_ALLOC ] + Info: 1 ShOffset: 0xFFFFFFFF -## A Verneed entry is misaligned (not a multiple of 4). +## A Verneed entry is misaligned (not a multiple of 4). This may happen +## some interface shared objects. We use memcpy to read the fields, so +## misalignment isn't a problem and there is no need to diagnose. # RUN: yaml2obj --docnum=2 %s -o %t2.so -# RUN: not ld.lld %t.o %t2.so -o /dev/null 2>&1 | FileCheck --check-prefix=VN-MISALIGNED %s -# VN-MISALIGNED: {{.*}}.so has an invalid Verneed +# RUN: ld.lld %t.o %t2.so -o /dev/null --- !ELF FileHeader: Class: ELFCLASS64 From 50db8402fc6652559d9ba3dc97bb787c4160ef5b Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 26 May 2020 18:46:37 +0100 Subject: [PATCH 121/770] ResourcePriorityQueue.h - reduce unnecessary includes to forward declarations. NFC. 
Move includes to ResourcePriorityQueue.cpp --- llvm/include/llvm/CodeGen/ResourcePriorityQueue.h | 10 +++++----- .../lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 4 ++++ 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/CodeGen/ResourcePriorityQueue.h b/llvm/include/llvm/CodeGen/ResourcePriorityQueue.h index 81587a3170ce1..b38cd49241742 100644 --- a/llvm/include/llvm/CodeGen/ResourcePriorityQueue.h +++ b/llvm/include/llvm/CodeGen/ResourcePriorityQueue.h @@ -16,15 +16,15 @@ #ifndef LLVM_CODEGEN_RESOURCEPRIORITYQUEUE_H #define LLVM_CODEGEN_RESOURCEPRIORITYQUEUE_H -#include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/MC/MCInstrItineraries.h" namespace llvm { + class DFAPacketizer; + class InstrItineraryData; class ResourcePriorityQueue; + class SelectionDAGISel; + class TargetInstrInfo; + class TargetRegisterInfo; /// Sorting functions for the Available queue. 
struct resource_sort { diff --git a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 34660e3a48ec5..55fe26eb64cda 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -19,9 +19,13 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/ResourcePriorityQueue.h" +#include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" From 0165cf701156db4d399cb31d31ecb154372e2562 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 26 May 2020 19:09:36 +0100 Subject: [PATCH 122/770] ObjCARCAnalysisUtils.h - remove unused includes. NFC. 
We just need to include Passes.h in ObjCARCAliasAnalysis.cpp to compensate --- llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h | 4 ---- llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp | 1 + 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h b/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h index 3edcc9894cf75..d120c6a4fd592 100644 --- a/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h +++ b/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h @@ -23,17 +23,13 @@ #define LLVM_LIB_ANALYSIS_OBJCARCANALYSISUTILS_H #include "llvm/ADT/Optional.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ObjCARCInstKind.h" -#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/InstIterator.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/ValueHandle.h" -#include "llvm/Pass.h" namespace llvm { namespace objcarc { diff --git a/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp b/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp index abe3cde57a25c..80e019f5fc921 100644 --- a/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp +++ b/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp @@ -24,6 +24,7 @@ #include "llvm/Analysis/ObjCARCAliasAnalysis.h" #include "llvm/Analysis/ObjCARCAnalysisUtils.h" +#include "llvm/Analysis/Passes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Value.h" From 8d31dd23ec2368d00b0668c3d01b1fd2ce4d621b Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 26 May 2020 11:22:48 -0700 Subject: [PATCH 123/770] [lldb/Reproducers] Skip remaining failing test in python_api subdir Skip the remaining two failing tests in the python_api subdirectory. See inline comments for the reason why.
--- lldb/test/API/python_api/hello_world/TestHelloWorld.py | 1 + lldb/test/API/python_api/sbdata/TestSBData.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/lldb/test/API/python_api/hello_world/TestHelloWorld.py b/lldb/test/API/python_api/hello_world/TestHelloWorld.py index 5b189b36d7f92..2d38043bb4504 100644 --- a/lldb/test/API/python_api/hello_world/TestHelloWorld.py +++ b/lldb/test/API/python_api/hello_world/TestHelloWorld.py @@ -75,6 +75,7 @@ def test_with_process_launch_api(self): @add_test_categories(['pyapi']) @skipIfiOSSimulator @expectedFailureNetBSD + @skipIfReproducer # File synchronization is not supported during replay. def test_with_attach_to_process_with_id_api(self): """Create target, spawn a process, and attach to it with process id.""" exe = '%s_%d'%(self.testMethodName, os.getpid()) diff --git a/lldb/test/API/python_api/sbdata/TestSBData.py b/lldb/test/API/python_api/sbdata/TestSBData.py index a12f683d60139..ee04968042419 100644 --- a/lldb/test/API/python_api/sbdata/TestSBData.py +++ b/lldb/test/API/python_api/sbdata/TestSBData.py @@ -21,6 +21,7 @@ def setUp(self): self.line = line_number('main.cpp', '// set breakpoint here') @add_test_categories(['pyapi']) + @skipIfReproducer # SBData::SetData is not instrumented. def test_byte_order_and_address_byte_size(self): """Test the SBData::SetData() to ensure the byte order and address byte size are obeyed""" @@ -41,6 +42,7 @@ def test_byte_order_and_address_byte_size(self): self.assertTrue(addr == 0x8877665544332211); @add_test_categories(['pyapi']) + @skipIfReproducer # SBData::SetData is not instrumented. 
def test_with_run_command(self): """Test the SBData APIs.""" self.build() From a94e08d2e840a0e7ce032f59e9344bc49b5a54a1 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 26 May 2020 11:32:02 -0700 Subject: [PATCH 124/770] [StaticAnalyzer] Fix non-virtual destructor warning Fixed warning: 'clang::ento::ExprEngine' has virtual functions but non-virtual destructor [-Wnon-virtual-dtor] ~ExprEngine() = default; --- .../clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h index a94c847f35ee1..b32302cfc3378 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h @@ -177,7 +177,7 @@ class ExprEngine { SetOfConstDecls *VisitedCalleesIn, FunctionSummariesTy *FS, InliningModes HowToInlineIn); - ~ExprEngine() = default; + virtual ~ExprEngine() = default; /// Returns true if there is still simulation state on the worklist.
bool ExecuteWorkList(const LocationContext *L, unsigned Steps = 150000) { From 2e824925402f011c2a4d3a0b51cce388b6d14d16 Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Tue, 26 May 2020 11:31:24 -0700 Subject: [PATCH 125/770] [fuzzer][afl] Fix build with GCC Summary: Fixes this build error with GCC 9.3.0: ``` ../lib/fuzzer/afl/afl_driver.cpp:114:30: error: expected unqualified-id before string constant 114 | __attribute__((weak)) extern "C" void __sanitizer_set_report_fd(void *); | ^~~ ``` Reviewers: metzman, kcc Reviewed By: kcc Subscribers: #sanitizers Tags: #sanitizers Differential Revision: https://reviews.llvm.org/D80479 --- compiler-rt/lib/fuzzer/afl/afl_driver.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/lib/fuzzer/afl/afl_driver.cpp b/compiler-rt/lib/fuzzer/afl/afl_driver.cpp index bb3b48f367289..457f180ecc825 100644 --- a/compiler-rt/lib/fuzzer/afl/afl_driver.cpp +++ b/compiler-rt/lib/fuzzer/afl/afl_driver.cpp @@ -111,7 +111,7 @@ static uint8_t AflInputBuf[kMaxAflInputSize]; // Use this optionally defined function to output sanitizer messages even if // user asks to close stderr. -__attribute__((weak)) extern "C" void __sanitizer_set_report_fd(void *); +extern "C" __attribute__((weak)) void __sanitizer_set_report_fd(void *); // Keep track of where stderr content is being written to, so that // dup_and_close_stderr can use the correct one. From 6e9223a2c65835444c5c1328d52daf9f85f9618c Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Tue, 26 May 2020 13:34:52 -0500 Subject: [PATCH 126/770] [PowerPC][NFC] Update test to prevent DCE from causing failures The test case provided in PR45709 can be simplified by DCE to an empty function. To prevent this from happening if DCE is run prior to ISEL in the back end, just add optnone to the function. The behaviour it is testing for is in the SDAG legalization and is not sensitive to optnone so the test case still achieves its desired objective. 
--- llvm/test/CodeGen/PowerPC/pr45709.ll | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/llvm/test/CodeGen/PowerPC/pr45709.ll b/llvm/test/CodeGen/PowerPC/pr45709.ll index bc295fafd2105..3a26173965467 100644 --- a/llvm/test/CodeGen/PowerPC/pr45709.ll +++ b/llvm/test/CodeGen/PowerPC/pr45709.ll @@ -10,30 +10,37 @@ define dso_local void @_ZN1a1bEv(<4 x float> %in) local_unnamed_addr #0 align 2 { ; CHECK-LABEL: _ZN1a1bEv: ; CHECK: # %bb.0: -; CHECK-NEXT: bclr 12, 4*cr5+lt, 0 -; CHECK-NEXT: # %bb.1: # %.preheader +; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB0_6 +; CHECK-NEXT: b .LBB0_1 +; CHECK-NEXT: .LBB0_1: # %.preheader +; CHECK-NEXT: b .LBB0_2 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: .LBB0_3: ; CHECK-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-NEXT: vxor v3, v3, v3 ; CHECK-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-NEXT: lvx v4, 0, r3 +; CHECK-NEXT: lvx v3, 0, r3 +; CHECK-NEXT: vperm v2, v2, v2, v3 +; CHECK-NEXT: vxor v3, v3, v3 ; CHECK-NEXT: addi r3, r1, -48 ; CHECK-NEXT: stvx v3, 0, r3 ; CHECK-NEXT: addi r3, r1, -32 -; CHECK-NEXT: vperm v2, v2, v2, v4 ; CHECK-NEXT: stvx v2, 0, r3 ; CHECK-NEXT: lwz r3, -48(r1) ; CHECK-NEXT: lwz r4, -32(r1) ; CHECK-NEXT: cmpw r4, r3 -; CHECK-NEXT: bc 12, gt, .LBB0_2 -; CHECK-NEXT: b .LBB0_3 -; CHECK-NEXT: .LBB0_2: # %.preheader +; CHECK-NEXT: bc 12, gt, .LBB0_4 +; CHECK-NEXT: b .LBB0_5 +; CHECK-NEXT: .LBB0_4: ; CHECK-NEXT: addi r3, r4, 0 -; CHECK-NEXT: .LBB0_3: # %.preheader +; CHECK-NEXT: .LBB0_5: +; CHECK-NEXT: cmpw r3, r3 ; CHECK-NEXT: stw r3, -64(r1) ; CHECK-NEXT: addi r3, r1, -64 ; CHECK-NEXT: lvx v2, 0, r3 ; CHECK-NEXT: addi r3, r1, -16 ; CHECK-NEXT: stvx v2, 0, r3 +; CHECK-NEXT: .LBB0_6: ; CHECK-NEXT: blr br i1 undef, label %7, label %1 @@ -55,4 +62,4 @@ define dso_local void @_ZN1a1bEv(<4 x float> %in) local_unnamed_addr #0 align 2 declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #0 -attributes #0 = { nounwind } +attributes #0 = { nounwind optnone 
noinline } From 12dbdc2a6b68162f7370e9754bdb0e1edd65bf3c Mon Sep 17 00:00:00 2001 From: Adam Balogh Date: Tue, 26 May 2020 20:43:37 +0200 Subject: [PATCH 127/770] [Analyzer] Fix buildbot failure of commit rGd70ec366c91b --- .../clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h index b32302cfc3378..3611979c61911 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h @@ -97,7 +97,7 @@ class SymbolManager; class SwitchNodeBuilder; class ExprEngine { - virtual void anchor(); + void anchor(); public: /// The modes of inlining, which override the default analysis-wide settings. enum InliningModes { From 7eb666b1556b86503f2f386bf921186cdbb2d22a Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Fri, 15 May 2020 12:30:07 -0500 Subject: [PATCH 128/770] [PowerPC] Add support for -mcpu=pwr10 in both clang and llvm Summary: This patch simply adds support for the new CPU in anticipation of Power10. There isn't really any functionality added so there are no associated test cases at this time. 
Reviewers: stefanp, nemanjai, amyk, hfinkel, power-llvm-team, #powerpc Reviewed By: stefanp, nemanjai, amyk, #powerpc Subscribers: NeHuang, steven.zhang, hiraditya, llvm-commits, wuzish, shchenz, cfe-commits, kbarton, echristo Tags: #clang, #powerpc, #llvm Differential Revision: https://reviews.llvm.org/D80020 --- clang/lib/Basic/Targets/PPC.cpp | 41 +++++++++----- clang/lib/Basic/Targets/PPC.h | 43 ++++++++------- clang/lib/Driver/ToolChains/Arch/PPC.cpp | 20 ++++--- clang/test/Misc/target-invalid-cpu-note.c | 2 +- clang/test/Preprocessor/init-ppc64.c | 18 ++++++ llvm/lib/Support/Host.cpp | 1 + llvm/lib/Target/PowerPC/PPC.td | 27 +++++++-- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 3 + llvm/lib/Target/PowerPC/PPCSubtarget.cpp | 1 + llvm/lib/Target/PowerPC/PPCSubtarget.h | 55 ++++++++++--------- .../Target/PowerPC/PPCTargetTransformInfo.cpp | 9 ++- llvm/test/CodeGen/PowerPC/check-cpu.ll | 6 +- 12 files changed, 148 insertions(+), 78 deletions(-) diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index 81c13a8104e8a..231f94b66f5fd 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -151,6 +151,8 @@ void PPCTargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("_ARCH_PWR8"); if (ArchDefs & ArchDefinePwr9) Builder.defineMacro("_ARCH_PWR9"); + if (ArchDefs & ArchDefinePwr10) + Builder.defineMacro("_ARCH_PWR10"); if (ArchDefs & ArchDefineA2) Builder.defineMacro("_ARCH_A2"); if (ArchDefs & ArchDefineA2q) { @@ -263,41 +265,51 @@ bool PPCTargetInfo::initFeatureMap( .Case("pwr7", true) .Case("pwr8", true) .Case("pwr9", true) + .Case("pwr10", true) .Case("ppc64", true) .Case("ppc64le", true) .Default(false); Features["qpx"] = (CPU == "a2q"); - Features["power9-vector"] = (CPU == "pwr9"); + Features["power9-vector"] = llvm::StringSwitch(CPU) + .Case("pwr10", true) + .Case("pwr9", true) + .Default(false); Features["crypto"] = llvm::StringSwitch(CPU) .Case("ppc64le", true) + .Case("pwr10", 
true) .Case("pwr9", true) .Case("pwr8", true) .Default(false); Features["power8-vector"] = llvm::StringSwitch(CPU) .Case("ppc64le", true) + .Case("pwr10", true) .Case("pwr9", true) .Case("pwr8", true) .Default(false); Features["bpermd"] = llvm::StringSwitch(CPU) .Case("ppc64le", true) + .Case("pwr10", true) .Case("pwr9", true) .Case("pwr8", true) .Case("pwr7", true) .Default(false); Features["extdiv"] = llvm::StringSwitch(CPU) .Case("ppc64le", true) + .Case("pwr10", true) .Case("pwr9", true) .Case("pwr8", true) .Case("pwr7", true) .Default(false); Features["direct-move"] = llvm::StringSwitch(CPU) .Case("ppc64le", true) + .Case("pwr10", true) .Case("pwr9", true) .Case("pwr8", true) .Default(false); Features["vsx"] = llvm::StringSwitch(CPU) .Case("ppc64le", true) + .Case("pwr10", true) .Case("pwr9", true) .Case("pwr8", true) .Case("pwr7", true) @@ -313,10 +325,10 @@ bool PPCTargetInfo::initFeatureMap( .Case("e500", true) .Default(false); - // Future CPU should include all of the features of Power 9 as well as any + // Future CPU should include all of the features of Power 10 as well as any // additional features (yet to be determined) specific to it. 
if (CPU == "future") { - initFeatureMap(Features, Diags, "pwr9", FeaturesVec); + initFeatureMap(Features, Diags, "pwr10", FeaturesVec); addFutureSpecificFeatures(Features); } @@ -463,18 +475,17 @@ ArrayRef PPCTargetInfo::getGCCAddlRegNames() const { } static constexpr llvm::StringLiteral ValidCPUNames[] = { - {"generic"}, {"440"}, {"450"}, {"601"}, {"602"}, - {"603"}, {"603e"}, {"603ev"}, {"604"}, {"604e"}, - {"620"}, {"630"}, {"g3"}, {"7400"}, {"g4"}, - {"7450"}, {"g4+"}, {"750"}, {"8548"}, {"970"}, - {"g5"}, {"a2"}, {"a2q"}, {"e500"}, {"e500mc"}, - {"e5500"}, {"power3"}, {"pwr3"}, {"power4"}, {"pwr4"}, - {"power5"}, {"pwr5"}, {"power5x"}, {"pwr5x"}, {"power6"}, - {"pwr6"}, {"power6x"}, {"pwr6x"}, {"power7"}, {"pwr7"}, - {"power8"}, {"pwr8"}, {"power9"}, {"pwr9"}, {"powerpc"}, - {"ppc"}, {"powerpc64"}, {"ppc64"}, {"powerpc64le"}, {"ppc64le"}, - {"future"} -}; + {"generic"}, {"440"}, {"450"}, {"601"}, {"602"}, + {"603"}, {"603e"}, {"603ev"}, {"604"}, {"604e"}, + {"620"}, {"630"}, {"g3"}, {"7400"}, {"g4"}, + {"7450"}, {"g4+"}, {"750"}, {"8548"}, {"970"}, + {"g5"}, {"a2"}, {"a2q"}, {"e500"}, {"e500mc"}, + {"e5500"}, {"power3"}, {"pwr3"}, {"power4"}, {"pwr4"}, + {"power5"}, {"pwr5"}, {"power5x"}, {"pwr5x"}, {"power6"}, + {"pwr6"}, {"power6x"}, {"pwr6x"}, {"power7"}, {"pwr7"}, + {"power8"}, {"pwr8"}, {"power9"}, {"pwr9"}, {"power10"}, + {"pwr10"}, {"powerpc"}, {"ppc"}, {"powerpc64"}, {"ppc64"}, + {"powerpc64le"}, {"ppc64le"}, {"future"}}; bool PPCTargetInfo::isValidCPUName(StringRef Name) const { return llvm::find(ValidCPUNames, Name) != std::end(ValidCPUNames); diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h index 7c19a96a99c74..3feda1853547f 100644 --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -43,13 +43,13 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { ArchDefinePwr7 = 1 << 11, ArchDefinePwr8 = 1 << 12, ArchDefinePwr9 = 1 << 13, - ArchDefineFuture = 1 << 14, - ArchDefineA2 = 1 << 15, - 
ArchDefineA2q = 1 << 16, - ArchDefineE500 = 1 << 17 + ArchDefinePwr10 = 1 << 14, + ArchDefineFuture = 1 << 15, + ArchDefineA2 = 1 << 16, + ArchDefineA2q = 1 << 17, + ArchDefineE500 = 1 << 18 } ArchDefineTypes; - ArchDefineTypes ArchDefs = ArchDefineNone; static const Builtin::Info BuiltinInfo[]; static const char *const GCCRegNames[]; @@ -119,20 +119,20 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { .Case("a2q", ArchDefineName | ArchDefineA2 | ArchDefineA2q) .Cases("power3", "pwr3", ArchDefinePpcgr) .Cases("power4", "pwr4", - ArchDefinePwr4 | ArchDefinePpcgr | ArchDefinePpcsq) + ArchDefinePwr4 | ArchDefinePpcgr | ArchDefinePpcsq) .Cases("power5", "pwr5", - ArchDefinePwr5 | ArchDefinePwr4 | ArchDefinePpcgr | - ArchDefinePpcsq) + ArchDefinePwr5 | ArchDefinePwr4 | ArchDefinePpcgr | + ArchDefinePpcsq) .Cases("power5x", "pwr5x", - ArchDefinePwr5x | ArchDefinePwr5 | ArchDefinePwr4 | - ArchDefinePpcgr | ArchDefinePpcsq) + ArchDefinePwr5x | ArchDefinePwr5 | ArchDefinePwr4 | + ArchDefinePpcgr | ArchDefinePpcsq) .Cases("power6", "pwr6", - ArchDefinePwr6 | ArchDefinePwr5x | ArchDefinePwr5 | - ArchDefinePwr4 | ArchDefinePpcgr | ArchDefinePpcsq) + ArchDefinePwr6 | ArchDefinePwr5x | ArchDefinePwr5 | + ArchDefinePwr4 | ArchDefinePpcgr | ArchDefinePpcsq) .Cases("power6x", "pwr6x", - ArchDefinePwr6x | ArchDefinePwr6 | ArchDefinePwr5x | - ArchDefinePwr5 | ArchDefinePwr4 | ArchDefinePpcgr | - ArchDefinePpcsq) + ArchDefinePwr6x | ArchDefinePwr6 | ArchDefinePwr5x | + ArchDefinePwr5 | ArchDefinePwr4 | ArchDefinePpcgr | + ArchDefinePpcsq) .Cases("power7", "pwr7", ArchDefinePwr7 | ArchDefinePwr6 | ArchDefinePwr5x | ArchDefinePwr5 | ArchDefinePwr4 | ArchDefinePpcgr | @@ -146,11 +146,16 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { ArchDefinePwr9 | ArchDefinePwr8 | ArchDefinePwr7 | ArchDefinePwr6 | ArchDefinePwr5x | ArchDefinePwr5 | ArchDefinePwr4 | ArchDefinePpcgr | ArchDefinePpcsq) + .Cases("power10", "pwr10", + ArchDefinePwr10 | ArchDefinePwr9 
| ArchDefinePwr8 | + ArchDefinePwr7 | ArchDefinePwr6 | ArchDefinePwr5x | + ArchDefinePwr5 | ArchDefinePwr4 | ArchDefinePpcgr | + ArchDefinePpcsq) .Case("future", - ArchDefineFuture | ArchDefinePwr9 | ArchDefinePwr8 | - ArchDefinePwr7 | ArchDefinePwr6 | ArchDefinePwr5x | - ArchDefinePwr5 | ArchDefinePwr4 | ArchDefinePpcgr | - ArchDefinePpcsq) + ArchDefineFuture | ArchDefinePwr10 | ArchDefinePwr9 | + ArchDefinePwr8 | ArchDefinePwr7 | ArchDefinePwr6 | + ArchDefinePwr5x | ArchDefinePwr5 | ArchDefinePwr4 | + ArchDefinePpcgr | ArchDefinePpcsq) .Cases("8548", "e500", ArchDefineE500) .Default(ArchDefineNone); } diff --git a/clang/lib/Driver/ToolChains/Arch/PPC.cpp b/clang/lib/Driver/ToolChains/Arch/PPC.cpp index e5130a9485de7..144e276a6bd87 100644 --- a/clang/lib/Driver/ToolChains/Arch/PPC.cpp +++ b/clang/lib/Driver/ToolChains/Arch/PPC.cpp @@ -70,6 +70,7 @@ std::string ppc::getPPCTargetCPU(const ArgList &Args) { .Case("power7", "pwr7") .Case("power8", "pwr8") .Case("power9", "pwr9") + .Case("power10", "pwr10") .Case("future", "future") .Case("pwr3", "pwr3") .Case("pwr4", "pwr4") @@ -80,6 +81,7 @@ std::string ppc::getPPCTargetCPU(const ArgList &Args) { .Case("pwr7", "pwr7") .Case("pwr8", "pwr8") .Case("pwr9", "pwr9") + .Case("pwr10", "pwr10") .Case("powerpc", "ppc") .Case("powerpc64", "ppc64") .Case("powerpc64le", "ppc64le") @@ -91,14 +93,16 @@ std::string ppc::getPPCTargetCPU(const ArgList &Args) { const char *ppc::getPPCAsmModeForCPU(StringRef Name) { return llvm::StringSwitch(Name) - .Case("pwr7", "-mpower7") - .Case("power7", "-mpower7") - .Case("pwr8", "-mpower8") - .Case("power8", "-mpower8") - .Case("ppc64le", "-mpower8") - .Case("pwr9", "-mpower9") - .Case("power9", "-mpower9") - .Default("-many"); + .Case("pwr7", "-mpower7") + .Case("power7", "-mpower7") + .Case("pwr8", "-mpower8") + .Case("power8", "-mpower8") + .Case("ppc64le", "-mpower8") + .Case("pwr9", "-mpower9") + .Case("power9", "-mpower9") + .Case("pwr10", "-mpower10") + .Case("power10", "-mpower10") + 
.Default("-many"); } void ppc::getPPCTargetFeatures(const Driver &D, const llvm::Triple &Triple, diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c index 9f036c94c3f8e..5c571fb458ec5 100644 --- a/clang/test/Misc/target-invalid-cpu-note.c +++ b/clang/test/Misc/target-invalid-cpu-note.c @@ -81,7 +81,7 @@ // PPC-SAME: 603e, 603ev, 604, 604e, 620, 630, g3, 7400, g4, 7450, g4+, 750, // PPC-SAME: 8548, 970, g5, a2, a2q, e500, e500mc, e5500, power3, pwr3, power4, // PPC-SAME: pwr4, power5, pwr5, power5x, pwr5x, power6, pwr6, power6x, pwr6x, -// PPC-SAME: power7, pwr7, power8, pwr8, power9, pwr9, powerpc, ppc, powerpc64, +// PPC-SAME: power7, pwr7, power8, pwr8, power9, pwr9, power10, pwr10, powerpc, ppc, powerpc64, // PPC-SAME: ppc64, powerpc64le, ppc64le, future // RUN: not %clang_cc1 -triple mips--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix MIPS diff --git a/clang/test/Preprocessor/init-ppc64.c b/clang/test/Preprocessor/init-ppc64.c index b24f8eb7050be..ed8601636554e 100644 --- a/clang/test/Preprocessor/init-ppc64.c +++ b/clang/test/Preprocessor/init-ppc64.c @@ -627,12 +627,30 @@ // PPCPOWER9:#define _ARCH_PWR7 1 // PPCPOWER9:#define _ARCH_PWR9 1 // +// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-none-none -target-cpu pwr10 -fno-signed-char < /dev/null | FileCheck -match-full-lines -check-prefix PPCPOWER10 %s +// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-none-none -target-cpu power10 -fno-signed-char < /dev/null | FileCheck -match-full-lines -check-prefix PPCPOWER10 %s +// +// PPCPOWER10:#define _ARCH_PPC 1 +// PPCPOWER10:#define _ARCH_PPC64 1 +// PPCPOWER10:#define _ARCH_PPCGR 1 +// PPCPOWER10:#define _ARCH_PPCSQ 1 +// PPCPOWER10:#define _ARCH_PWR10 1 +// PPCPOWER10:#define _ARCH_PWR4 1 +// PPCPOWER10:#define _ARCH_PWR5 1 +// PPCPOWER10:#define _ARCH_PWR5X 1 +// PPCPOWER10:#define _ARCH_PWR6 1 +// PPCPOWER10-NOT:#define _ARCH_PWR6X 1 +// PPCPOWER10:#define 
_ARCH_PWR7 1 +// PPCPOWER10:#define _ARCH_PWR8 1 +// PPCPOWER10:#define _ARCH_PWR9 1 +// // RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-none-none -target-cpu future -fno-signed-char < /dev/null | FileCheck -match-full-lines -check-prefix PPCFUTURE %s // // PPCFUTURE:#define _ARCH_PPC 1 // PPCFUTURE:#define _ARCH_PPC64 1 // PPCFUTURE:#define _ARCH_PPCGR 1 // PPCFUTURE:#define _ARCH_PPCSQ 1 +// PPCFUTURE:#define _ARCH_PWR10 1 // PPCFUTURE:#define _ARCH_PWR4 1 // PPCFUTURE:#define _ARCH_PWR5 1 // PPCFUTURE:#define _ARCH_PWR5X 1 diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index d9b3cac5e8dc0..da68464c4a3d9 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -142,6 +142,7 @@ StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) { .Case("POWER8E", "pwr8") .Case("POWER8NVL", "pwr8") .Case("POWER9", "pwr9") + .Case("POWER10", "pwr10") // FIXME: If we get a simulator or machine with the capabilities of // mcpu=future, we should revisit this and add the name reported by the // simulator/machine. 
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index 1d1f11e498c20..a6c7868f6ac25 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -51,6 +51,7 @@ def DirectivePwr6x def DirectivePwr7: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR7", "">; def DirectivePwr8: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR8", "">; def DirectivePwr9: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR9", "">; +def DirectivePwr10: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR10", "">; def DirectivePwrFuture : SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR_FUTURE", "">; @@ -205,6 +206,9 @@ def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true", def FeatureISA3_0 : SubtargetFeature<"isa-v30-instructions", "IsISA3_0", "true", "Enable instructions added in ISA 3.0.">; +def FeatureISA3_1 : SubtargetFeature<"isa-v31-instructions", "IsISA3_1", + "true", + "Enable instructions added in ISA 3.1.">; def FeatureP9Altivec : SubtargetFeature<"power9-altivec", "HasP9Altivec", "true", "Enable POWER9 Altivec instructions", [FeatureISA3_0, FeatureP8Altivec]>; @@ -328,14 +332,25 @@ def ProcessorFeatures { list P9Features = !listconcat(P9InheritableFeatures, P9SpecificFeatures); + // Power10 + // For P10 CPU we assume that all of the existing features from Power9 + // still exist with the exception of those we know are Power9 specific. + list P10AdditionalFeatures = + [DirectivePwr10, FeatureISA3_1, FeaturePrefixInstrs, + FeaturePCRelativeMemops]; + list P10SpecificFeatures = []; + list P10InheritableFeatures = + !listconcat(P9InheritableFeatures, P10AdditionalFeatures); + list P10Features = + !listconcat(P10InheritableFeatures, P10SpecificFeatures); + // Future - // For future CPU we assume that all of the existing features from Power 9 - // still exist with the exception of those we know are Power 9 specific. 
+ // For future CPU we assume that all of the existing features from Power10 + // still exist with the exception of those we know are Power10 specific. list FutureAdditionalFeatures = []; - list FutureSpecificFeatures = - [FeaturePrefixInstrs, FeaturePCRelativeMemops]; + list FutureSpecificFeatures = []; list FutureInheritableFeatures = - !listconcat(P9InheritableFeatures, FutureAdditionalFeatures); + !listconcat(P10InheritableFeatures, FutureAdditionalFeatures); list FutureFeatures = !listconcat(FutureInheritableFeatures, FutureSpecificFeatures); } @@ -540,6 +555,8 @@ def : ProcessorModel<"pwr6x", G5Model, def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.P7Features>; def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.P8Features>; def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.P9Features>; +// No scheduler model yet. +def : ProcessorModel<"pwr10", NoSchedModel, ProcessorFeatures.P10Features>; // No scheduler model for future CPU. def : ProcessorModel<"future", NoSchedModel, ProcessorFeatures.FutureFeatures>; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 42df83831113a..53f9ac678c7b7 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1306,6 +1306,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, case PPC::DIR_PWR7: case PPC::DIR_PWR8: case PPC::DIR_PWR9: + case PPC::DIR_PWR10: case PPC::DIR_PWR_FUTURE: setPrefLoopAlignment(Align(16)); setPrefFunctionAlignment(Align(16)); @@ -14913,6 +14914,7 @@ Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { case PPC::DIR_PWR7: case PPC::DIR_PWR8: case PPC::DIR_PWR9: + case PPC::DIR_PWR10: case PPC::DIR_PWR_FUTURE: { if (!ML) break; @@ -16103,6 +16105,7 @@ SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const { // vector 7 2 2 return true; case PPC::DIR_PWR9: + case PPC::DIR_PWR10: case PPC::DIR_PWR_FUTURE: // type mul add shl 
// scalar 5 2 2 diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index cfc54df13f792..2f332715d8cac 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -115,6 +115,7 @@ void PPCSubtarget::initializeEnvironment() { HasAddiLoadFusion = false; HasAddisLoadFusion = false; IsISA3_0 = false; + IsISA3_1 = false; UseLongCalls = false; SecurePlt = false; VectorsUseTwoUnits = false; diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h index be1143f903e8b..bfe39814e4cc8 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -34,32 +34,33 @@ class StringRef; namespace PPC { // -m directive values. - enum { - DIR_NONE, - DIR_32, - DIR_440, - DIR_601, - DIR_602, - DIR_603, - DIR_7400, - DIR_750, - DIR_970, - DIR_A2, - DIR_E500, - DIR_E500mc, - DIR_E5500, - DIR_PWR3, - DIR_PWR4, - DIR_PWR5, - DIR_PWR5X, - DIR_PWR6, - DIR_PWR6X, - DIR_PWR7, - DIR_PWR8, - DIR_PWR9, - DIR_PWR_FUTURE, - DIR_64 - }; +enum { + DIR_NONE, + DIR_32, + DIR_440, + DIR_601, + DIR_602, + DIR_603, + DIR_7400, + DIR_750, + DIR_970, + DIR_A2, + DIR_E500, + DIR_E500mc, + DIR_E5500, + DIR_PWR3, + DIR_PWR4, + DIR_PWR5, + DIR_PWR5X, + DIR_PWR6, + DIR_PWR6X, + DIR_PWR7, + DIR_PWR8, + DIR_PWR9, + DIR_PWR10, + DIR_PWR_FUTURE, + DIR_64 +}; } class GlobalValue; @@ -138,6 +139,7 @@ class PPCSubtarget : public PPCGenSubtargetInfo { bool HasAddiLoadFusion; bool HasAddisLoadFusion; bool IsISA3_0; + bool IsISA3_1; bool UseLongCalls; bool SecurePlt; bool VectorsUseTwoUnits; @@ -308,6 +310,7 @@ class PPCSubtarget : public PPCGenSubtargetInfo { bool hasHTM() const { return HasHTM; } bool hasFloat128() const { return HasFloat128; } bool isISA3_0() const { return IsISA3_0; } + bool isISA3_1() const { return IsISA3_1; } bool useLongCalls() const { return UseLongCalls; } bool hasFusion() const { return HasFusion; } bool hasAddiLoadFusion() const { 
return HasAddiLoadFusion; } diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index a41c6b41a991b..46c5335a558f4 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -651,11 +651,12 @@ unsigned PPCTTIImpl::getCacheLineSize() const { if (CacheLineSize.getNumOccurrences() > 0) return CacheLineSize; - // On P7, P8 or P9 we have a cache line size of 128. + // Starting with P7 we have a cache line size of 128. unsigned Directive = ST->getCPUDirective(); // Assume that Future CPU has the same cache line size as the others. if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 || - Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR_FUTURE) + Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 || + Directive == PPC::DIR_PWR_FUTURE) return 128; // On other processors return a default of 64 bytes. @@ -687,9 +688,11 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) { // For P7 and P8, floating-point instructions have a 6-cycle latency and // there are two execution units, so unroll by 12x for latency hiding. // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready + // FIXME: the same for P10 as previous gen until POWER10 scheduling is ready // Assume that future is the same as the others. 
if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 || - Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR_FUTURE) + Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 || + Directive == PPC::DIR_PWR_FUTURE) return 12; // For most things, modern systems have two execution units (and diff --git a/llvm/test/CodeGen/PowerPC/check-cpu.ll b/llvm/test/CodeGen/PowerPC/check-cpu.ll index baa39024ebe8d..132be3058216b 100644 --- a/llvm/test/CodeGen/PowerPC/check-cpu.ll +++ b/llvm/test/CodeGen/PowerPC/check-cpu.ll @@ -2,9 +2,13 @@ ; RUN: -mcpu=future < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -mcpu=future < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=power10 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 < %s | FileCheck %s -; Test mcpu=future that should be recognized on PowerPC. +; Test -mcpu=[pwr10|future] is recognized on PowerPC. 
; CHECK-NOT: is not a recognized processor for this target ; CHECK: .text From 0788392637f414c312a995f3202177a2919eba2f Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 26 May 2020 13:27:16 -0400 Subject: [PATCH 129/770] [InstCombine] add tests for reassociative sub/add expressions; NFC --- llvm/test/Transforms/InstCombine/sub.ll | 83 +++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll index f6fa797eb0c82..52c51c70f02cf 100644 --- a/llvm/test/Transforms/InstCombine/sub.ll +++ b/llvm/test/Transforms/InstCombine/sub.ll @@ -1538,3 +1538,86 @@ define i8 @test75(i8 %x) { %t1 = sub i8 %x, %t0 ret i8 %t1 } + +; ((w-x) + y) - z --> (w+y) - (x+z) + +define i8 @sub_add_sub_reassoc(i8 %w, i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: @sub_add_sub_reassoc( +; CHECK-NEXT: [[S1:%.*]] = sub i8 [[W:%.*]], [[X:%.*]] +; CHECK-NEXT: [[A:%.*]] = add i8 [[S1]], [[Y:%.*]] +; CHECK-NEXT: [[S2:%.*]] = sub i8 [[A]], [[Z:%.*]] +; CHECK-NEXT: ret i8 [[S2]] +; + %s1 = sub i8 %w, %x + %a = add i8 %s1, %y + %s2 = sub i8 %a, %z + ret i8 %s2 +} + +; vectors work too. 
+ +define <2 x i8> @sub_add_sub_reassoc_commute(<2 x i8> %w, <2 x i8> %x, <2 x i8> %y, <2 x i8> %z) { +; CHECK-LABEL: @sub_add_sub_reassoc_commute( +; CHECK-NEXT: [[D:%.*]] = sdiv <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[S1:%.*]] = sub <2 x i8> [[W:%.*]], [[X:%.*]] +; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[D]], [[S1]] +; CHECK-NEXT: [[S2:%.*]] = sub <2 x i8> [[A]], [[Z:%.*]] +; CHECK-NEXT: ret <2 x i8> [[S2]] +; + %d = sdiv <2 x i8> %y, ; thwart complexity-based canonicalization + %s1 = sub <2 x i8> %w, %x + %a = add <2 x i8> %d, %s1 + %s2 = sub <2 x i8> %a, %z + ret <2 x i8> %s2 +} + +; (v-w) + (x-y) - z --> (v+x) - (w+y+z) + +define i8 @sub_add_sub_reassoc_twice(i8 %v, i8 %w, i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: @sub_add_sub_reassoc_twice( +; CHECK-NEXT: [[S1:%.*]] = sub i8 [[V:%.*]], [[W:%.*]] +; CHECK-NEXT: [[S2:%.*]] = sub i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[A:%.*]] = add i8 [[S1]], [[S2]] +; CHECK-NEXT: [[S3:%.*]] = sub i8 [[A]], [[Z:%.*]] +; CHECK-NEXT: ret i8 [[S3]] +; + %s1 = sub i8 %v, %w + %s2 = sub i8 %x, %y + %a = add i8 %s1, %s2 + %s3 = sub i8 %a, %z + ret i8 %s3 +} + +; negative test - uses + +define i8 @sub_add_sub_reassoc_use1(i8 %w, i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: @sub_add_sub_reassoc_use1( +; CHECK-NEXT: [[S1:%.*]] = sub i8 [[W:%.*]], [[X:%.*]] +; CHECK-NEXT: call void @use8(i8 [[S1]]) +; CHECK-NEXT: [[A:%.*]] = add i8 [[S1]], [[Y:%.*]] +; CHECK-NEXT: [[S2:%.*]] = sub i8 [[A]], [[Z:%.*]] +; CHECK-NEXT: ret i8 [[S2]] +; + %s1 = sub i8 %w, %x + call void @use8(i8 %s1) + %a = add i8 %s1, %y + %s2 = sub i8 %a, %z + ret i8 %s2 +} + +; negative test - uses + +define i8 @sub_add_sub_reassoc_use2(i8 %w, i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: @sub_add_sub_reassoc_use2( +; CHECK-NEXT: [[S1:%.*]] = sub i8 [[W:%.*]], [[X:%.*]] +; CHECK-NEXT: [[A:%.*]] = add i8 [[S1]], [[Y:%.*]] +; CHECK-NEXT: call void @use8(i8 [[A]]) +; CHECK-NEXT: [[S2:%.*]] = sub i8 [[A]], [[Z:%.*]] +; CHECK-NEXT: ret i8 [[S2]] +; + %s1 = sub i8 %w, %x + %a = add i8 %s1, %y + 
call void @use8(i8 %a) + %s2 = sub i8 %a, %z + ret i8 %s2 +} From f5cfcc4b0638eaca9194776309d16cd59c1f961b Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 26 May 2020 14:30:48 -0400 Subject: [PATCH 130/770] [LoopVectorize] regenerate full test checks; NFC --- .../LoopVectorize/interleaved-accesses.ll | 52 ++++++++++++++----- 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll index 5e5dc5e74f1c1..b82d47fced554 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -133,20 +133,46 @@ for.end: ; preds = %for.body ; return r; ; } -; CHECK-LABEL: @test_struct_load4( -; CHECK: %wide.vec = load <16 x i32>, <16 x i32>* {{.*}}, align 4 -; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> -; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> -; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> -; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> -; CHECK: add <4 x i32> -; CHECK: sub <4 x i32> -; CHECK: add <4 x i32> -; CHECK: sub <4 x i32> - %struct.ST4 = type { i32, i32, i32, i32 } define i32 @test_struct_load4(%struct.ST4* nocapture readonly %S) { +; CHECK-LABEL: @test_struct_load4( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST4:%.*]], %struct.ST4* [[S:%.*]], i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>* +; 
CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, <16 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> undef, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> undef, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> undef, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP2]], [[STRIDED_VEC1]] +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[STRIDED_VEC2]] +; CHECK-NEXT: [[TMP5]] = sub <4 x i32> [[TMP4]], [[STRIDED_VEC3]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !6 +; CHECK: middle.block: +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP5]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[BIN_RDX5:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF4]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[BIN_RDX5]], i32 0 +; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !7 +; CHECK: for.end: +; CHECK-NEXT: [[SUB8_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[SUB8_LCSSA]] +; entry: br label %for.body @@ -187,7 +213,7 @@ for.end: ; preds = %for.body ; } ; CHECK-LABEL: @test_struct_store4( -; CHECK: %[[LD:.*]] = load <4 x i32>, <4 x i32>* +; CHECK: 
%[[LD:.*]] = load <4 x i32>, <4 x i32>* ; CHECK: add nsw <4 x i32> %[[LD]], ; CHECK: shl nsw <4 x i32> %[[LD]], ; CHECK: add nsw <4 x i32> %[[LD]], @@ -509,7 +535,7 @@ for.body: ; preds = %for.body, %entry ; int a; ; float b; ; }; -; +; ; int SA; ; float SB; ; From 1a2bffaf8b4567663f3001bd9c7532322e89f990 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 26 May 2020 14:32:57 -0400 Subject: [PATCH 131/770] [InstCombine] reassociate sub+add to increase adds and throughput The -reassociate pass tends to transform this kind of pattern into something that is worse for vectorization and codegen. See PR43953: https://bugs.llvm.org/show_bug.cgi?id=43953 Follows-up the FP version of the same transform: rGa0ce2338a083 --- .../InstCombine/InstCombineAddSub.cpp | 11 ++++++++++ llvm/test/Transforms/InstCombine/sub.ll | 20 +++++++++---------- .../LoopVectorize/interleaved-accesses.ll | 6 +++--- 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 233e0c7b5de72..288d0d148689e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1765,6 +1765,17 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { if (match(Op0, m_OneUse(m_Add(m_Value(X), m_AllOnes())))) return BinaryOperator::CreateAdd(Builder.CreateNot(Op1), X); + // Reassociate sub/add sequences to create more add instructions and + // reduce dependency chains: + // ((X - Y) + Z) - Op1 --> (X + Z) - (Y + Op1) + Value *Z; + if (match(Op0, m_OneUse(m_c_Add(m_OneUse(m_Sub(m_Value(X), m_Value(Y))), + m_Value(Z))))) { + Value *XZ = Builder.CreateAdd(X, Z); + Value *YW = Builder.CreateAdd(Y, Op1); + return BinaryOperator::CreateSub(XZ, YW); + } + if (Constant *C = dyn_cast(Op0)) { Value *X; if (match(Op1, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) diff --git a/llvm/test/Transforms/InstCombine/sub.ll 
b/llvm/test/Transforms/InstCombine/sub.ll index 52c51c70f02cf..9463cea877b92 100644 --- a/llvm/test/Transforms/InstCombine/sub.ll +++ b/llvm/test/Transforms/InstCombine/sub.ll @@ -1543,9 +1543,9 @@ define i8 @test75(i8 %x) { define i8 @sub_add_sub_reassoc(i8 %w, i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @sub_add_sub_reassoc( -; CHECK-NEXT: [[S1:%.*]] = sub i8 [[W:%.*]], [[X:%.*]] -; CHECK-NEXT: [[A:%.*]] = add i8 [[S1]], [[Y:%.*]] -; CHECK-NEXT: [[S2:%.*]] = sub i8 [[A]], [[Z:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[W:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[S2:%.*]] = sub i8 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i8 [[S2]] ; %s1 = sub i8 %w, %x @@ -1559,9 +1559,9 @@ define i8 @sub_add_sub_reassoc(i8 %w, i8 %x, i8 %y, i8 %z) { define <2 x i8> @sub_add_sub_reassoc_commute(<2 x i8> %w, <2 x i8> %x, <2 x i8> %y, <2 x i8> %z) { ; CHECK-LABEL: @sub_add_sub_reassoc_commute( ; CHECK-NEXT: [[D:%.*]] = sdiv <2 x i8> [[Y:%.*]], -; CHECK-NEXT: [[S1:%.*]] = sub <2 x i8> [[W:%.*]], [[X:%.*]] -; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[D]], [[S1]] -; CHECK-NEXT: [[S2:%.*]] = sub <2 x i8> [[A]], [[Z:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[D]], [[W:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[S2:%.*]] = sub <2 x i8> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i8> [[S2]] ; %d = sdiv <2 x i8> %y, ; thwart complexity-based canonicalization @@ -1575,10 +1575,10 @@ define <2 x i8> @sub_add_sub_reassoc_commute(<2 x i8> %w, <2 x i8> %x, <2 x i8> define i8 @sub_add_sub_reassoc_twice(i8 %v, i8 %w, i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @sub_add_sub_reassoc_twice( -; CHECK-NEXT: [[S1:%.*]] = sub i8 [[V:%.*]], [[W:%.*]] -; CHECK-NEXT: [[S2:%.*]] = sub i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[A:%.*]] = add i8 [[S1]], [[S2]] -; CHECK-NEXT: [[S3:%.*]] = sub i8 [[A]], [[Z:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[W:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[X:%.*]], [[V:%.*]] +; 
CHECK-NEXT: [[TMP3:%.*]] = add i8 [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: [[S3:%.*]] = sub i8 [[TMP2]], [[TMP3]] ; CHECK-NEXT: ret i8 [[S3]] ; %s1 = sub i8 %v, %w diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll index b82d47fced554..f7a02d613af1b 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -152,9 +152,9 @@ define i32 @test_struct_load4(%struct.ST4* nocapture readonly %S) { ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> undef, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> undef, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP2]], [[STRIDED_VEC1]] -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[STRIDED_VEC2]] -; CHECK-NEXT: [[TMP5]] = sub <4 x i32> [[TMP4]], [[STRIDED_VEC3]] +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[STRIDED_VEC2]] +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[STRIDED_VEC1]], [[STRIDED_VEC3]] +; CHECK-NEXT: [[TMP5]] = sub <4 x i32> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !6 From 713538b629e45e6236b5d60fd6b64d7b8669cd00 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Tue, 26 May 2020 11:58:31 -0700 Subject: [PATCH 132/770] Be more specific about auto * vs auto for po alias. 
--- lldb/source/Interpreter/CommandInterpreter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp index 1cd71b07eaeb7..61288fc42131a 100644 --- a/lldb/source/Interpreter/CommandInterpreter.cpp +++ b/lldb/source/Interpreter/CommandInterpreter.cpp @@ -356,7 +356,7 @@ void CommandInterpreter::Initialize() { AddAlias("p", cmd_obj_sp, "--")->SetHelpLong(""); AddAlias("print", cmd_obj_sp, "--")->SetHelpLong(""); AddAlias("call", cmd_obj_sp, "--")->SetHelpLong(""); - if (auto po = AddAlias("po", cmd_obj_sp, "-O --")) { + if (auto *po = AddAlias("po", cmd_obj_sp, "-O --")) { po->SetHelp("Evaluate an expression on the current thread. Displays any " "returned value with formatting " "controlled by the type's author."); From fca76b79456c916fd2ce193ef76d6e795bd9c105 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Tue, 26 May 2020 12:04:14 -0700 Subject: [PATCH 133/770] Roll variables into an LLVM_DEBUG block to address -Wunused-but-set-variable --- llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp index cc7a7e2ca9cc8..ee219724ee469 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp @@ -83,8 +83,6 @@ class ELFLinkGraphBuilder_x86_64 { for (auto SymRef : *Symbols) { Optional Name; - unsigned char Binding; - uint64_t Value; uint64_t Size = 0; // FIXME: Read size. 
@@ -95,17 +93,15 @@ class ELFLinkGraphBuilder_x86_64 { } else { return NameOrErr.takeError(); } - Binding = SymRef.getBinding(); - Value = SymRef.getValue(); LLVM_DEBUG({ dbgs() << " "; if (!Name) dbgs() << ""; else dbgs() << *Name; - dbgs() << ": value = " << formatv("{0:x16}", Value) + dbgs() << ": value = " << formatv("{0:x16}", SymRef.getValue()) << ", type = " << formatv("{0:x2}", SymRef.getType()) - << ", binding = " << Binding + << ", binding = " << SymRef.getBinding() << ", size =" << Size; dbgs() << "\n"; }); From ae903f0313e481520eff8a13044070aca4d0b75d Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 26 May 2020 12:14:32 -0700 Subject: [PATCH 134/770] [lldb/Test] Reinstate FoundationSymtabTestCase --- .../lang/objc/foundation/TestSymbolTable.py | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/lldb/test/API/lang/objc/foundation/TestSymbolTable.py b/lldb/test/API/lang/objc/foundation/TestSymbolTable.py index b77a8dfc0ed90..f3331e829c27e 100644 --- a/lldb/test/API/lang/objc/foundation/TestSymbolTable.py +++ b/lldb/test/API/lang/objc/foundation/TestSymbolTable.py @@ -40,13 +40,25 @@ def test_with_python_api(self): process = target.LaunchSimple( None, None, self.get_process_working_directory()) - # - # Exercise Python APIs to access the symbol table entries. - # - # Create the filespec by which to locate our a.out module. filespec = lldb.SBFileSpec(exe, False) module = target.FindModule(filespec) self.assertTrue(module, VALID_MODULE) + # Create the set of known symbols. As we iterate through the symbol + # table, remove the symbol from the set if it is a known symbol. 
+ expected_symbols = set(self.symbols_list) + for symbol in module: + self.assertTrue(symbol, VALID_SYMBOL) + #print("symbol:", symbol) + name = symbol.GetName() + if name in expected_symbols: + #print("Removing %s from known_symbols %s" % (name, expected_symbols)) + expected_symbols.remove(name) + + # At this point, the known_symbols set should have become an empty set. + # If not, raise an error. + #print("symbols unaccounted for:", expected_symbols) + self.assertTrue(len(expected_symbols) == 0, + "All the known symbols are accounted for") From ef94f60ff7954521e6ff1be044a4a5d0599ce4ef Mon Sep 17 00:00:00 2001 From: Stefanos Baziotis Date: Tue, 26 May 2020 22:16:13 +0300 Subject: [PATCH 135/770] [MSSA][Doc] Fix typo --- llvm/docs/MemorySSA.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/MemorySSA.rst b/llvm/docs/MemorySSA.rst index 4f96c83a032d2..99adc8b11911b 100644 --- a/llvm/docs/MemorySSA.rst +++ b/llvm/docs/MemorySSA.rst @@ -64,7 +64,7 @@ or indireclty. For example in: ``d`` is connected directly with ``c`` and indirectly with ``b``. This means that ``d`` potentially clobbers (see below) ``c`` *or* -``b`` *or* both. This in turn implies that without the use of `The walker_`, +``b`` *or* both. This in turn implies that without the use of `The walker`_, initially every ``MemoryDef`` clobbers every other ``MemoryDef``. ``MemoryPhi``\ s are ``PhiNode``\ s, but for memory operations. If at any From c4dbe59ae8253d73b63e5fcce0bc8bc44b4d07b5 Mon Sep 17 00:00:00 2001 From: Pete Steinfeld Date: Fri, 22 May 2020 12:08:56 -0700 Subject: [PATCH 136/770] [flang] Fixes for problems with declaring procedure entities Summary: We were not detecting declaring multiple interfaces to the same procedure. Also, we were not handling the initialization of entities where the associated Symbol had previously had errors. I added the function `IsInterfaceSet()` to ProcEntityDetails to see if the value of `interface_` had been previously set.
I then checked this function before calling set_interface() and emitted an error message if the interface was already set. Also, in situations where we were emitting error messages for symbols, I set the Error flag on the Symbol. Then when performing initialization on the Symbol, I first check to see if the Symbol had an error. Reviewers: tskeith, klausler, DavidTruby Subscribers: llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D80453 --- flang/include/flang/Semantics/symbol.h | 8 ++- flang/lib/Semantics/resolve-names.cpp | 96 ++++++++++++++++---------- flang/test/Semantics/resolve91.f90 | 46 ++++++++++++ 3 files changed, 111 insertions(+), 39 deletions(-) create mode 100644 flang/test/Semantics/resolve91.f90 diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h index 2a95f483a173e..34e4ea95eb4af 100644 --- a/flang/include/flang/Semantics/symbol.h +++ b/flang/include/flang/Semantics/symbol.h @@ -218,7 +218,13 @@ class ProcEntityDetails : public EntityDetails, public WithPassArg { const ProcInterface &interface() const { return interface_; } ProcInterface &interface() { return interface_; } - void set_interface(const ProcInterface &interface) { interface_ = interface; } + void set_interface(const ProcInterface &interface) { + CHECK(!IsInterfaceSet()); + interface_ = interface; + } + bool IsInterfaceSet() { + return interface_.symbol() != nullptr || interface_.type() != nullptr; + } inline bool HasExplicitInterface() const; // Be advised: !init().has_value() => uninitialized pointer, diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 175d02597dfa2..3b60969b122a7 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -3435,18 +3435,25 @@ Symbol &DeclarationVisitor::DeclareProcEntity( const parser::Name &name, Attrs attrs, const ProcInterface &interface) { Symbol &symbol{DeclareEntity(name, attrs)}; if (auto 
*details{symbol.detailsIf()}) { - if (interface.type()) { - symbol.set(Symbol::Flag::Function); - } else if (interface.symbol()) { - if (interface.symbol()->test(Symbol::Flag::Function)) { + if (details->IsInterfaceSet()) { + SayWithDecl(name, symbol, + "The interface for procedure '%s' has already been " + "declared"_err_en_US); + context().SetError(symbol); + } else { + if (interface.type()) { symbol.set(Symbol::Flag::Function); - } else if (interface.symbol()->test(Symbol::Flag::Subroutine)) { - symbol.set(Symbol::Flag::Subroutine); + } else if (interface.symbol()) { + if (interface.symbol()->test(Symbol::Flag::Function)) { + symbol.set(Symbol::Flag::Function); + } else if (interface.symbol()->test(Symbol::Flag::Subroutine)) { + symbol.set(Symbol::Flag::Subroutine); + } } + details->set_interface(interface); + SetBindNameOn(symbol); + SetPassNameOn(symbol); } - details->set_interface(interface); - SetBindNameOn(symbol); - SetPassNameOn(symbol); } return symbol; } @@ -3460,18 +3467,22 @@ Symbol &DeclarationVisitor::DeclareObjectEntity( } if (!arraySpec().empty()) { if (details->IsArray()) { - Say(name, - "The dimensions of '%s' have already been declared"_err_en_US); - context().SetError(symbol); + if (!context().HasError(symbol)) { + Say(name, + "The dimensions of '%s' have already been declared"_err_en_US); + context().SetError(symbol); + } } else { details->set_shape(arraySpec()); } } if (!coarraySpec().empty()) { if (details->IsCoarray()) { - Say(name, - "The codimensions of '%s' have already been declared"_err_en_US); - context().SetError(symbol); + if (!context().HasError(symbol)) { + Say(name, + "The codimensions of '%s' have already been declared"_err_en_US); + context().SetError(symbol); + } } else { details->set_coshape(coarraySpec()); } @@ -3913,7 +3924,7 @@ bool DeclarationVisitor::Pre(const parser::ProcComponentDefStmt &) { CHECK(!interfaceName_); return true; } -void DeclarationVisitor::Post(const parser::ProcComponentDefStmt &stmt) { +void 
DeclarationVisitor::Post(const parser::ProcComponentDefStmt &) { interfaceName_ = nullptr; } bool DeclarationVisitor::Pre(const parser::ProcPointerInit &x) { @@ -4702,9 +4713,11 @@ void DeclarationVisitor::SetType( } else if (!symbol.test(Symbol::Flag::Implicit)) { SayWithDecl( name, symbol, "The type of '%s' has already been declared"_err_en_US); + context().SetError(symbol); } else if (type != *prevType) { SayWithDecl(name, symbol, "The type of '%s' has already been implicitly declared"_err_en_US); + context().SetError(symbol); } else { symbol.set(Symbol::Flag::Implicit, false); } @@ -5697,17 +5710,21 @@ void DeclarationVisitor::PointerInitialization( const parser::Name &name, const parser::InitialDataTarget &target) { if (name.symbol) { Symbol &ultimate{name.symbol->GetUltimate()}; - if (IsPointer(ultimate)) { - if (auto *details{ultimate.detailsIf()}) { - CHECK(!details->init()); - Walk(target); - if (MaybeExpr expr{EvaluateExpr(target)}) { - CheckInitialDataTarget(ultimate, *expr, target.value().source); - details->set_init(std::move(*expr)); + if (!context().HasError(ultimate)) { + if (IsPointer(ultimate)) { + if (auto *details{ultimate.detailsIf()}) { + CHECK(!details->init()); + Walk(target); + if (MaybeExpr expr{EvaluateExpr(target)}) { + CheckInitialDataTarget(ultimate, *expr, target.value().source); + details->set_init(std::move(*expr)); + } } + } else { + Say(name, + "'%s' is not a pointer but is initialized like one"_err_en_US); + context().SetError(ultimate); } - } else { - Say(name, "'%s' is not a pointer but is initialized like one"_err_en_US); } } } @@ -5715,22 +5732,25 @@ void DeclarationVisitor::PointerInitialization( const parser::Name &name, const parser::ProcPointerInit &target) { if (name.symbol) { Symbol &ultimate{name.symbol->GetUltimate()}; - if (IsProcedurePointer(ultimate)) { - auto &details{ultimate.get()}; - CHECK(!details.init()); - Walk(target); - if (const auto *targetName{std::get_if(&target.u)}) { - 
CheckInitialProcTarget(ultimate, *targetName, name.source); - if (targetName->symbol) { - details.set_init(*targetName->symbol); + if (!context().HasError(ultimate)) { + if (IsProcedurePointer(ultimate)) { + auto &details{ultimate.get()}; + CHECK(!details.init()); + Walk(target); + if (const auto *targetName{std::get_if(&target.u)}) { + CheckInitialProcTarget(ultimate, *targetName, name.source); + if (targetName->symbol) { + details.set_init(*targetName->symbol); + } + } else { + details.set_init(nullptr); // explicit NULL() } } else { - details.set_init(nullptr); // explicit NULL() + Say(name, + "'%s' is not a procedure pointer but is initialized " + "like one"_err_en_US); + context().SetError(ultimate); } - } else { - Say(name, - "'%s' is not a procedure pointer but is initialized " - "like one"_err_en_US); } } } diff --git a/flang/test/Semantics/resolve91.f90 b/flang/test/Semantics/resolve91.f90 new file mode 100644 index 0000000000000..f55ca865cf3c0 --- /dev/null +++ b/flang/test/Semantics/resolve91.f90 @@ -0,0 +1,46 @@ +! RUN: %S/test_errors.sh %s %t %f18 +! 
Tests for duplicate definitions and initializations, mostly of procedures +module m + procedure(real), pointer :: p + !ERROR: The interface for procedure 'p' has already been declared + procedure(integer), pointer :: p +end + +module m1 + real, dimension(:), pointer :: realArray => null() + !ERROR: The type of 'realarray' has already been declared + real, dimension(:), pointer :: realArray => localArray +end module m1 + +module m2 + interface + subroutine sub() + end subroutine sub + end interface + + procedure(sub), pointer :: p1 => null() + !ERROR: The interface for procedure 'p1' has already been declared + procedure(sub), pointer :: p1 => null() + +end module m2 + +module m3 + interface + real function fun() + end function fun + end interface + + procedure(fun), pointer :: f1 => null() + !ERROR: The interface for procedure 'f1' has already been declared + procedure(fun), pointer :: f1 => null() + +end module m3 + +module m4 + real, dimension(:), pointer :: localArray => null() + type :: t2 + real, dimension(:), pointer :: realArray => null() + !ERROR: Component 'realarray' is already declared in this derived type + real, dimension(:), pointer :: realArray => localArray + end type +end module m4 From e09064e97f293491e59b30569033c8962129bdeb Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 24 May 2020 18:06:56 -0400 Subject: [PATCH 137/770] AMDGPU: Update store node checks for atomics Prepare to switch to using StoreSDNode for atomic stores. 
--- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 2 +- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 4 ++-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 50ee0856377a2..52823c16d72d9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -2730,7 +2730,7 @@ bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const { ( Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && - !Ld->isVolatile() && + Ld->isSimple() && !N->isDivergent() && static_cast( getTargetLowering())->isMemOpHasNoClobberedMemOperand(N) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 1edc42adf9de8..d1891e25e5f29 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2920,7 +2920,7 @@ SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N, return SDValue(); LoadSDNode *LN = cast(N); - if (LN->isVolatile() || !ISD::isNormalLoad(LN) || hasVolatileUser(LN)) + if (!LN->isSimple() || !ISD::isNormalLoad(LN) || hasVolatileUser(LN)) return SDValue(); SDLoc SL(N); @@ -2974,7 +2974,7 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N, return SDValue(); StoreSDNode *SN = cast(N); - if (SN->isVolatile() || !ISD::isNormalStore(SN)) + if (!SN->isSimple() || !ISD::isNormalStore(SN)) return SDValue(); EVT VT = SN->getMemoryVT(); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 2c147fa8947c1..a2f5b6cdeec25 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -7625,7 +7625,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT || AS == 
AMDGPUAS::GLOBAL_ADDRESS) { if (Subtarget->getScalarizeGlobalBehavior() && !Op->isDivergent() && - !Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load) && + Load->isSimple() && isMemOpHasNoClobberedMemOperand(Load) && Alignment >= 4 && NumElements < 32) { if (MemVT.isPow2VectorType()) return SDValue(); From e99d50d8440efe8fa3515db4dae873ba39810dfd Mon Sep 17 00:00:00 2001 From: Shoaib Meenai Date: Tue, 26 May 2020 12:22:03 -0700 Subject: [PATCH 138/770] [Support] Remove stale comment Clang has supported __builtin_assume_aligned since r217349 back in 2014, so the comment is very out of date. --- llvm/include/llvm/Support/Compiler.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/Support/Compiler.h b/llvm/include/llvm/Support/Compiler.h index 79af6f5d3c686..80ea76240d6cf 100644 --- a/llvm/include/llvm/Support/Compiler.h +++ b/llvm/include/llvm/Support/Compiler.h @@ -373,7 +373,6 @@ #if __has_builtin(__builtin_assume_aligned) || LLVM_GNUC_PREREQ(4, 7, 0) # define LLVM_ASSUME_ALIGNED(p, a) __builtin_assume_aligned(p, a) #elif defined(LLVM_BUILTIN_UNREACHABLE) -// As of today, clang does not support __builtin_assume_aligned. # define LLVM_ASSUME_ALIGNED(p, a) \ (((uintptr_t(p) % (a)) == 0) ? (p) : (LLVM_BUILTIN_UNREACHABLE, (p))) #else From ba10daa820fa868816eed2b85e70197d354ebfe6 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Tue, 26 May 2020 15:34:57 -0400 Subject: [PATCH 139/770] [mlir][Vector] Add more vector.contract -> outerproduct lowerings and fix vector.contract type inference. This revision expands the types of vector contractions that can be lowered to vector.outerproduct. All 8 permutation cases are supported. The idiomatic manipulation of AffineMap written declaratively makes this straightforward. In the process a bug with the vector.contract verifier was uncovered. The vector shape verification part of the contract op is rewritten to use AffineMap composition.
One bug in the vector `ops.mlir` test is fixed and a new case not yet captured is added to the vector `invalid.mlir` test. Differential Revision: https://reviews.llvm.org/D80393 --- .../mlir/Dialect/Utils/StructuredOpsUtils.h | 12 + mlir/lib/Dialect/Vector/VectorOps.cpp | 67 ++++-- mlir/lib/Dialect/Vector/VectorTransforms.cpp | 133 ++++++++--- mlir/test/Dialect/Vector/invalid.mlir | 20 ++ mlir/test/Dialect/Vector/ops.mlir | 14 +- .../Vector/vector-contract-transforms.mlir | 216 ++++++++++++++++++ 6 files changed, 408 insertions(+), 54 deletions(-) diff --git a/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h b/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h index 5a36aabfab75e..02d2762560767 100644 --- a/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h +++ b/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h @@ -81,12 +81,24 @@ constexpr StringRef getPaddingAttrName() { return "padding"; } /// Use to encode that a particular iterator type has parallel semantics. constexpr StringRef getParallelIteratorTypeName() { return "parallel"; } +constexpr bool isParallelIterator(Attribute attr) { + auto strAttr = attr.dyn_cast_or_null(); + return strAttr && strAttr.getValue() == getParallelIteratorTypeName(); +} /// Use to encode that a particular iterator type has reduction semantics. constexpr StringRef getReductionIteratorTypeName() { return "reduction"; } +constexpr bool isReductionIterator(Attribute attr) { + auto strAttr = attr.dyn_cast_or_null(); + return strAttr && strAttr.getValue() == getReductionIteratorTypeName(); +} /// Use to encode that a particular iterator type has window semantics. constexpr StringRef getWindowIteratorTypeName() { return "window"; } +constexpr bool isWindowIterator(Attribute attr) { + auto strAttr = attr.dyn_cast_or_null(); + return strAttr && strAttr.getValue() == getWindowIteratorTypeName(); +} /// Use to encode that a particular iterator type has window semantics.
inline ArrayRef getAllIteratorTypeNames() { diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp index 1574edb344941..63891d1004d4e 100644 --- a/mlir/lib/Dialect/Vector/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/VectorOps.cpp @@ -223,8 +223,9 @@ static bool verifyDimMap(VectorType lhsType, VectorType rhsType, return true; } -static bool verifyOutputShape( - VectorType lhsType, VectorType rhsType, Type accType, Type resType, +static LogicalResult verifyOutputShape( + ContractionOp op, VectorType lhsType, VectorType rhsType, Type accType, + Type resType, const std::vector> &contractingDimMap, const std::vector> &batchDimMap) { DenseSet lhsContractingDimSet; @@ -256,26 +257,56 @@ static bool verifyOutputShape( if (expectedResultDims.size() == 0) { // No batch or free dimension implies a scalar result. if (resType.isa() || accType.isa()) - return false; - + return op.emitOpError("invalid accumulator/result vector shape"); } else { // At least one batch or free dimension implies a vector result. auto resVectorType = resType.dyn_cast(); auto accVectorType = accType.dyn_cast(); if (!resVectorType || !accVectorType) - return false; - - // Verify dimension from 'resType' against 'expectedResultDims'. - if (resVectorType.getShape().size() != expectedResultDims.size() || - accVectorType.getShape().size() != expectedResultDims.size()) - return false; - for (int64_t i = 0, e = resVectorType.getRank(); i < e; ++i) { - if (resVectorType.getDimSize(i) != expectedResultDims[i] || - accVectorType.getDimSize(i) != expectedResultDims[i]) - return false; + return op.emitOpError("invalid accumulator/result vector shape"); + + // Infer expected result vector type. Lhs + rhs map and lhs + rhs vector + // types fully define the result vector type. This assumes the affine maps + // are well-formed, which must have been verified already. 
+ MLIRContext *ctx = op.getContext(); + AffineMap lhsMap = op.getIndexingMaps()[0]; + AffineMap rhsMap = op.getIndexingMaps()[1]; + SmallVector extents(lhsMap.getNumInputs()); + for (auto pair : + {std::make_pair(lhsType, lhsMap), std::make_pair(rhsType, rhsMap)}) { + VectorType v = pair.first; + auto map = pair.second; + for (unsigned idx = 0, e = v.getRank(); idx < e; ++idx) { + unsigned pos = map.getResult(idx).cast().getPosition(); + if (!extents[pos]) + extents[pos] = getAffineConstantExpr(v.getShape()[idx], ctx); + } } + assert(llvm::all_of(extents, [](AffineExpr e) { return e; }) && + "expected extent along all dimensions."); + + AffineMap resMap = op.getIndexingMaps()[2]; + auto extentsMap = AffineMap::get(/*dimCount=*/extents.size(), + /*symCount=*/0, extents, ctx); + // Compose the resMap with the extentsMap, which is a constant map. + AffineMap expectedMap = simplifyAffineMap(resMap.compose(extentsMap)); + assert(llvm::all_of( + expectedMap.getResults(), + [](AffineExpr e) { return e.isa(); }) && + "expected constant extent along all dimensions."); + // Extract the expected shape and build the type. + auto expectedShape = llvm::to_vector<4>( + llvm::map_range(expectedMap.getResults(), [](AffineExpr e) { + return e.cast().getValue(); + })); + auto expected = + VectorType::get(expectedShape, resVectorType.getElementType()); + if (resVectorType != expected || accVectorType != expected) + return op.emitOpError( + "invalid accumulator/result vector shape, expected: ") + << expected; } - return true; + return success(); } static LogicalResult verify(ContractionOp op) { @@ -329,9 +360,9 @@ static LogicalResult verify(ContractionOp op) { return op.emitOpError("invalid batch dimension map"); // Verify 'accType' and 'resType' shape. 
- if (!verifyOutputShape(lhsType, rhsType, accType, resType, contractingDimMap, - batchDimMap)) - return op.emitOpError("invalid accumulator/result vector shape"); + if (failed(verifyOutputShape(op, lhsType, rhsType, accType, resType, + contractingDimMap, batchDimMap))) + return failure(); // Verify that either two vector masks are set or none are set. auto lhsMaskType = op.getLHSVectorMaskType(); diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp index 44ff03a04f223..491ad62affcbd 100644 --- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp @@ -1454,10 +1454,17 @@ ContractionOpToMatmulOpLowering::match(vector::ContractionOp op) const { if (llvm::size(op.masks()) != 0) return failure(); + auto iteratorTypes = op.iterator_types().getValue(); + if (!isParallelIterator(iteratorTypes[0]) || + !isParallelIterator(iteratorTypes[1]) || + !isReductionIterator(iteratorTypes[2])) + return failure(); + if (vectorTransformsOptions.vectorContractLowering != vector::VectorContractLowering::Matmul || !isRowMajorMatmul(op.indexing_maps())) return failure(); + return success(); } @@ -1503,34 +1510,8 @@ void ContractionOpToMatmulOpLowering::rewrite(vector::ContractionOp op, /// %cK = vector.outerproduct %atRowK, %bRowK, %cK-1 /// ``` /// -/// This only kicks in when VectorTransformsOptions is set to OuterProduct and -/// the vector.contract op is a row-major matrix multiply. -void ContractionOpToOuterProductOpLowering::rewrite( - vector::ContractionOp op, PatternRewriter &rewriter) const { - VectorType lhsType = op.getLhsType(); - // TODO(ntv) other modes. - // We know we are in row-major. - bool transposeLhs = false; - unsigned reductionSize = - transposeLhs ? lhsType.getShape()[0] : lhsType.getShape()[1]; - - // If transposeLhs == false (i.e. lhs(m, reductionSize)), we need to - // transpose it to extract the proper vector. Otherwise, just take - // the lhs. 
- Value lhs = transposeLhs - ? op.lhs() - : rewriter.create( - op.getLoc(), op.lhs(), ArrayRef{1, 0}); - Value res = op.acc(); - // ExtractOp does not allow dynamic indexing, we must unroll explicitly. - for (unsigned k = 0; k < reductionSize; ++k) { - Value a = rewriter.create(op.getLoc(), lhs, k); - Value b = rewriter.create(op.getLoc(), op.rhs(), k); - res = rewriter.create(op.getLoc(), a, b, res); - } - rewriter.replaceOp(op, res); -} - +/// This only kicks in when VectorTransformsOptions is set to OuterProduct but +/// otherwise supports any layout permutation of the matrix-multiply. LogicalResult ContractionOpToOuterProductOpLowering ::match(vector::ContractionOp op) const { // TODO(ajcbik): implement masks @@ -1538,12 +1519,104 @@ ContractionOpToOuterProductOpLowering ::match(vector::ContractionOp op) const { return failure(); if (vectorTransformsOptions.vectorContractLowering != - vector::VectorContractLowering::OuterProduct || - !isRowMajorMatmul(op.indexing_maps())) + vector::VectorContractLowering::OuterProduct) + return failure(); + + // Transpose arguments to make them ready for lowering to OuterProduct. The + // constraint to match is that we must load full rows at a time with + // vector::ExtractOp. + using MapList = ArrayRef>; + auto infer = [](MapList m) { return AffineMap::inferFromExprList(m); }; + AffineExpr m, n, k; + bindDims(op.getContext(), m, n, k); + auto iteratorTypes = op.iterator_types().getValue(); + if (!isParallelIterator(iteratorTypes[0]) || + !isParallelIterator(iteratorTypes[1]) || + !isReductionIterator(iteratorTypes[2])) + return failure(); + SmallVector maps = op.getIndexingMaps(); + // When lowering to outerproduct we can support all permutations. 
+ if (maps != infer({{m, k}, {k, n}, {m, n}}) && + maps != infer({{m, k}, {n, k}, {m, n}}) && + maps != infer({{k, m}, {k, n}, {m, n}}) && + maps != infer({{k, m}, {n, k}, {m, n}}) && + maps != infer({{m, k}, {k, n}, {n, m}}) && + maps != infer({{m, k}, {n, k}, {n, m}}) && + maps != infer({{k, m}, {k, n}, {n, m}}) && + maps != infer({{k, m}, {n, k}, {n, m}})) return failure(); return success(); } +void ContractionOpToOuterProductOpLowering::rewrite( + vector::ContractionOp op, PatternRewriter &rewriter) const { + Location loc = op.getLoc(); + unsigned reductionSize = 0; + VectorType lhsType = op.getLhsType(); + Value lhs = op.lhs(), rhs = op.rhs(), res = op.acc(); + + // Transpose arguments to make them ready for lowering to OuterProduct. The + // constraint to match is that we must load full rows at a time with + // vector::ExtractOp. + using MapList = ArrayRef>; + auto infer = [](MapList m) { return AffineMap::inferFromExprList(m); }; + AffineExpr m, n, k; + bindDims(rewriter.getContext(), m, n, k); + SmallVector perm{1, 0}; + SmallVector maps = op.getIndexingMaps(); + // First batch of cases, no need to output permute. + if (maps == infer({{m, k}, {k, n}, {m, n}})) { + // This is the classical row-major matmul. Just permute the lhs. + reductionSize = lhsType.getShape()[1]; + lhs = rewriter.create(loc, lhs, perm); + } else if (maps == infer({{m, k}, {n, k}, {m, n}})) { + // TODO: may be better to fail and use some vector -> scalar reduction. + reductionSize = lhsType.getShape()[1]; + lhs = rewriter.create(loc, lhs, perm); + rhs = rewriter.create(loc, rhs, perm); + } else if (maps == infer({{k, m}, {k, n}, {m, n}})) { + // No need to permute anything. + reductionSize = lhsType.getShape()[0]; + } else if (maps == infer({{k, m}, {n, k}, {m, n}})) { + // Just permute the rhs. + reductionSize = lhsType.getShape()[0]; + rhs = rewriter.create(loc, rhs, perm); + } + // Second batch of cases, reshuffle to avoid output permute. 
+ else if (maps == infer({{m, k}, {k, n}, {n, m}})) { + // This is the classical row-major matmul. Just permute the lhs. + reductionSize = lhsType.getShape()[1]; + Value tmp = rhs; + rhs = rewriter.create(loc, lhs, perm); + lhs = tmp; + } else if (maps == infer({{m, k}, {n, k}, {n, m}})) { + // TODO: may be better to fail and use some vector -> scalar reduction. + reductionSize = lhsType.getShape()[1]; + Value tmp = rhs; + rhs = rewriter.create(loc, lhs, perm); + lhs = rewriter.create(loc, tmp, perm); + } else if (maps == infer({{k, m}, {k, n}, {n, m}})) { + // No need to permute anything, but still swap lhs and rhs. + reductionSize = lhsType.getShape()[0]; + std::swap(lhs, rhs); + } else if (maps == infer({{k, m}, {n, k}, {n, m}})) { + // Just permute the rhs. + reductionSize = lhsType.getShape()[0]; + Value tmp = lhs; + lhs = rewriter.create(loc, rhs, perm); + rhs = tmp; + } + assert(reductionSize > 0); + + // ExtractOp does not allow dynamic indexing, we must unroll explicitly. + for (unsigned k = 0; k < reductionSize; ++k) { + Value a = rewriter.create(op.getLoc(), lhs, k); + Value b = rewriter.create(op.getLoc(), rhs, k); + res = rewriter.create(op.getLoc(), a, b, res); + } + rewriter.replaceOp(op, res); +} + /// Progressive lowering of ContractionOp. 
/// One: /// %x = vector.contract with at least one free/batch dimension diff --git a/mlir/test/Dialect/Vector/invalid.mlir b/mlir/test/Dialect/Vector/invalid.mlir index c18cf38edfc90..cc72511a6e782 100644 --- a/mlir/test/Dialect/Vector/invalid.mlir +++ b/mlir/test/Dialect/Vector/invalid.mlir @@ -767,6 +767,26 @@ func @contraction(%arg0: vector<4x3xi32>, // ----- +#contraction_accesses = [ + affine_map<(m, n, k) -> (m, k)>, + affine_map<(m, n, k) -> (k, n)>, + affine_map<(m, n, k) -> (n, m)> +] +#contraction_trait = { + indexing_maps = #contraction_accesses, + iterator_types = ["parallel", "parallel", "reduction"] +} +func @contraction(%arg0: vector<2x1xf32>, %arg1: vector<1x3xf32>, %arg2: vector<2x3xf32>) +-> vector<3x2xf32> +{ +// expected-error@+1 {{invalid accumulator/result vector shape, expected: 'vector<3x2xf32>'}} + %0 = vector.contract #contraction_trait %arg0, %arg1, %arg2 + : vector<2x1xf32>, vector<1x3xf32> into vector<2x3xf32> + return %0 : vector<2x3xf32> +} + +// ----- + func @create_mask() { %c2 = constant 2 : index %c3 = constant 3 : index diff --git a/mlir/test/Dialect/Vector/ops.mlir b/mlir/test/Dialect/Vector/ops.mlir index c194cbe238117..57c03c903fe89 100644 --- a/mlir/test/Dialect/Vector/ops.mlir +++ b/mlir/test/Dialect/Vector/ops.mlir @@ -160,9 +160,11 @@ func @contraction_to_scalar(%arg0: vector<10xf32>, %arg1: vector<10xf32>) -> f32 indexing_maps = #contraction_accesses0, iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"] } -#contraction_accesses1 = [ +#contraction_accesses1 = [ // 7, 8, 16, 15 affine_map<(f0, f1, f2, f3, c0, c1) -> (c0, f0, c1, f2)>, + // 8, 16, 7, 5 affine_map<(f0, f1, f2, f3, c0, c1) -> (f1, c1, c0, f3)>, + // 8, 8, 15, 5 affine_map<(f0, f1, f2, f3, c0, c1) -> (f0, f1, f2, f3)> ] #contraction_trait1 = { @@ -172,7 +174,7 @@ func @contraction_to_scalar(%arg0: vector<10xf32>, %arg1: vector<10xf32>) -> f32 } // CHECK-LABEL: contraction func @contraction(%arg0 : vector<7x8x16x15xf32>, %arg1 : 
vector<8x16x7x5xf32>, - %arg2 : vector<8x15x5xf32>, %arg3 : vector<8x15x8x5xf32>, + %arg2 : vector<8x15x5xf32>, %arg3 : vector<8x8x15x5xf32>, %arg4 : index) { // Test contraction with batch and contracting dims. // CHECK: vector.contract {indexing_maps = [#{{.*}}, #{{.*}}, #{{.*}}], iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"]} {{.*}}, {{.*}}, {{.*}} : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x15x5xf32> @@ -181,16 +183,16 @@ func @contraction(%arg0 : vector<7x8x16x15xf32>, %arg1 : vector<8x16x7x5xf32>, // Test contraction with only contracting dims. In this case the lhs/rhs // dimension of size 8 will be considered a parallel dim for lhs/rhs and will // appear twice in the output. - // CHECK: vector.contract {indexing_maps = [#{{.*}}, #{{.*}}, #{{.*}}], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} {{.*}}, {{.*}}, {{.*}} : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x15x8x5xf32> + // CHECK: vector.contract {indexing_maps = [#{{.*}}, #{{.*}}, #{{.*}}], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} {{.*}}, {{.*}}, {{.*}} : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x8x15x5xf32> %1 = vector.contract #contraction_trait1 %arg0, %arg1, %arg3 - : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x15x8x5xf32> + : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x8x15x5xf32> // Test contraction with optional vector mask arguments. 
%lhs_mask = vector.constant_mask [7, 8, 16, 15] : vector<7x8x16x15xi1> %rhs_mask = vector.constant_mask [8, 16, 7, 5] : vector<8x16x7x5xi1> - // CHECK: vector.contract {indexing_maps = [#{{.*}}, #{{.*}}, #{{.*}}], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} {{.*}}, {{.*}}, {{.*}}, {{.*}}, {{.*}} : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x15x8x5xf32> + // CHECK: vector.contract {indexing_maps = [#{{.*}}, #{{.*}}, #{{.*}}], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} {{.*}}, {{.*}}, {{.*}}, {{.*}}, {{.*}} : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x8x15x5xf32> %2 = vector.contract #contraction_trait1 %arg0, %arg1, %arg3, %lhs_mask, %rhs_mask - : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x15x8x5xf32> + : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x8x15x5xf32> return } diff --git a/mlir/test/Dialect/Vector/vector-contract-transforms.mlir b/mlir/test/Dialect/Vector/vector-contract-transforms.mlir index 7eea3baa8d87c..1dd2f377a29c0 100644 --- a/mlir/test/Dialect/Vector/vector-contract-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-contract-transforms.mlir @@ -681,3 +681,219 @@ func @genbool_var_2d(%arg0: index, %arg1: index) -> vector<2x3xi1> { %0 = vector.create_mask %arg0, %arg1 : vector<2x3xi1> return %0 : vector<2x3xi1> } + +#matmat_accesses_0 = [ + affine_map<(m, n, k) -> (m, k)>, + affine_map<(m, n, k) -> (k, n)>, + affine_map<(m, n, k) -> (m, n)> +] +#matmat_trait_0 = { + indexing_maps = #matmat_accesses_0, + iterator_types = ["parallel", "parallel", "reduction"] +} + +// OUTERPRODUCT-LABEL: func @matmul_0 +// OUTERPRODUCT-SAME: %[[A:[a-zA-Z0-9]*]]: vector<2x1xf32>, +// OUTERPRODUCT-SAME: %[[B:[a-zA-Z0-9]*]]: vector<1x3xf32>, +// OUTERPRODUCT-SAME: %[[C:[a-zA-Z0-9]*]]: vector<2x3xf32> +// OUTERPRODUCT: %[[At:.*]] = vector.transpose %[[A]], [1, 0] +// OUTERPRODUCT: %[[a0:.*]] = vector.extract 
%[[At]][0] : vector<1x2xf32> +// OUTERPRODUCT: %[[b0:.*]] = vector.extract %[[B]][0] : vector<1x3xf32> +// OUTERPRODUCT: %[[c0:.*]] = vector.outerproduct %[[a0]], %[[b0]], %[[C]] +// OUTERPRODUCT: return %[[c0]] : vector<2x3xf32> +func @matmul_0(%arg0: vector<2x1xf32>, %arg1: vector<1x3xf32>, %arg2: vector<2x3xf32>) +-> vector<2x3xf32> +{ + %0 = vector.contract #matmat_trait_0 %arg0, %arg1, %arg2 + : vector<2x1xf32>, vector<1x3xf32> into vector<2x3xf32> + return %0 : vector<2x3xf32> +} + +#matmat_accesses_1 = [ + affine_map<(m, n, k) -> (m, k)>, + affine_map<(m, n, k) -> (n, k)>, + affine_map<(m, n, k) -> (m, n)> +] +#matmat_trait_1 = { + indexing_maps = #matmat_accesses_1, + iterator_types = ["parallel", "parallel", "reduction"] +} + +// OUTERPRODUCT-LABEL: func @matmul_1 +// OUTERPRODUCT-SAME: %[[A:[a-zA-Z0-9]*]]: vector<2x1xf32>, +// OUTERPRODUCT-SAME: %[[B:[a-zA-Z0-9]*]]: vector<3x1xf32>, +// OUTERPRODUCT-SAME: %[[C:[a-zA-Z0-9]*]]: vector<2x3xf32> +// OUTERPRODUCT: %[[At:.*]] = vector.transpose %[[A]], [1, 0] +// OUTERPRODUCT: %[[Bt:.*]] = vector.transpose %[[B]], [1, 0] +// OUTERPRODUCT: %[[a0:.*]] = vector.extract %[[At]][0] : vector<1x2xf32> +// OUTERPRODUCT: %[[b0:.*]] = vector.extract %[[Bt]][0] : vector<1x3xf32> +// OUTERPRODUCT: %[[c0:.*]] = vector.outerproduct %[[a0]], %[[b0]], %[[C]] +// OUTERPRODUCT: return %[[c0]] : vector<2x3xf32> +func @matmul_1(%arg0: vector<2x1xf32>, %arg1: vector<3x1xf32>, %arg2: vector<2x3xf32>) +-> vector<2x3xf32> +{ + %0 = vector.contract #matmat_trait_1 %arg0, %arg1, %arg2 + : vector<2x1xf32>, vector<3x1xf32> into vector<2x3xf32> + return %0 : vector<2x3xf32> +} + +#matmat_accesses_2 = [ + affine_map<(m, n, k) -> (k, m)>, + affine_map<(m, n, k) -> (k, n)>, + affine_map<(m, n, k) -> (m, n)> +] +#matmat_trait_2 = { + indexing_maps = #matmat_accesses_2, + iterator_types = ["parallel", "parallel", "reduction"] +} + +// OUTERPRODUCT-LABEL: func @matmul_2 +// OUTERPRODUCT-SAME: %[[A:[a-zA-Z0-9]*]]: vector<1x2xf32>, +// 
OUTERPRODUCT-SAME: %[[B:[a-zA-Z0-9]*]]: vector<1x3xf32>, +// OUTERPRODUCT-SAME: %[[C:[a-zA-Z0-9]*]]: vector<2x3xf32> +// OUTERPRODUCT: %[[a0:.*]] = vector.extract %[[A]][0] : vector<1x2xf32> +// OUTERPRODUCT: %[[b0:.*]] = vector.extract %[[B]][0] : vector<1x3xf32> +// OUTERPRODUCT: %[[c0:.*]] = vector.outerproduct %[[a0]], %[[b0]], %[[C]] +// OUTERPRODUCT: return %[[c0]] : vector<2x3xf32> +func @matmul_2(%arg0: vector<1x2xf32>, %arg1: vector<1x3xf32>, %arg2: vector<2x3xf32>) +-> vector<2x3xf32> +{ + %0 = vector.contract #matmat_trait_2 %arg0, %arg1, %arg2 + : vector<1x2xf32>, vector<1x3xf32> into vector<2x3xf32> + return %0 : vector<2x3xf32> +} + +#matmat_accesses_3 = [ + affine_map<(m, n, k) -> (k, m)>, + affine_map<(m, n, k) -> (n, k)>, + affine_map<(m, n, k) -> (m, n)> +] +#matmat_trait_3 = { + indexing_maps = #matmat_accesses_3, + iterator_types = ["parallel", "parallel", "reduction"] +} + +// OUTERPRODUCT-LABEL: func @matmul_3 +// OUTERPRODUCT-SAME: %[[A:[a-zA-Z0-9]*]]: vector<1x2xf32>, +// OUTERPRODUCT-SAME: %[[B:[a-zA-Z0-9]*]]: vector<3x1xf32>, +// OUTERPRODUCT-SAME: %[[C:[a-zA-Z0-9]*]]: vector<2x3xf32> +// OUTERPRODUCT: %[[Bt:.*]] = vector.transpose %[[B]], [1, 0] +// OUTERPRODUCT: %[[a0:.*]] = vector.extract %[[A]][0] : vector<1x2xf32> +// OUTERPRODUCT: %[[b0:.*]] = vector.extract %[[Bt]][0] : vector<1x3xf32> +// OUTERPRODUCT: %[[c0:.*]] = vector.outerproduct %[[a0]], %[[b0]], %[[C]] +// OUTERPRODUCT: return %[[c0]] : vector<2x3xf32> +func @matmul_3(%arg0: vector<1x2xf32>, %arg1: vector<3x1xf32>, %arg2: vector<2x3xf32>) +-> vector<2x3xf32> +{ + %0 = vector.contract #matmat_trait_3 %arg0, %arg1, %arg2 + : vector<1x2xf32>, vector<3x1xf32> into vector<2x3xf32> + return %0 : vector<2x3xf32> +} + +#matmat_accesses_4 = [ + affine_map<(m, n, k) -> (m, k)>, + affine_map<(m, n, k) -> (k, n)>, + affine_map<(m, n, k) -> (n, m)> +] +#matmat_trait_4 = { + indexing_maps = #matmat_accesses_4, + iterator_types = ["parallel", "parallel", "reduction"] +} + +// 
OUTERPRODUCT-LABEL: func @matmul_4 +// OUTERPRODUCT-SAME: %[[A:[a-zA-Z0-9]*]]: vector<2x1xf32>, +// OUTERPRODUCT-SAME: %[[B:[a-zA-Z0-9]*]]: vector<1x3xf32>, +// OUTERPRODUCT-SAME: %[[C:[a-zA-Z0-9]*]]: vector<3x2xf32> +// OUTERPRODUCT: %[[At:.*]] = vector.transpose %[[A]], [1, 0] +// OUTERPRODUCT: %[[b0:.*]] = vector.extract %[[B]][0] : vector<1x3xf32> +// OUTERPRODUCT: %[[a0:.*]] = vector.extract %[[At]][0] : vector<1x2xf32> +// OUTERPRODUCT: %[[c0:.*]] = vector.outerproduct %[[b0]], %[[a0]], %[[C]] +// OUTERPRODUCT: return %[[c0]] : vector<3x2xf32> +func @matmul_4(%arg0: vector<2x1xf32>, %arg1: vector<1x3xf32>, %arg2: vector<3x2xf32>) +-> vector<3x2xf32> +{ + %0 = vector.contract #matmat_trait_4 %arg0, %arg1, %arg2 + : vector<2x1xf32>, vector<1x3xf32> into vector<3x2xf32> + return %0 : vector<3x2xf32> +} + +#matmat_accesses_5 = [ + affine_map<(m, n, k) -> (m, k)>, + affine_map<(m, n, k) -> (k, n)>, + affine_map<(m, n, k) -> (n, m)> +] +#matmat_trait_5 = { + indexing_maps = #matmat_accesses_5, + iterator_types = ["parallel", "parallel", "reduction"] +} + +// OUTERPRODUCT-LABEL: func @matmul_5 +// OUTERPRODUCT-SAME: %[[A:[a-zA-Z0-9]*]]: vector<2x1xf32>, +// OUTERPRODUCT-SAME: %[[B:[a-zA-Z0-9]*]]: vector<1x3xf32>, +// OUTERPRODUCT-SAME: %[[C:[a-zA-Z0-9]*]]: vector<3x2xf32> +// OUTERPRODUCT: %[[At:.*]] = vector.transpose %[[A]], [1, 0] +// OUTERPRODUCT-DAG: %[[a0:.*]] = vector.extract %[[At]][0] : vector<1x2xf32> +// OUTERPRODUCT-DAG: %[[b0:.*]] = vector.extract %[[B]][0] : vector<1x3xf32> +// OUTERPRODUCT: %[[c0:.*]] = vector.outerproduct %[[b0]], %[[a0]], %[[C]] +// OUTERPRODUCT: return %[[c0]] : vector<3x2xf32> +func @matmul_5(%arg0: vector<2x1xf32>, %arg1: vector<1x3xf32>, %arg2: vector<3x2xf32>) +-> vector<3x2xf32> +{ + %0 = vector.contract #matmat_trait_5 %arg0, %arg1, %arg2 + : vector<2x1xf32>, vector<1x3xf32> into vector<3x2xf32> + return %0 : vector<3x2xf32> +} + +#matmat_accesses_6 = [ + affine_map<(m, n, k) -> (m, k)>, + affine_map<(m, n, k) -> (k, n)>, + 
affine_map<(m, n, k) -> (n, m)> +] +#matmat_trait_6 = { + indexing_maps = #matmat_accesses_6, + iterator_types = ["parallel", "parallel", "reduction"] +} + +// OUTERPRODUCT-LABEL: func @matmul_6 +// OUTERPRODUCT-SAME: %[[A:[a-zA-Z0-9]*]]: vector<2x1xf32>, +// OUTERPRODUCT-SAME: %[[B:[a-zA-Z0-9]*]]: vector<1x3xf32>, +// OUTERPRODUCT-SAME: %[[C:[a-zA-Z0-9]*]]: vector<3x2xf32> +// OUTERPRODUCT: %[[At:.*]] = vector.transpose %[[A]], [1, 0] +// OUTERPRODUCT-DAG: %[[a0:.*]] = vector.extract %[[At]][0] : vector<1x2xf32> +// OUTERPRODUCT-DAG: %[[b0:.*]] = vector.extract %[[B]][0] : vector<1x3xf32> +// OUTERPRODUCT: %[[c0:.*]] = vector.outerproduct %[[b0]], %[[a0]], %[[C]] +// OUTERPRODUCT: return %[[c0]] : vector<3x2xf32> +func @matmul_6(%arg0: vector<2x1xf32>, %arg1: vector<1x3xf32>, %arg2: vector<3x2xf32>) +-> vector<3x2xf32> +{ + %0 = vector.contract #matmat_trait_6 %arg0, %arg1, %arg2 + : vector<2x1xf32>, vector<1x3xf32> into vector<3x2xf32> + return %0 : vector<3x2xf32> +} + +#matmat_accesses_7 = [ + affine_map<(m, n, k) -> (m, k)>, + affine_map<(m, n, k) -> (k, n)>, + affine_map<(m, n, k) -> (n, m)> +] +#matmat_trait_7 = { + indexing_maps = #matmat_accesses_7, + iterator_types = ["parallel", "parallel", "reduction"] +} + +// OUTERPRODUCT-LABEL: func @matmul_7 +// OUTERPRODUCT-SAME: %[[A:[a-zA-Z0-9]*]]: vector<2x1xf32>, +// OUTERPRODUCT-SAME: %[[B:[a-zA-Z0-9]*]]: vector<1x3xf32>, +// OUTERPRODUCT-SAME: %[[C:[a-zA-Z0-9]*]]: vector<3x2xf32> +// OUTERPRODUCT: %[[At:.*]] = vector.transpose %[[A]], [1, 0] +// OUTERPRODUCT-DAG: %[[a0:.*]] = vector.extract %[[At]][0] : vector<1x2xf32> +// OUTERPRODUCT-DAG: %[[b0:.*]] = vector.extract %[[B]][0] : vector<1x3xf32> +// OUTERPRODUCT: %[[c0:.*]] = vector.outerproduct %[[b0]], %[[a0]], %[[C]] +// OUTERPRODUCT: return %[[c0]] : vector<3x2xf32> +func @matmul_7(%arg0: vector<2x1xf32>, %arg1: vector<1x3xf32>, %arg2: vector<3x2xf32>) +-> vector<3x2xf32> +{ + %0 = vector.contract #matmat_trait_7 %arg0, %arg1, %arg2 + : vector<2x1xf32>, 
vector<1x3xf32> into vector<3x2xf32> + return %0 : vector<3x2xf32> +} From 42725aeed8cbabc15e351e2854ae549df2c5dcde Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Thu, 30 Apr 2020 16:42:27 -0700 Subject: [PATCH 140/770] Process gep (select ptr1, ptr2) in SROA Differential Revision: https://reviews.llvm.org/D79217 --- llvm/lib/Transforms/Scalar/SROA.cpp | 51 ++++++++ llvm/test/Transforms/SROA/select-gep.ll | 149 ++++++++++++++++++++++++ 2 files changed, 200 insertions(+) create mode 100644 llvm/test/Transforms/SROA/select-gep.ll diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 016de041b2f8e..1d486a3e74fd1 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -3441,7 +3441,58 @@ class AggLoadStoreRewriter : public InstVisitor { return false; } + // Fold gep (select cond, ptr1, ptr2) => select cond, gep(ptr1), gep(ptr2) + bool foldGEPSelect(GetElementPtrInst &GEPI) { + if (!GEPI.hasAllConstantIndices()) + return false; + + SelectInst *Sel = cast(GEPI.getPointerOperand()); + + LLVM_DEBUG(dbgs() << " Rewriting gep(select) -> select(gep):" + << "\n original: " << *Sel + << "\n " << GEPI); + + IRBuilderTy Builder(&GEPI); + SmallVector Index(GEPI.idx_begin(), GEPI.idx_end()); + bool IsInBounds = GEPI.isInBounds(); + + Value *True = Sel->getTrueValue(); + Value *NTrue = + IsInBounds + ? Builder.CreateInBoundsGEP(True, Index, + True->getName() + ".sroa.gep") + : Builder.CreateGEP(True, Index, True->getName() + ".sroa.gep"); + + Value *False = Sel->getFalseValue(); + + Value *NFalse = + IsInBounds + ? 
Builder.CreateInBoundsGEP(False, Index, + False->getName() + ".sroa.gep") + : Builder.CreateGEP(False, Index, False->getName() + ".sroa.gep"); + + Value *NSel = Builder.CreateSelect(Sel->getCondition(), NTrue, NFalse, + Sel->getName() + ".sroa.sel"); + GEPI.replaceAllUsesWith(NSel); + GEPI.eraseFromParent(); + + LLVM_DEBUG(dbgs() << "\n to: " << *NTrue + << "\n " << *NFalse + << "\n " << *NSel << '\n'); + + if (isa(NTrue)) + visit(cast(NTrue)); + if (isa(NFalse)) + visit(cast(NFalse)); + + return true; + } + bool visitGetElementPtrInst(GetElementPtrInst &GEPI) { + if (isa(GEPI.getPointerOperand()) && + foldGEPSelect(GEPI)) + return true; + enqueueUsers(GEPI); return false; } diff --git a/llvm/test/Transforms/SROA/select-gep.ll b/llvm/test/Transforms/SROA/select-gep.ll new file mode 100644 index 0000000000000..93cb3420d0af7 --- /dev/null +++ b/llvm/test/Transforms/SROA/select-gep.ll @@ -0,0 +1,149 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -sroa < %s | FileCheck %s + +%pair = type { i32, i32 } + +define i32 @test_sroa_select_gep(i1 %cond) { +; CHECK-LABEL: @test_sroa_select_gep( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[LOAD_SROA_SPECULATED:%.*]] = select i1 [[COND:%.*]], i32 1, i32 2 +; CHECK-NEXT: ret i32 [[LOAD_SROA_SPECULATED]] +; +bb: + %a = alloca %pair, align 4 + %b = alloca %pair, align 4 + %gep_a = getelementptr inbounds %pair, %pair* %a, i32 0, i32 1 + %gep_b = getelementptr inbounds %pair, %pair* %b, i32 0, i32 1 + store i32 1, i32* %gep_a, align 4 + store i32 2, i32* %gep_b, align 4 + %select = select i1 %cond, %pair* %a, %pair* %b + %gep = getelementptr inbounds %pair, %pair* %select, i32 0, i32 1 + %load = load i32, i32* %gep, align 4 + ret i32 %load +} + +define i32 @test_sroa_select_gep_non_inbound(i1 %cond) { +; CHECK-LABEL: @test_sroa_select_gep_non_inbound( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[LOAD_SROA_SPECULATED:%.*]] = select i1 [[COND:%.*]], i32 1, i32 2 +; CHECK-NEXT: ret i32 [[LOAD_SROA_SPECULATED]] 
+; +bb: + %a = alloca %pair, align 4 + %b = alloca %pair, align 4 + %gep_a = getelementptr %pair, %pair* %a, i32 0, i32 1 + %gep_b = getelementptr %pair, %pair* %b, i32 0, i32 1 + store i32 1, i32* %gep_a, align 4 + store i32 2, i32* %gep_b, align 4 + %select = select i1 %cond, %pair* %a, %pair* %b + %gep = getelementptr %pair, %pair* %select, i32 0, i32 1 + %load = load i32, i32* %gep, align 4 + ret i32 %load +} + +define i32 @test_sroa_select_gep_volatile_load(i1 %cond) { +; CHECK-LABEL: @test_sroa_select_gep_volatile_load( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A_SROA_2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B_SROA_0:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B_SROA_2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 11, i32* [[A_SROA_0]], align 4 +; CHECK-NEXT: store i32 12, i32* [[B_SROA_0]], align 4 +; CHECK-NEXT: store i32 21, i32* [[A_SROA_2]], align 4 +; CHECK-NEXT: store i32 22, i32* [[B_SROA_2]], align 4 +; CHECK-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 [[COND:%.*]], i32* [[A_SROA_0]], i32* [[B_SROA_0]] +; CHECK-NEXT: [[LOAD1:%.*]] = load volatile i32, i32* [[SELECT_SROA_SEL]], align 4 +; CHECK-NEXT: [[SELECT_SROA_SEL3:%.*]] = select i1 [[COND]], i32* [[A_SROA_2]], i32* [[B_SROA_2]] +; CHECK-NEXT: [[LOAD2:%.*]] = load volatile i32, i32* [[SELECT_SROA_SEL3]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LOAD1]], [[LOAD2]] +; CHECK-NEXT: ret i32 [[ADD]] +; +bb: + %a = alloca %pair, align 4 + %b = alloca %pair, align 4 + %gep_a0 = getelementptr inbounds %pair, %pair* %a, i32 0, i32 0 + %gep_b0 = getelementptr inbounds %pair, %pair* %b, i32 0, i32 0 + store i32 11, i32* %gep_a0, align 4 + store i32 12, i32* %gep_b0, align 4 + %gep_a1 = getelementptr inbounds %pair, %pair* %a, i32 0, i32 1 + %gep_b1 = getelementptr inbounds %pair, %pair* %b, i32 0, i32 1 + store i32 21, i32* %gep_a1, align 4 + store i32 22, i32* %gep_b1, align 4 + %select = select i1 %cond, %pair* %a, %pair* %b + %gep1 = 
getelementptr inbounds %pair, %pair* %select, i32 0, i32 0 + %load1 = load volatile i32, i32* %gep1, align 4 + %gep2 = getelementptr inbounds %pair, %pair* %select, i32 0, i32 1 + %load2 = load volatile i32, i32* %gep2, align 4 + %add = add i32 %load1, %load2 + ret i32 %add +} + +define i32 @test_sroa_select_gep_undef(i1 %cond) { +; CHECK-LABEL: @test_sroa_select_gep_undef( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 [[COND:%.*]], i32* [[A_SROA_0]], i32* undef +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SELECT_SROA_SEL]], align 4 +; CHECK-NEXT: ret i32 [[LOAD]] +; +bb: + %a = alloca %pair, align 4 + %select = select i1 %cond, %pair* %a, %pair* undef + %gep = getelementptr inbounds %pair, %pair* %select, i32 0, i32 1 + %load = load i32, i32* %gep, align 4 + ret i32 %load +} + +define i32 @test_sroa_gep_select_gep(i1 %cond) { +; CHECK-LABEL: @test_sroa_gep_select_gep( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B_SROA_0:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 1, i32* [[A_SROA_0]], align 4 +; CHECK-NEXT: store i32 2, i32* [[B_SROA_0]], align 4 +; CHECK-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 [[COND:%.*]], i32* [[A_SROA_0]], i32* [[B_SROA_0]] +; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[COND]], i32* [[SELECT_SROA_SEL]], i32* [[A_SROA_0]] +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SELECT2]], align 4 +; CHECK-NEXT: ret i32 [[LOAD]] +; +bb: + %a = alloca %pair, align 4 + %b = alloca %pair, align 4 + %gep_a = getelementptr inbounds %pair, %pair* %a, i32 0, i32 1 + %gep_b = getelementptr inbounds %pair, %pair* %b, i32 0, i32 1 + store i32 1, i32* %gep_a, align 4 + store i32 2, i32* %gep_b, align 4 + %select = select i1 %cond, i32* %gep_a, i32* %gep_b + %gep = getelementptr inbounds i32, i32* %select, i32 0 + %select2 = select i1 %cond, i32* %gep, i32* %gep_a + %load = load i32, i32* %select2, align 4 + ret i32 %load +} + +define 
i32 @test_sroa_gep_select_gep_nonconst_idx(i1 %cond, i32 %idx) { +; CHECK-LABEL: @test_sroa_gep_select_gep_nonconst_idx( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[A:%.*]] = alloca [[PAIR:%.*]], align 4 +; CHECK-NEXT: [[B:%.*]] = alloca [[PAIR]], align 4 +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[A]], i32 0, i32 1 +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[B]], i32 0, i32 1 +; CHECK-NEXT: store i32 1, i32* [[GEP_A]], align 4 +; CHECK-NEXT: store i32 2, i32* [[GEP_B]], align 4 +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND:%.*]], %pair* [[A]], %pair* [[B]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[SELECT]], i32 [[IDX:%.*]], i32 1 +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[GEP]], align 4 +; CHECK-NEXT: ret i32 [[LOAD]] +; +bb: + %a = alloca %pair, align 4 + %b = alloca %pair, align 4 + %gep_a = getelementptr inbounds %pair, %pair* %a, i32 0, i32 1 + %gep_b = getelementptr inbounds %pair, %pair* %b, i32 0, i32 1 + store i32 1, i32* %gep_a, align 4 + store i32 2, i32* %gep_b, align 4 + %select = select i1 %cond, %pair* %a, %pair* %b + %gep = getelementptr inbounds %pair, %pair* %select, i32 %idx, i32 1 + %load = load i32, i32* %gep, align 4 + ret i32 %load +} From bd7ff5d94f0f591206188267a0e1529fa13d6c2e Mon Sep 17 00:00:00 2001 From: Chris Jackson Date: Tue, 26 May 2020 19:28:34 +0100 Subject: [PATCH 141/770] [DebugInfo] Correct debuginfo for post-ra hoist and sink in Machine LICM Reviewers: vsk, aprantl Differential Revision: https://reviews.llvm.org/D79868 --- llvm/lib/CodeGen/MachineLICM.cpp | 12 + .../MIR/X86/mlicm-hoist-post-regalloc.mir | 93 ++++++++ ...hoist.mir => mlicm-hoist-pre-regalloc.mir} | 33 +-- llvm/test/DebugInfo/MIR/X86/mlicm-sink.mir | 216 ++++++++++++++++++ 4 files changed, 324 insertions(+), 30 deletions(-) create mode 100644 llvm/test/DebugInfo/MIR/X86/mlicm-hoist-post-regalloc.mir rename llvm/test/DebugInfo/MIR/X86/{mlicm-hoist.mir => 
mlicm-hoist-pre-regalloc.mir} (79%) create mode 100644 llvm/test/DebugInfo/MIR/X86/mlicm-sink.mir diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index 0a3e9dcd3af7e..2a60858b6de21 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -635,6 +635,11 @@ void MachineLICMBase::HoistPostRA(MachineInstr *MI, unsigned Def) { MachineBasicBlock *MBB = MI->getParent(); Preheader->splice(Preheader->getFirstTerminator(), MBB, MI); + // Since we are moving the instruction out of its basic block, we do not + // retain its debug location. Doing so would degrade the debugging + // experience and adversely affect the accuracy of profiling information. + MI->setDebugLoc(DebugLoc()); + // Add register to livein list to all the BBs in the current loop since a // loop invariant must be kept live throughout the whole loop. This is // important to ensure later passes do not scavenge the def register. @@ -829,7 +834,14 @@ void MachineLICMBase::SinkIntoLoop() { } if (!CanSink || !B || B == Preheader) continue; + + LLVM_DEBUG(dbgs() << "Sinking to " << printMBBReference(*B) << " from " + << printMBBReference(*I->getParent()) << ": " << *I); B->splice(B->getFirstNonPHI(), Preheader, I); + + // The instruction is moved from its basic block, so do not retain the + // debug information. + I->setDebugLoc(DebugLoc()); } } diff --git a/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-post-regalloc.mir b/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-post-regalloc.mir new file mode 100644 index 0000000000000..91f77d331f184 --- /dev/null +++ b/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-post-regalloc.mir @@ -0,0 +1,93 @@ +--- | + ; RUN: llc -start-before=phi-node-elimination -stop-after=machinelicm -debug-only=machinelicm -o - %s | FileCheck %s + ; Ensure we execute machinelicm post register allocation. + ; Line numbers should not be retained when loop invariant instructions are hoisted. 
+ ; + ; CHECK-LABEL: bb.0.entry: + ; CHECK: MOV64rm $rip, 1, $noreg, target-flags(x86-gotpcrel) @x, $noreg :: (load 8 from got) + ; CHECK-LABEL: bb.1.while.body: + ; + + @x = common local_unnamed_addr global i32 0, align 4, !dbg !0 + + define void @Process(i32* nocapture readonly %p) !dbg !10 { + entry: + call void @llvm.dbg.value(metadata i32* %p, metadata !17, metadata !DIExpression()), !dbg !18 + br label %while.body, !dbg !19 + + while.body: ; preds = %while.body, %entry + %p.addr.0 = phi i32* [ %p, %entry ], [ %incdec.ptr, %while.body ] + call void @llvm.dbg.value(metadata i32* %p.addr.0, metadata !17, metadata !DIExpression()), !dbg !18 + %incdec.ptr = getelementptr inbounds i32, i32* %p.addr.0, i64 1, !dbg !20 + call void @llvm.dbg.value(metadata i32* %incdec.ptr, metadata !17, metadata !DIExpression()), !dbg !18 + %0 = load i32, i32* %p.addr.0, align 4, !dbg !21 + store i32 %0, i32* @x, align 4, !dbg !22 + br label %while.body, !dbg !23, !llvm.loop !25 + } + + declare void @llvm.dbg.value(metadata, metadata, metadata) + + + !llvm.dbg.cu = !{!2} + !llvm.module.flags = !{!7, !8} + !llvm.ident = !{!9} + + !0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) + !1 = !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !6, isLocal: false, isDefinition: true) + !2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 10.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5) + !3 = !DIFile(filename: "t.ll", directory: "/tmp/") + !4 = !{} + !5 = !{!0} + !6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !7 = !{i32 2, !"Dwarf Version", i32 4} + !8 = !{i32 2, !"Debug Info Version", i32 3} + !9 = !{!"clang version 10.0.0 "} + !10 = distinct !DISubprogram(name: "Process", scope: !3, file: !3, line: 2, type: !11, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !16) + !11 = !DISubroutineType(types: 
!12) + !12 = !{null, !13} + !13 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !14, size: 64) + !14 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !15) + !15 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) + !16 = !{!17} + !17 = !DILocalVariable(name: "p", arg: 1, scope: !10, file: !3, line: 2, type: !13) + !18 = !DILocation(line: 2, column: 34, scope: !10) + !19 = !DILocation(line: 4, column: 3, scope: !10) + !20 = !DILocation(line: 5, column: 11, scope: !10) + !21 = !DILocation(line: 5, column: 9, scope: !10) + !22 = !DILocation(line: 5, column: 7, scope: !10) + !23 = !DILocation(line: 4, column: 3, scope: !24) + !24 = !DILexicalBlockFile(scope: !10, file: !3, discriminator: 1) + !25 = distinct !{!25, !19, !20} + +... +--- +name: Process +tracksRegLiveness: true +registers: + - { id: 0, class: gr64 } + - { id: 1, class: gr64 } + - { id: 2, class: gr64 } + - { id: 3, class: gr32 } + - { id: 4, class: gr64 } +body: | + bb.0.entry: + successors: %bb.1.while.body(0x80000000) + liveins: $rdi + + DBG_VALUE $rdi, _, !17, !DIExpression(), debug-location !18 + %2 = COPY $rdi + DBG_VALUE %2, _, !17, !DIExpression(), debug-location !18 + + bb.1.while.body: + successors: %bb.1.while.body(0x80000000) + + %0 = PHI %2, %bb.0.entry, %1, %bb.1.while.body + DBG_VALUE %0, _, !17, !DIExpression(), debug-location !18 + %1 = ADD64ri8 %0, 4, implicit-def dead $eflags, debug-location !20 + DBG_VALUE %1, _, !17, !DIExpression(), debug-location !18 + %3 = MOV32rm %0, 1, _, 0, _, debug-location !21 :: (load 4 from %ir.p.addr.0) + %4 = MOV64rm $rip, 1, _, target-flags(x86-gotpcrel) @x, _, debug-location !22 :: (load 8 from got) + MOV32mr killed %4, 1, _, 0, _, killed %3, debug-location !22 :: (store 4 into @x) + JMP_1 %bb.1.while.body, debug-location !23 + +... 
diff --git a/llvm/test/DebugInfo/MIR/X86/mlicm-hoist.mir b/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-pre-regalloc.mir similarity index 79% rename from llvm/test/DebugInfo/MIR/X86/mlicm-hoist.mir rename to llvm/test/DebugInfo/MIR/X86/mlicm-hoist-pre-regalloc.mir index 61dabf8910b67..8c0eb376eb408 100644 --- a/llvm/test/DebugInfo/MIR/X86/mlicm-hoist.mir +++ b/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-pre-regalloc.mir @@ -23,13 +23,11 @@ ; ; ModuleID = 'tx.ll' source_filename = "t.c" - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - target triple = "x86_64-unknown-linux-gnu" @x = common local_unnamed_addr global i32 0, align 4, !dbg !0 ; Function Attrs: noreturn nounwind uwtable - define void @Process(i32* nocapture readonly %p) local_unnamed_addr #0 !dbg !9 { + define void @Process(i32* nocapture readonly %p) local_unnamed_addr !dbg !9 { entry: tail call void @llvm.dbg.value(metadata i32* %p, i64 0, metadata !16, metadata !17), !dbg !18 br label %while.body, !dbg !19 @@ -45,11 +43,7 @@ } ; Function Attrs: nounwind readnone - declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1 - - attributes #0 = { noreturn nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } - attributes #1 = { nounwind readnone } - attributes #2 = { nounwind } + declare void @llvm.dbg.value(metadata, i64, metadata, metadata) !llvm.dbg.cu = !{!1} !llvm.module.flags = !{!6, !7} @@ -80,7 +74,7 @@ !22 = !{!23, !23, i64 0} !23 = !{!"int", !24, i64 0} !24 = !{!"omnipotent char", !25, i64 0} - !25 = !{!"Simple C/C++ TBAA"} + !25 = !{!"C++"} !26 = !DILocation(line: 5, column: 7, scope: !9) !27 = 
!DILocation(line: 4, column: 3, scope: !28) !28 = !DILexicalBlockFile(scope: !9, file: !2, discriminator: 1) @@ -89,11 +83,6 @@ ... --- name: Process -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false tracksRegLiveness: true registers: - { id: 0, class: gr64 } @@ -101,22 +90,6 @@ registers: - { id: 2, class: gr64 } - { id: 3, class: gr32 } - { id: 4, class: gr64 } -liveins: - - { reg: '$rdi', virtual-reg: '%2' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false body: | bb.0.entry: successors: %bb.1.while.body(0x80000000) diff --git a/llvm/test/DebugInfo/MIR/X86/mlicm-sink.mir b/llvm/test/DebugInfo/MIR/X86/mlicm-sink.mir new file mode 100644 index 0000000000000..7b5a19ffa9e7f --- /dev/null +++ b/llvm/test/DebugInfo/MIR/X86/mlicm-sink.mir @@ -0,0 +1,216 @@ +--- | + ; RUN: llc --run-pass=machinelicm -sink-insts-to-avoid-spills %s -o - | FileCheck %s --match-full-lines + ; CHECK-LABEL: bb.4 (%ir-block.9): + ; CHECK: %0:gr64 = nuw ADD64ri8 %9, 4, implicit-def dead $eflags + ; + ; When instructions are sunk to prevent register spills, line numbers should not be retained. 
+ + %struct.A = type { i32, i32, i32, i32, i32, i32 } + + define void @p(i8* nocapture readonly %input, %struct.A* %a) !dbg !10 { + %1 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 1, !dbg !18 + %2 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 2 + %3 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 3 + %4 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 4 + %5 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 5 + %scevgep = getelementptr i8, i8* %input, i64 1 + br label %.backedge + + .backedge: ; preds = %.backedge.backedge, %0 + %lsr.iv = phi i8* [ %scevgep1, %.backedge.backedge ], [ %scevgep, %0 ] + %6 = load i8, i8* %lsr.iv, align 1 + switch i8 %6, label %.backedge.backedge [ + i8 0, label %7 + i8 10, label %9 + i8 20, label %10 + i8 30, label %11 + i8 40, label %12 + i8 50, label %13 + ] + + 7: ; preds = %.backedge + %8 = bitcast %struct.A* %a to i32* + tail call void @f(i32* %8) + br label %.backedge.backedge + + 9: ; preds = %.backedge + tail call void @f(i32* %1) + br label %.backedge.backedge + + .backedge.backedge: ; preds = %13, %12, %11, %10, %9, %7, %.backedge + %scevgep1 = getelementptr i8, i8* %lsr.iv, i64 1 + br label %.backedge + + 10: ; preds = %.backedge + tail call void @f(i32* %2) + br label %.backedge.backedge + + 11: ; preds = %.backedge + tail call void @f(i32* %3) + br label %.backedge.backedge + + 12: ; preds = %.backedge + tail call void @f(i32* %4) + br label %.backedge.backedge + + 13: ; preds = %.backedge + tail call void @f(i32* %5) + br label %.backedge.backedge + } + + declare void @f(i32*) + + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) + + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!7, !8} + !llvm.ident = !{!9} + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 10.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !3) + !1 = !DIFile(filename: "t.ll", 
directory: "tmp/X86") + !2 = !{} + !3 = !{!4} + !4 = !DIGlobalVariableExpression(var: !5, expr: !DIExpression()) + !5 = !DIGlobalVariable(name: "x", scope: !0, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true) + !6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !7 = !{i32 2, !"Dwarf Version", i32 4} + !8 = !{i32 2, !"Debug Info Version", i32 3} + !9 = !{!"clang version 10.0.0 "} + !10 = distinct !DISubprogram(name: "p", scope: !1, file: !1, line: 2, type: !11, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !16) + !11 = !DISubroutineType(types: !12) + !12 = !{null, !13} + !13 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !14, size: 64) + !14 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !15) + !15 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) + !16 = !{!17} + !17 = !DILocalVariable(name: "a", arg: 1, scope: !10, file: !1, line: 2, type: !15) + !18 = !DILocation(line: 4, column: 3, scope: !10) + + +... 
+--- +name: p +tracksRegLiveness: true +registers: + - { id: 0, class: gr64, preferred-register: '' } + - { id: 1, class: gr64, preferred-register: '' } + - { id: 2, class: gr64, preferred-register: '' } + - { id: 3, class: gr64, preferred-register: '' } + - { id: 4, class: gr64, preferred-register: '' } + - { id: 5, class: gr64, preferred-register: '' } + - { id: 6, class: gr64, preferred-register: '' } + - { id: 7, class: gr64, preferred-register: '' } + - { id: 8, class: gr64, preferred-register: '' } + - { id: 9, class: gr64, preferred-register: '' } + - { id: 10, class: gr64_nosp, preferred-register: '' } + - { id: 11, class: gr32, preferred-register: '' } + - { id: 12, class: gr64, preferred-register: '' } + - { id: 13, class: gr64, preferred-register: '' } + - { id: 14, class: gr64, preferred-register: '' } + - { id: 15, class: gr64, preferred-register: '' } +jumpTable: + kind: label-difference32 + entries: + - id: 0 + blocks: [ '%bb.2', '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', + '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.3', '%bb.4', + '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', + '%bb.4', '%bb.4', '%bb.5', '%bb.4', '%bb.4', '%bb.4', + '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', + '%bb.6', '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', + '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.7', '%bb.4', + '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', + '%bb.4', '%bb.4', '%bb.8' ] +body: | + bb.0 (%ir-block.0): + successors: %bb.1(0x80000000) + liveins: $rdi, $rsi + + %9:gr64 = COPY $rsi + %8:gr64 = COPY $rdi + %0:gr64 = nuw ADD64ri8 %9, 4, implicit-def dead $eflags, debug-location !18 + %1:gr64 = nuw ADD64ri8 %9, 8, implicit-def dead $eflags + %2:gr64 = nuw ADD64ri8 %9, 12, implicit-def dead $eflags + %3:gr64 = nuw ADD64ri8 %9, 16, implicit-def dead $eflags + %4:gr64 = nuw ADD64ri8 %9, 20, implicit-def dead $eflags + %5:gr64 = INC64r %8, implicit-def dead $eflags + + bb.1..backedge: + successors: %bb.4(0x09249249), %bb.9(0x76db6db7) + + 
%6:gr64 = PHI %5, %bb.0, %7, %bb.4 + %11:gr32 = MOVZX32rm8 %6, 1, $noreg, 0, $noreg :: (load 1 from %ir.lsr.iv) + %10:gr64_nosp = SUBREG_TO_REG 0, killed %11, %subreg.sub_32bit + %12:gr64 = SUB64ri8 %10, 50, implicit-def $eflags + JCC_1 %bb.4, 7, implicit $eflags + + bb.9..backedge: + successors: %bb.2(0x13b13b14), %bb.4(0x09d89d8a), %bb.3(0x13b13b14), %bb.5(0x13b13b14), %bb.6(0x13b13b14), %bb.7(0x13b13b14), %bb.8(0x13b13b14) + + %13:gr64 = LEA64r $rip, 1, $noreg, %jump-table.0, $noreg + %14:gr64 = MOVSX64rm32 %13, 4, %10, 0, $noreg :: (load 4 from jump-table) + %15:gr64 = ADD64rr %14, %13, implicit-def dead $eflags + JMP64r killed %15 + + bb.2 (%ir-block.7): + successors: %bb.4(0x80000000) + + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + $rdi = COPY %9 + CALL64pcrel32 target-flags(x86-plt) @f, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + JMP_1 %bb.4 + + bb.3 (%ir-block.9): + successors: %bb.4(0x80000000) + + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + $rdi = COPY %0 + CALL64pcrel32 target-flags(x86-plt) @f, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + + bb.4..backedge.backedge: + successors: %bb.1(0x80000000) + + %7:gr64 = INC64r %6, implicit-def dead $eflags + JMP_1 %bb.1 + + bb.5 (%ir-block.10): + successors: %bb.4(0x80000000) + + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + $rdi = COPY %1 + CALL64pcrel32 target-flags(x86-plt) @f, csr_64, implicit $rsp, 
implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + JMP_1 %bb.4 + + bb.6 (%ir-block.11): + successors: %bb.4(0x80000000) + + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + $rdi = COPY %2 + CALL64pcrel32 target-flags(x86-plt) @f, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + JMP_1 %bb.4 + + bb.7 (%ir-block.12): + successors: %bb.4(0x80000000) + + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + $rdi = COPY %3 + CALL64pcrel32 target-flags(x86-plt) @f, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + JMP_1 %bb.4 + + bb.8 (%ir-block.13): + successors: %bb.4(0x80000000) + + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + $rdi = COPY %4 + CALL64pcrel32 target-flags(x86-plt) @f, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + JMP_1 %bb.4 + +... 
From e1d2cecec5197af7104e4c50e6aed4313d512cda Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 26 May 2020 13:15:43 -0700 Subject: [PATCH 142/770] [lldb/Test] Cleanup TestSymbolTable.py (NFC) --- .../test/API/lang/objc/foundation/TestSymbolTable.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/lldb/test/API/lang/objc/foundation/TestSymbolTable.py b/lldb/test/API/lang/objc/foundation/TestSymbolTable.py index f3331e829c27e..df4860f148260 100644 --- a/lldb/test/API/lang/objc/foundation/TestSymbolTable.py +++ b/lldb/test/API/lang/objc/foundation/TestSymbolTable.py @@ -17,8 +17,7 @@ class FoundationSymtabTestCase(TestBase): '-[MyString dealloc]', '-[MyString description]', '-[MyString descriptionPauses]', # synthesized property - # synthesized property - '-[MyString setDescriptionPauses:]', + '-[MyString setDescriptionPauses:]', # synthesized property 'Test_Selector', 'Test_NSString', 'Test_MyString', @@ -31,14 +30,13 @@ def test_with_python_api(self): """Test symbol table access with Python APIs.""" self.build() exe = self.getBuildArtifact("a.out") - self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET) - target = self.dbg.CreateTarget(exe) self.assertTrue(target, VALID_TARGET) # Launch the process, and do not stop at the entry point. process = target.LaunchSimple( None, None, self.get_process_working_directory()) + self.assertTrue(process, PROCESS_IS_VALID) # Create the filespec by which to locate our a.out module. 
filespec = lldb.SBFileSpec(exe, False) @@ -51,14 +49,14 @@ def test_with_python_api(self): expected_symbols = set(self.symbols_list) for symbol in module: self.assertTrue(symbol, VALID_SYMBOL) - #print("symbol:", symbol) + self.trace("symbol:", symbol) name = symbol.GetName() if name in expected_symbols: - #print("Removing %s from known_symbols %s" % (name, expected_symbols)) + self.trace("Removing %s from known_symbols %s" % (name, expected_symbols)) expected_symbols.remove(name) # At this point, the known_symbols set should have become an empty set. # If not, raise an error. - #print("symbols unaccounted for:", expected_symbols) + self.trace("symbols unaccounted for:", expected_symbols) self.assertTrue(len(expected_symbols) == 0, "All the known symbols are accounted for") From e9003207591e4830bcce2de1631db901f8c4f2b8 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Tue, 26 May 2020 16:21:15 -0400 Subject: [PATCH 143/770] [mlir] Hotfix - Drop spurious constexpr that breaks build --- mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h b/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h index 02d2762560767..13db3a2a88d2a 100644 --- a/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h +++ b/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h @@ -81,21 +81,21 @@ constexpr StringRef getPaddingAttrName() { return "padding"; } /// Use to encode that a particular iterator type has parallel semantics. constexpr StringRef getParallelIteratorTypeName() { return "parallel"; } -constexpr bool isParallelIterator(Attribute attr) { +bool isParallelIterator(Attribute attr) { auto strAttr = attr.dyn_cast_or_null(); return strAttr && strAttr.getValue() == getParallelIteratorTypeName(); } /// Use to encode that a particular iterator type has reduction semantics. 
constexpr StringRef getReductionIteratorTypeName() { return "reduction"; } -constexpr bool isReductionIterator(Attribute attr) { +bool isReductionIterator(Attribute attr) { auto strAttr = attr.dyn_cast_or_null(); return strAttr && strAttr.getValue() == getReductionIteratorTypeName(); } /// Use to encode that a particular iterator type has window semantics. constexpr StringRef getWindowIteratorTypeName() { return "window"; } -constexpr bool isWindowIterator(Attribute attr) { +bool isWindowIterator(Attribute attr) { auto strAttr = attr.dyn_cast_or_null(); return strAttr && strAttr.getValue() == getWindowIteratorTypeName(); } From c990bdf7f8761f047fac85615377835edf015698 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Tue, 26 May 2020 16:24:56 -0400 Subject: [PATCH 144/770] [mlir] Hotfix - Add inline to avoid multiple symbols on trivial functions --- mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h b/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h index 13db3a2a88d2a..168e877e50561 100644 --- a/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h +++ b/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h @@ -81,21 +81,21 @@ constexpr StringRef getPaddingAttrName() { return "padding"; } /// Use to encode that a particular iterator type has parallel semantics. constexpr StringRef getParallelIteratorTypeName() { return "parallel"; } -bool isParallelIterator(Attribute attr) { +inline bool isParallelIterator(Attribute attr) { auto strAttr = attr.dyn_cast_or_null(); return strAttr && strAttr.getValue() == getParallelIteratorTypeName(); } /// Use to encode that a particular iterator type has reduction semantics. 
constexpr StringRef getReductionIteratorTypeName() { return "reduction"; } -bool isReductionIterator(Attribute attr) { +inline bool isReductionIterator(Attribute attr) { auto strAttr = attr.dyn_cast_or_null(); return strAttr && strAttr.getValue() == getReductionIteratorTypeName(); } /// Use to encode that a particular iterator type has window semantics. constexpr StringRef getWindowIteratorTypeName() { return "window"; } -bool isWindowIterator(Attribute attr) { +inline bool isWindowIterator(Attribute attr) { auto strAttr = attr.dyn_cast_or_null(); return strAttr && strAttr.getValue() == getWindowIteratorTypeName(); } From bb10fa3a53f928e2e24ad3eaf8e57508fe9d4320 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 24 May 2020 12:28:59 -0400 Subject: [PATCH 145/770] AMDGPU: Fix wrong null value for private address space I'm guessing this was a holdover from when 0 was an invalid stack pointer, but surprised nobody has discovered this before. Also don't allow offset folding for -1 pointers, since it looks weird to partially fold this. 
--- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 36 +++++++------- .../AMDGPU/AMDGPUInstructionSelector.cpp | 5 +- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h | 3 +- .../GlobalISel/inst-select-load-private.mir | 28 +++++++++++ .../GlobalISel/legalize-addrspacecast.mir | 26 +++++----- llvm/test/CodeGen/AMDGPU/addrspacecast.ll | 47 +++++++++++++++---- llvm/test/CodeGen/AMDGPU/nullptr.ll | 4 +- 7 files changed, 107 insertions(+), 42 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 52823c16d72d9..edd8ea39e0f42 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1482,22 +1482,26 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent, Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); if (ConstantSDNode *CAddr = dyn_cast(Addr)) { - unsigned Imm = CAddr->getZExtValue(); - - SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32); - MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, - DL, MVT::i32, HighBits); - VAddr = SDValue(MovHighBits, 0); - - // In a call sequence, stores to the argument stack area are relative to the - // stack pointer. - const MachinePointerInfo &PtrInfo = cast(Parent)->getPointerInfo(); - - SOffset = isStackPtrRelative(PtrInfo) - ? CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32) - : CurDAG->getTargetConstant(0, DL, MVT::i32); - ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16); - return true; + int64_t Imm = CAddr->getSExtValue(); + const int64_t NullPtr = + AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS); + // Don't fold null pointer. 
+ if (Imm != NullPtr) { + SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32); + MachineSDNode *MovHighBits = CurDAG->getMachineNode( + AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits); + VAddr = SDValue(MovHighBits, 0); + + // In a call sequence, stores to the argument stack area are relative to the + // stack pointer. + const MachinePointerInfo &PtrInfo + = cast(Parent)->getPointerInfo(); + SOffset = isStackPtrRelative(PtrInfo) + ? CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32) + : CurDAG->getTargetConstant(0, DL, MVT::i32); + ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16); + return true; + } } if (CurDAG->isBaseWithConstantOffset(Addr)) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index aee6c0dd8a8e0..5afec2188d66b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3062,7 +3062,8 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const { const SIMachineFunctionInfo *Info = MF->getInfo(); int64_t Offset = 0; - if (mi_match(Root.getReg(), *MRI, m_ICst(Offset))) { + if (mi_match(Root.getReg(), *MRI, m_ICst(Offset)) && + Offset != TM.getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS)) { Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); // TODO: Should this be inside the render function? The iterator seems to @@ -3091,7 +3092,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const { }}}; } - assert(Offset == 0); + assert(Offset == 0 || Offset == -1); // Try to fold a frame index directly into the MUBUF vaddr field, and any // offsets. 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h index 2ef6cd5b3e338..e223fecc88195 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -57,8 +57,9 @@ class AMDGPUTargetMachine : public LLVMTargetMachine { void adjustPassManager(PassManagerBuilder &) override; /// Get the integer value of a null pointer in the given address space. - uint64_t getNullPointerValue(unsigned AddrSpace) const { + static int64_t getNullPointerValue(unsigned AddrSpace) { return (AddrSpace == AMDGPUAS::LOCAL_ADDRESS || + AddrSpace == AMDGPUAS::PRIVATE_ADDRESS || AddrSpace == AMDGPUAS::REGION_ADDRESS) ? -1 : 0; } }; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir index 13e4035a48828..79284fdfd05f7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir @@ -843,3 +843,31 @@ body: | $vgpr0 = COPY %3 ... + +# Should not fold offset if this is a null dereference. 
+--- + +name: load_private_s32_from_neg1 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + + ; GFX6-LABEL: name: load_private_s32_from_neg1 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec + ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX9-LABEL: name: load_private_s32_from_neg1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec + ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + %0:vgpr(p5) = G_CONSTANT i32 -1 + %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5) + $vgpr0 = COPY %1 + +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir index 643bdd3b7d582..395d34a00081d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir @@ -173,12 +173,12 @@ body: | ; VI-LABEL: name: test_addrspacecast_p5_to_p0 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; VI: [[COPY1:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0 + ; VI: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1 ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 ; VI: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 - ; VI: [[GEP:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C2]](s64) - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (dereferenceable invariant load 4, addrspace 4) + ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C2]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, addrspace 4) ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](p5), [[C]] ; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p5) ; VI: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32) @@ -186,7 +186,7 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[SELECT]](p0) ; GFX9-LABEL: name: test_addrspacecast_p5_to_p0 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0 + ; GFX9: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1 ; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 ; GFX9: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 30735 ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -216,7 +216,7 @@ body: | ; VI-LABEL: name: test_addrspacecast_p0_to_p5 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0 + ; VI: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1 ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 ; VI: 
[[EXTRACT:%[0-9]+]]:_(p5) = G_EXTRACT [[COPY]](p0), 0 ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[C1]] @@ -224,7 +224,7 @@ body: | ; VI: $vgpr0 = COPY [[SELECT]](p5) ; GFX9-LABEL: name: test_addrspacecast_p0_to_p5 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0 + ; GFX9: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1 ; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 ; GFX9: [[EXTRACT:%[0-9]+]]:_(p5) = G_EXTRACT [[COPY]](p0), 0 ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[C1]] @@ -232,7 +232,7 @@ body: | ; GFX9: $vgpr0 = COPY [[SELECT]](p5) ; SI-LABEL: name: test_addrspacecast_p0_to_p5 ; SI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; SI: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0 + ; SI: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1 ; SI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 ; SI: [[EXTRACT:%[0-9]+]]:_(p5) = G_EXTRACT [[COPY]](p0), 0 ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[C1]] @@ -260,8 +260,8 @@ body: | ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 ; VI: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; VI: [[GEP:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C2]](s64) - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (dereferenceable invariant load 4, align 64, addrspace 4) + ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C2]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, align 64, addrspace 4) ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](p3), [[C]] ; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3) ; VI: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32) @@ -466,15 +466,15 @@ body: | ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 ; VI: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; VI: [[GEP:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C2]](s64) - ; VI: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (dereferenceable invariant load 4, align 64, addrspace 4) + ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C2]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, align 64, addrspace 4) ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[C]] ; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3) ; VI: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32) ; VI: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]] ; VI: [[COPY3:%[0-9]+]]:_(p4) = COPY [[COPY]](p4) - ; VI: [[GEP1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY3]], [[C2]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (dereferenceable invariant load 4, align 64, addrspace 4) + ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY3]], [[C2]](s64) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, align 64, addrspace 4) ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C]] ; VI: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3) ; VI: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT1]](s32), [[LOAD1]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll index 764f935097642..d16edbac75fe2 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll @@ -76,7 +76,7 @@ define void @use_group_to_flat_addrspacecast_func(i32 addrspace(3)* %ptr) #0 { ; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]] ; CI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7 -; CI-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], 0 +; CI-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], -1 ; CI-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc ; CI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] ; CI-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]] @@ -89,7 +89,7 @@ define void @use_group_to_flat_addrspacecast_func(i32 addrspace(3)* %ptr) #0 { ; 
GFX9-XXX: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_private_base ; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7 -; GFX9: v_cmp_ne_u32_e64 vcc, [[PTR]], 0 +; GFX9: v_cmp_ne_u32_e64 vcc, [[PTR]], -1 ; GFX9: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc ; GFX9: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] ; GFX9-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]] @@ -167,7 +167,7 @@ define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #0 { ; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}} ; HSA-DAG: v_cmp_ne_u64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}} ; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]] -; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], 0, v[[VPTR_LO]] +; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]] ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}} ; HSA: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}} define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #0 { @@ -252,12 +252,16 @@ define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 { ; FIXME: Shouldn't need to enable queue ptr ; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast: -; CI: enable_sgpr_queue_ptr = 1 -; GFX9: enable_sgpr_queue_ptr = 0 +; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11 +; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]] +; GFX9-DAG: s_getreg_b32 [[SSRC_SHARED:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 0, 16) +; GFX9-DAG: s_lshl_b32 [[SSRC_SHARED_BASE:s[0-9]+]], [[SSRC_SHARED]], 16 +; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SSRC_SHARED_BASE]] + +; GFX9-XXX: v_mov_b32_e32 v[[HI:[0-9]+]], src_shared_base ; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}} -; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} ; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]] define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 { %cast = addrspacecast i32 addrspace(5)* null to i32* @@ 
-266,14 +270,41 @@ define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 { } ; HSA-LABEL: {{^}}cast_0_flat_to_private_addrspacecast: -; HSA: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}} -; HSA: buffer_store_dword [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 +; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}} +; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}} +; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 { %cast = addrspacecast i32* null to i32 addrspace(5)* store volatile i32 7, i32 addrspace(5)* %cast ret void } + +; HSA-LABEL: {{^}}cast_neg1_private_to_flat_addrspacecast: +; CI: enable_sgpr_queue_ptr = 1 +; GFX9: enable_sgpr_queue_ptr = 0 + +; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} +; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}} +; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} +; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]] +define amdgpu_kernel void @cast_neg1_private_to_flat_addrspacecast() #0 { + %cast = addrspacecast i32 addrspace(5)* inttoptr (i32 -1 to i32 addrspace(5)*) to i32* + store volatile i32 7, i32* %cast + ret void +} + +; HSA-LABEL: {{^}}cast_neg1_flat_to_private_addrspacecast: +; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}} +; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}} +; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 +define amdgpu_kernel void @cast_neg1_flat_to_private_addrspacecast() #0 { + %cast = addrspacecast i32* inttoptr (i64 -1 to i32*) to i32 addrspace(5)* + store volatile i32 7, i32 addrspace(5)* %cast + ret void +} + + ; Disable optimizations in case there are optimizations added that ; specialize away generic pointer accesses. 
diff --git a/llvm/test/CodeGen/AMDGPU/nullptr.ll b/llvm/test/CodeGen/AMDGPU/nullptr.ll index 4eaf9836bb9d3..16292f0ebee08 100644 --- a/llvm/test/CodeGen/AMDGPU/nullptr.ll +++ b/llvm/test/CodeGen/AMDGPU/nullptr.ll @@ -4,7 +4,7 @@ %struct.S = type { i32 addrspace(5)*, i32 addrspace(1)*, i32 addrspace(4)*, i32 addrspace(3)*, i32*, i32 addrspace(2)*} ; CHECK-LABEL: nullptr_priv: -; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long -1 @nullptr_priv = global i32 addrspace(5)* addrspacecast (i32* null to i32 addrspace(5)*) ; CHECK-LABEL: nullptr_glob: @@ -98,7 +98,7 @@ @nullptr23 = global i32 addrspace(23)* addrspacecast (i32* null to i32 addrspace(23)*) ; CHECK-LABEL: structWithPointers: -; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long -1 ; GCN-NEXT: .zero 4 ; GCN-NEXT: .quad 0 ; R600-NEXT: .long 0 From 14de6e29b1315e9abe61d71e3e13f75bff80e1be Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 26 May 2020 12:51:46 -0700 Subject: [PATCH 146/770] [Clang][Driver] Add Bounds and Thread to SupportsCoverage list Summary: This permits combining -fsanitize-coverage with -fsanitize=bounds or -fsanitize=thread. Note that, GCC already supports combining these. Tested: - Add Clang end-to-end test checking IR is generated for both combinations of sanitizers. - Several previously failing TSAN tests now pass. 
Bugzilla: https://bugs.llvm.org/show_bug.cgi?id=45831 Reviewers: vitalybuka Reviewed By: vitalybuka Subscribers: #sanitizers, dvyukov, nickdesaulniers, cfe-commits Tags: #clang, #sanitizers Differential Revision: https://reviews.llvm.org/D79628 --- clang/lib/Driver/SanitizerArgs.cpp | 5 +++-- clang/test/CodeGen/sanitize-coverage.c | 22 +++++++++++++++++++ clang/test/Driver/fsanitize-coverage.c | 2 ++ .../sanitizer_coverage_inline8bit_counter.cpp | 1 - .../sanitizer_coverage_inline_bool_flag.cpp | 1 - .../TestCases/sanitizer_coverage_no_prune.cpp | 2 +- .../sanitizer_coverage_stack_depth.cpp | 2 -- ...sanitizer_coverage_trace_pc_guard-init.cpp | 1 - 8 files changed, 28 insertions(+), 8 deletions(-) create mode 100644 clang/test/CodeGen/sanitize-coverage.c diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index bc186fa5a5982..35e982a502ef6 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -43,11 +43,12 @@ static const SanitizerMask SupportsCoverage = SanitizerKind::KernelAddress | SanitizerKind::KernelHWAddress | SanitizerKind::MemTag | SanitizerKind::Memory | SanitizerKind::KernelMemory | SanitizerKind::Leak | - SanitizerKind::Undefined | SanitizerKind::Integer | + SanitizerKind::Undefined | SanitizerKind::Integer | SanitizerKind::Bounds | SanitizerKind::ImplicitConversion | SanitizerKind::Nullability | SanitizerKind::DataFlow | SanitizerKind::Fuzzer | SanitizerKind::FuzzerNoLink | SanitizerKind::FloatDivideByZero | - SanitizerKind::SafeStack | SanitizerKind::ShadowCallStack; + SanitizerKind::SafeStack | SanitizerKind::ShadowCallStack | + SanitizerKind::Thread; static const SanitizerMask RecoverableByDefault = SanitizerKind::Undefined | SanitizerKind::Integer | SanitizerKind::ImplicitConversion | SanitizerKind::Nullability | diff --git a/clang/test/CodeGen/sanitize-coverage.c b/clang/test/CodeGen/sanitize-coverage.c new file mode 100644 index 0000000000000..6fc8e39354d4f --- /dev/null +++ 
b/clang/test/CodeGen/sanitize-coverage.c @@ -0,0 +1,22 @@ +// RUN: %clang %s -target x86_64-unknown-linux-gnu -emit-llvm -S -fsanitize-coverage=trace-pc,trace-cmp -o - | FileCheck %s --check-prefixes=CHECK +// RUN: %clang %s -target x86_64-unknown-linux-gnu -emit-llvm -S -fsanitize=address -fsanitize-coverage=trace-pc,trace-cmp -o - | FileCheck %s --check-prefixes=CHECK,ASAN +// RUN: %clang %s -target x86_64-unknown-linux-gnu -emit-llvm -S -fsanitize=bounds -fsanitize-coverage=trace-pc,trace-cmp -o - | FileCheck %s --check-prefixes=CHECK,BOUNDS +// RUN: %clang %s -target x86_64-unknown-linux-gnu -emit-llvm -S -fsanitize=memory -fsanitize-coverage=trace-pc,trace-cmp -o - | FileCheck %s --check-prefixes=CHECK,MSAN +// RUN: %clang %s -target x86_64-unknown-linux-gnu -emit-llvm -S -fsanitize=thread -fsanitize-coverage=trace-pc,trace-cmp -o - | FileCheck %s --check-prefixes=CHECK,TSAN +// RUN: %clang %s -target x86_64-unknown-linux-gnu -emit-llvm -S -fsanitize=undefined -fsanitize-coverage=trace-pc,trace-cmp -o - | FileCheck %s --check-prefixes=CHECK,UBSAN + +int x[10]; + +// CHECK-LABEL: define dso_local void @foo( +void foo(int n) { + // CHECK-DAG: call void @__sanitizer_cov_trace_pc + // CHECK-DAG: call void @__sanitizer_cov_trace_const_cmp + // ASAN-DAG: call void @__asan_report_store + // MSAN-DAG: call void @__msan_warning + // BOUNDS-DAG: call void @__ubsan_handle_out_of_bounds + // TSAN-DAG: call void @__tsan_func_entry + // UBSAN-DAG: call void @__ubsan_handle + if (n) + x[n] = 42; +} +// CHECK-LABEL: declare void diff --git a/clang/test/Driver/fsanitize-coverage.c b/clang/test/Driver/fsanitize-coverage.c index b10fc86bb3911..02078d847512e 100644 --- a/clang/test/Driver/fsanitize-coverage.c +++ b/clang/test/Driver/fsanitize-coverage.c @@ -12,8 +12,10 @@ // RUN: %clang -target x86_64-linux-gnu -fsanitize=kernel-memory -fsanitize-coverage=func,trace-pc %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC // RUN: %clang -target x86_64-linux-gnu 
-fsanitize=leak -fsanitize-coverage=func,trace-pc %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC // RUN: %clang -target x86_64-linux-gnu -fsanitize=undefined -fsanitize-coverage=func,trace-pc %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC +// RUN: %clang -target x86_64-linux-gnu -fsanitize=bounds -fsanitize-coverage=func,trace-pc %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC // RUN: %clang -target x86_64-linux-gnu -fsanitize=bool -fsanitize-coverage=func,trace-pc %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC // RUN: %clang -target x86_64-linux-gnu -fsanitize=dataflow -fsanitize-coverage=func,trace-pc %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC +// RUN: %clang -target x86_64-linux-gnu -fsanitize=thread -fsanitize-coverage=func,trace-pc %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC // RUN: %clang -target %itanium_abi_triple -fsanitize=float-divide-by-zero -fsanitize-coverage=func,trace-pc %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC // RUN: %clang -target x86_64-linux-gnu -fsanitize-coverage=func,trace-pc %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC // CHECK-SANITIZE-COVERAGE-FUNC: fsanitize-coverage-type=1 diff --git a/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_inline8bit_counter.cpp b/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_inline8bit_counter.cpp index 58a64d1a92dc8..68eca85eb4d42 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_inline8bit_counter.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_inline8bit_counter.cpp @@ -5,7 +5,6 @@ // // RUN: %clangxx -O0 %s -fsanitize-coverage=inline-8bit-counters,pc-table -o %t // RUN: %run %t 2>&1 | FileCheck %s -// XFAIL: tsan #include #include diff --git 
a/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_inline_bool_flag.cpp b/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_inline_bool_flag.cpp index c3783e80f6237..d62ffe613b5b0 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_inline_bool_flag.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_inline_bool_flag.cpp @@ -5,7 +5,6 @@ // // RUN: %clangxx -O0 %s -fsanitize-coverage=inline-bool-flag,pc-table -o %t // RUN: %run %t 2>&1 | FileCheck %s -// XFAIL: tsan #include #include diff --git a/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_no_prune.cpp b/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_no_prune.cpp index 9604da222f8e9..6a7bb0dda0a82 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_no_prune.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_no_prune.cpp @@ -2,7 +2,7 @@ // REQUIRES: has_sancovcc,stable-runtime // UNSUPPORTED: i386-darwin -// XFAIL: ubsan,tsan +// XFAIL: ubsan // XFAIL: android && asan // RUN: %clangxx -O0 %s -S -o - -emit-llvm -fsanitize-coverage=trace-pc,bb,no-prune 2>&1 | grep "call void @__sanitizer_cov_trace_pc" | count 3 diff --git a/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_stack_depth.cpp b/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_stack_depth.cpp index 90959ef5b0287..29a63c0a92f32 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_stack_depth.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_stack_depth.cpp @@ -1,7 +1,5 @@ // Tests -fsanitize-coverage=stack-depth // -// XFAIL: tsan -// // RUN: %clangxx -O0 -std=c++11 -fsanitize-coverage=stack-depth %s -o %t // RUN: %run %t 2>&1 | FileCheck %s --implicit-check-not Assertion{{.*}}failed // RUN: %clangxx -O0 -std=c++11 -fsanitize-coverage=trace-pc-guard,stack-depth \ diff --git 
a/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_trace_pc_guard-init.cpp b/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_trace_pc_guard-init.cpp index b92a513b6d65f..0b2da9aebac8e 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_trace_pc_guard-init.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_trace_pc_guard-init.cpp @@ -1,7 +1,6 @@ // Tests trace pc guard coverage collection. // // REQUIRES: has_sancovcc,stable-runtime,x86_64-linux -// XFAIL: tsan // // RUN: DIR=%t_workdir // RUN: CLANG_ARGS="-O0 -fsanitize-coverage=trace-pc-guard" From 09de6e0fbd0b6ca7fa8760ac3513be6bbbba5a81 Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Tue, 26 May 2020 13:49:23 -0700 Subject: [PATCH 147/770] Let @skipUnlessAddressSanitizer imply @skipIfAsan Don't run tests that use address sanitizer inside an address-sanitized LLDB. The tests don't support that configuration. Incidentally they were skipped on green dragon for a different reason, so this hasn't come up there before. --- .../Python/lldbsuite/test/decorators.py | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/decorators.py b/lldb/packages/Python/lldbsuite/test/decorators.py index 65b63b4b40a8e..b94b672e44999 100644 --- a/lldb/packages/Python/lldbsuite/test/decorators.py +++ b/lldb/packages/Python/lldbsuite/test/decorators.py @@ -785,10 +785,21 @@ def is_compiler_clang_with_ubsan(self): return skipTestIfFn(is_compiler_clang_with_ubsan)(func) +def is_running_under_asan(): + if ('ASAN_OPTIONS' in os.environ): + return "ASAN unsupported" + return None + def skipUnlessAddressSanitizer(func): """Decorate the item to skip test unless Clang -fsanitize=thread is supported.""" def is_compiler_with_address_sanitizer(self): + # Also don't run tests that use address sanitizer inside an + # address-sanitized LLDB. The tests don't support that + # configuration. 
+ if is_running_under_asan(): + return "Address sanitizer tests are disabled when runing under ASAN" + compiler_path = self.getCompiler() compiler = os.path.basename(compiler_path) f = tempfile.NamedTemporaryFile() @@ -803,6 +814,10 @@ def is_compiler_with_address_sanitizer(self): return None return skipTestIfFn(is_compiler_with_address_sanitizer)(func) +def skipIfAsan(func): + """Skip this test if the environment is set up to run LLDB *itself* under ASAN.""" + return skipTestIfFn(is_running_under_asan)(func) + def _get_bool_config_skip_if_decorator(key): config = lldb.SBDebugger.GetBuildConfiguration() value_node = config.GetValueForKey(key) @@ -847,14 +862,6 @@ def is_feature_enabled(self): return "%s is not supported on this system." % feature return skipTestIfFn(is_feature_enabled) -def skipIfAsan(func): - """Skip this test if the environment is set up to run LLDB itself under ASAN.""" - def is_asan(): - if ('ASAN_OPTIONS' in os.environ): - return "ASAN unsupported" - return None - return skipTestIfFn(is_asan)(func) - def skipIfReproducer(func): """Skip this test if the environment is set up to run LLDB with reproducers.""" def is_reproducer(): From 01fee8aa24a6070542cfa55b2c32036d1d5869b8 Mon Sep 17 00:00:00 2001 From: Davide Italiano Date: Tue, 26 May 2020 13:53:16 -0700 Subject: [PATCH 148/770] [MLICM] Remove unneeded option so the test doesn't fail. 
--- llvm/test/DebugInfo/MIR/X86/mlicm-hoist-post-regalloc.mir | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-post-regalloc.mir b/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-post-regalloc.mir index 91f77d331f184..97cdea090c9c5 100644 --- a/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-post-regalloc.mir +++ b/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-post-regalloc.mir @@ -1,5 +1,5 @@ --- | - ; RUN: llc -start-before=phi-node-elimination -stop-after=machinelicm -debug-only=machinelicm -o - %s | FileCheck %s + ; RUN: llc -start-before=phi-node-elimination -stop-after=machinelicm -o - %s | FileCheck %s ; Ensure we execute machinelicm post register allocation. ; Line numbers should not be retained when loop invariant instructions are hoisted. ; From 5cf90d6cf1b811a6693383c487f79d24d5b306bb Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 26 May 2020 21:50:15 +0100 Subject: [PATCH 149/770] [LoopUnroll] Simplify latch/header block handling (NFC). I think the current code dealing with connecting the unrolled iterations is a bit more complicated than necessary. To connect the unrolled iterations, we have to update the unrolled latch blocks to branch to the header of the next unrolled iteration. We need to do this regardless of whether the latch is exiting or not. Additionally, we try to turn the conditional branch in the exiting block into an unconditional one. This is an optimization only; alternatively we could leave the conditional branches in place and rely on other passes to simplify the conditions. Logically, this is a separate step from connecting the latches to the headers, but it is convenient to fold them into the same loop, if the latch is also exiting. For headers (or other non-latch exiting blocks), this is done separately. Hopefully the patch with additional comments makes things a bit clearer. 
Reviewers: efriedma, dmgreen, hfinkel, Whitney Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D80544 --- llvm/lib/Transforms/Utils/LoopUnroll.cpp | 80 +++++++++++------------- 1 file changed, 36 insertions(+), 44 deletions(-) diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 23b61c40a7567..d9323e70bef60 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -716,9 +716,9 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, } } - auto setDest = [LoopExit, ContinueOnTrue](BasicBlock *Src, BasicBlock *Dest, - BasicBlock *BlockInLoop, - bool NeedConditional) { + auto setDest = [](BasicBlock *Src, BasicBlock *Dest, BasicBlock *BlockInLoop, + bool NeedConditional, bool ContinueOnTrue, + bool IsDestLoopExit) { auto *Term = cast(Src->getTerminator()); if (NeedConditional) { // Update the conditional branch's successor for the following @@ -726,7 +726,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, Term->setSuccessor(!ContinueOnTrue, Dest); } else { // Remove phi operands at this loop exit - if (Dest != LoopExit) { + if (!IsDestLoopExit) { BasicBlock *BB = Src; for (BasicBlock *Succ : successors(BB)) { // Preserve the incoming value from BB if we are jumping to the block @@ -743,29 +743,27 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, } }; - // Now that all the basic blocks for the unrolled iterations are in place, - // set up the branches to connect them. - if (LatchIsExiting) { - // Set up latches to branch to the new header in the unrolled iterations or - // the loop exit for the last latch in a fully unrolled loop. - for (unsigned i = 0, e = Latches.size(); i != e; ++i) { - // The branch destination. 
- unsigned j = (i + 1) % e; - BasicBlock *Dest = Headers[j]; - bool NeedConditional = true; + // Connect latches of the unrolled iterations to the headers of the next + // iteration. If the latch is also the exiting block, the conditional branch + // may have to be preserved. + for (unsigned i = 0, e = Latches.size(); i != e; ++i) { + // The branch destination. + unsigned j = (i + 1) % e; + BasicBlock *Dest = Headers[j]; + bool NeedConditional = LatchIsExiting; - if (RuntimeTripCount && j != 0) { + if (LatchIsExiting) { + if (RuntimeTripCount && j != 0) NeedConditional = false; - } // For a complete unroll, make the last iteration end with a branch // to the exit block. if (CompletelyUnroll) { if (j == 0) Dest = LoopExit; - // If using trip count upper bound to completely unroll, we need to keep - // the conditional branch except the last one because the loop may exit - // after any iteration. + // If using trip count upper bound to completely unroll, we need to + // keep the conditional branch except the last one because the loop + // may exit after any iteration. assert(NeedConditional && "NeedCondition cannot be modified by both complete " "unrolling and runtime unrolling"); @@ -777,16 +775,18 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // unconditional branch for some iterations. NeedConditional = false; } - - setDest(Latches[i], Dest, Headers[i], NeedConditional); } - } else { - // Setup headers to branch to their new successors in the unrolled - // iterations. + + setDest(Latches[i], Dest, Headers[i], NeedConditional, ContinueOnTrue, + Dest == LoopExit); + } + + if (!LatchIsExiting) { + // If the latch is not exiting, we may be able to simplify the conditional + // branches in the unrolled exiting blocks. for (unsigned i = 0, e = Headers.size(); i != e; ++i) { // The branch destination. 
unsigned j = (i + 1) % e; - BasicBlock *Dest = HeaderSucc[i]; bool NeedConditional = true; if (RuntimeTripCount && j != 0) @@ -802,27 +802,19 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // unconditional branch for some iterations. NeedConditional = false; - setDest(Headers[i], Dest, HeaderSucc[i], NeedConditional); + // Conditional branches from non-latch exiting block have successors + // either in the same loop iteration or outside the loop. The branches are + // already correct. + if (NeedConditional) + continue; + setDest(Headers[i], HeaderSucc[i], HeaderSucc[i], NeedConditional, + ContinueOnTrue, false); } - // Set up latches to branch to the new header in the unrolled iterations or - // the loop exit for the last latch in a fully unrolled loop. - - for (unsigned i = 0, e = Latches.size(); i != e; ++i) { - // The original branch was replicated in each unrolled iteration. - BranchInst *Term = cast(Latches[i]->getTerminator()); - - // The branch destination. - unsigned j = (i + 1) % e; - BasicBlock *Dest = Headers[j]; - - // When completely unrolling, the last latch becomes unreachable. - if (CompletelyUnroll && j == 0) - new UnreachableInst(Term->getContext(), Term); - else - // Replace the conditional branch with an unconditional one. - BranchInst::Create(Dest, Term); - + // When completely unrolling, the last latch becomes unreachable. 
+ if (CompletelyUnroll) { + BranchInst *Term = cast(Latches.back()->getTerminator()); + new UnreachableInst(Term->getContext(), Term); Term->eraseFromParent(); } } From 512e806a33e80058a409d205a378a6e6fc2ef39d Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Tue, 26 May 2020 13:30:19 -0700 Subject: [PATCH 150/770] [AMDGPU] Bail alloca vectorization if GEP not found Differential Revision: https://reviews.llvm.org/D80587 --- llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 13 ++++++++++--- .../AMDGPU/promote-alloca-vector-to-vector.ll | 18 ++++++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 03e927b3cdc4d..036f5440dc75d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -339,7 +339,9 @@ static Value *stripBitcasts(Value *V) { static Value * calculateVectorIndex(Value *Ptr, const std::map &GEPIdx) { - GetElementPtrInst *GEP = cast(stripBitcasts(Ptr)); + GetElementPtrInst *GEP = dyn_cast(stripBitcasts(Ptr)); + if (!GEP) + return nullptr; auto I = GEPIdx.find(GEP); return I == GEPIdx.end() ? 
nullptr : I->second; @@ -496,10 +498,12 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL) { if (Inst->getType() == AllocaTy || Inst->getType()->isVectorTy()) break; - Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS); Value *Ptr = cast(Inst)->getPointerOperand(); Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); + if (!Index) + break; + Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS); Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy); Value *VecValue = Builder.CreateLoad(VectorTy, BitCast); Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index); @@ -515,9 +519,12 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL) { SI->getValueOperand()->getType()->isVectorTy()) break; - Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS); Value *Ptr = SI->getPointerOperand(); Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); + if (!Index) + break; + + Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS); Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy); Value *VecValue = Builder.CreateLoad(VectorTy, BitCast); Value *Elt = SI->getValueOperand(); diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll index 15da72db4abb7..da52bcee3637c 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll @@ -189,5 +189,23 @@ entry: ret void } +; GCN-LABEL: {{^}}ptr_alloca_bitcast: +; OPT-LABEL: define i64 @ptr_alloca_bitcast + +; GCN-NOT: buffer_ +; GCN: v_mov_b32_e32 v1, 0 + +; OPT: %private_iptr = alloca <2 x i32>, align 8, addrspace(5) +; OPT: %cast = bitcast <2 x i32> addrspace(5)* %private_iptr to i64 addrspace(5)* +; OPT: %tmp1 = load i64, i64 addrspace(5)* %cast, align 8 + +define i64 @ptr_alloca_bitcast() { +entry: + %private_iptr = alloca <2 x i32>, align 8, 
addrspace(5) + %cast = bitcast <2 x i32> addrspace(5)* %private_iptr to i64 addrspace(5)* + %tmp1 = load i64, i64 addrspace(5)* %cast, align 8 + ret i64 %tmp1 +} + declare i32 @llvm.amdgcn.workitem.id.x() declare i32 @llvm.amdgcn.workitem.id.y() From ecb66f50eeb73c32f8fd955a97bb070fbdd519ed Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Sat, 23 May 2020 22:47:21 -0700 Subject: [PATCH 151/770] [NFC, StackSafety] Move FunctionInfo into :: namespace --- .../llvm/Analysis/StackSafetyAnalysis.h | 13 ++- llvm/lib/Analysis/StackSafetyAnalysis.cpp | 83 ++++++++++--------- 2 files changed, 49 insertions(+), 47 deletions(-) diff --git a/llvm/include/llvm/Analysis/StackSafetyAnalysis.h b/llvm/include/llvm/Analysis/StackSafetyAnalysis.h index c797d498b5dd8..9158f42481bf1 100644 --- a/llvm/include/llvm/Analysis/StackSafetyAnalysis.h +++ b/llvm/include/llvm/Analysis/StackSafetyAnalysis.h @@ -21,19 +21,18 @@ namespace llvm { /// Interface to access stack safety analysis results for single function. class StackSafetyInfo { public: - struct FunctionInfo; + struct InfoTy; private: - std::unique_ptr Info; + std::unique_ptr Info; public: - StackSafetyInfo(); - StackSafetyInfo(FunctionInfo &&Info); + StackSafetyInfo(InfoTy Info); StackSafetyInfo(StackSafetyInfo &&); StackSafetyInfo &operator=(StackSafetyInfo &&); ~StackSafetyInfo(); - FunctionInfo *getInfo() const { return Info.get(); } + const InfoTy &getInfo() const { return *Info; } // TODO: Add useful for client methods. 
void print(raw_ostream &O) const; @@ -60,13 +59,13 @@ class StackSafetyPrinterPass : public PassInfoMixin { /// StackSafetyInfo wrapper for the legacy pass manager class StackSafetyInfoWrapperPass : public FunctionPass { - StackSafetyInfo SSI; + Optional SSI; public: static char ID; StackSafetyInfoWrapperPass(); - const StackSafetyInfo &getResult() const { return SSI; } + const StackSafetyInfo &getResult() const { return *SSI; } void print(raw_ostream &O, const Module *M) const override; void getAnalysisUsage(AnalysisUsage &AU) const override; diff --git a/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/llvm/lib/Analysis/StackSafetyAnalysis.cpp index 91c52e1bb9ffd..b98a0e5880463 100644 --- a/llvm/lib/Analysis/StackSafetyAnalysis.cpp +++ b/llvm/lib/Analysis/StackSafetyAnalysis.cpp @@ -15,6 +15,7 @@ #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" +#include using namespace llvm; @@ -143,10 +144,8 @@ uint64_t getStaticAllocaAllocationSize(const AllocaInst *AI) { return Size; } -} // end anonymous namespace - /// Describes uses of allocas and parameters inside of a single function. -struct StackSafetyInfo::FunctionInfo { +struct FunctionInfo { // May be a Function or a GlobalAlias const GlobalValue *GV = nullptr; // Informations about allocas uses. @@ -158,14 +157,11 @@ struct StackSafetyInfo::FunctionInfo { // StackSafetyDataFlowAnalysis counter stored here for faster access. int UpdateCount = 0; - FunctionInfo(const StackSafetyInfo &SSI) : FunctionInfo(*SSI.Info) {} - + FunctionInfo() = default; explicit FunctionInfo(const Function *F) : GV(F){}; // Creates FunctionInfo that forwards all the parameters to the aliasee. 
explicit FunctionInfo(const GlobalAlias *A); - FunctionInfo(FunctionInfo &&) = default; - bool IsDSOLocal() const { return GV->isDSOLocal(); }; bool IsInterposable() const { return GV->isInterposable(); }; @@ -184,12 +180,9 @@ struct StackSafetyInfo::FunctionInfo { for (auto &AS : Allocas) O << " " << AS << "\n"; } - -private: - FunctionInfo(const FunctionInfo &) = default; }; -StackSafetyInfo::FunctionInfo::FunctionInfo(const GlobalAlias *A) : GV(A) { +FunctionInfo::FunctionInfo(const GlobalAlias *A) : GV(A) { unsigned PointerSize = A->getParent()->getDataLayout().getPointerSizeInBits(); const GlobalObject *Aliasee = A->getBaseObject(); const FunctionType *Type = cast(Aliasee->getValueType()); @@ -201,6 +194,16 @@ StackSafetyInfo::FunctionInfo::FunctionInfo(const GlobalAlias *A) : GV(A) { } } +} // namespace + +struct StackSafetyInfo::InfoTy { + FunctionInfo Info; +}; + +StackSafetyInfo makeSSI(FunctionInfo Info) { + return StackSafetyInfo(StackSafetyInfo::InfoTy{std::move(Info)}); +} + namespace { class StackSafetyLocalAnalysis { @@ -232,7 +235,7 @@ class StackSafetyLocalAnalysis { UnknownRange(PointerSize, true) {} // Run the transformation on the associated function. - StackSafetyInfo run(); + FunctionInfo run(); }; ConstantRange @@ -382,8 +385,8 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(const Value *Ptr, UseInfo &US) { return true; } -StackSafetyInfo StackSafetyLocalAnalysis::run() { - StackSafetyInfo::FunctionInfo Info(&F); +FunctionInfo StackSafetyLocalAnalysis::run() { + FunctionInfo Info(&F); assert(!F.isDeclaration() && "Can't run StackSafety on a function declaration"); @@ -406,12 +409,11 @@ StackSafetyInfo StackSafetyLocalAnalysis::run() { LLVM_DEBUG(dbgs() << "[StackSafety] done\n"); LLVM_DEBUG(Info.print(dbgs())); - return StackSafetyInfo(std::move(Info)); + return Info; } class StackSafetyDataFlowAnalysis { - using FunctionMap = - std::map; + using FunctionMap = std::map; FunctionMap Functions; // Callee-to-Caller multimap. 
@@ -424,8 +426,7 @@ class StackSafetyDataFlowAnalysis { ConstantRange getArgumentAccessRange(const GlobalValue *Callee, unsigned ParamNo) const; bool updateOneUse(UseInfo &US, bool UpdateToFullSet); - void updateOneNode(const GlobalValue *Callee, - StackSafetyInfo::FunctionInfo &FS); + void updateOneNode(const GlobalValue *Callee, FunctionInfo &FS); void updateOneNode(const GlobalValue *Callee) { updateOneNode(Callee, Functions.find(Callee)->second); } @@ -440,12 +441,12 @@ class StackSafetyDataFlowAnalysis { public: StackSafetyDataFlowAnalysis( - Module &M, std::function FI); + Module &M, std::function FI); StackSafetyGlobalInfo run(); }; StackSafetyDataFlowAnalysis::StackSafetyDataFlowAnalysis( - Module &M, std::function FI) + Module &M, std::function FI) : PointerSize(M.getDataLayout().getPointerSizeInBits()), UnknownRange(PointerSize, true) { // Without ThinLTO, run the local analysis for every function in the TU and @@ -455,7 +456,7 @@ StackSafetyDataFlowAnalysis::StackSafetyDataFlowAnalysis( Functions.emplace(&F, FI(F)); for (auto &A : M.aliases()) if (isa(A.getBaseObject())) - Functions.emplace(&A, StackSafetyInfo::FunctionInfo(&A)); + Functions.emplace(&A, FunctionInfo(&A)); } ConstantRange @@ -465,7 +466,7 @@ StackSafetyDataFlowAnalysis::getArgumentAccessRange(const GlobalValue *Callee, // Unknown callee (outside of LTO domain or an indirect call). if (IT == Functions.end()) return UnknownRange; - const StackSafetyInfo::FunctionInfo &FS = IT->second; + const FunctionInfo &FS = IT->second; // The definition of this symbol may not be the definition in this linkage // unit. 
if (!FS.IsDSOLocal() || FS.IsInterposable()) @@ -495,8 +496,8 @@ bool StackSafetyDataFlowAnalysis::updateOneUse(UseInfo &US, return Changed; } -void StackSafetyDataFlowAnalysis::updateOneNode( - const GlobalValue *Callee, StackSafetyInfo::FunctionInfo &FS) { +void StackSafetyDataFlowAnalysis::updateOneNode(const GlobalValue *Callee, + FunctionInfo &FS) { bool UpdateToFullSet = FS.UpdateCount > StackSafetyMaxIterations; bool Changed = false; for (auto &AS : FS.Allocas) @@ -523,7 +524,7 @@ void StackSafetyDataFlowAnalysis::runDataFlow() { SmallVector Callees; for (auto &F : Functions) { Callees.clear(); - StackSafetyInfo::FunctionInfo &FS = F.second; + FunctionInfo &FS = F.second; for (auto &AS : FS.Allocas) for (auto &CS : AS.Use.Calls) Callees.push_back(CS.Callee); @@ -561,7 +562,7 @@ StackSafetyGlobalInfo StackSafetyDataFlowAnalysis::run() { StackSafetyGlobalInfo SSI; for (auto &F : Functions) - SSI.emplace(F.first, std::move(F.second)); + SSI.emplace(F.first, makeSSI(F.second)); return SSI; } @@ -590,8 +591,8 @@ bool setStackSafetyMetadata(Module &M, const StackSafetyGlobalInfo &SSGI) { auto Iter = SSGI.find(&F); if (Iter == SSGI.end()) continue; - StackSafetyInfo::FunctionInfo *Summary = Iter->second.getInfo(); - for (auto &AS : Summary->Allocas) { + const FunctionInfo &Summary = Iter->second.getInfo().Info; + for (auto &AS : Summary.Allocas) { ConstantRange AllocaRange{APInt(Width, 0), APInt(Width, AS.Size)}; if (AllocaRange.contains(AS.Use.Range)) { AS.AI->setMetadata(M.getMDKindID("stack-safe"), @@ -605,23 +606,22 @@ bool setStackSafetyMetadata(Module &M, const StackSafetyGlobalInfo &SSGI) { } // end anonymous namespace -StackSafetyInfo::StackSafetyInfo() = default; StackSafetyInfo::StackSafetyInfo(StackSafetyInfo &&) = default; StackSafetyInfo &StackSafetyInfo::operator=(StackSafetyInfo &&) = default; -StackSafetyInfo::StackSafetyInfo(FunctionInfo &&Info) - : Info(new FunctionInfo(std::move(Info))) {} +StackSafetyInfo::StackSafetyInfo(InfoTy Info) + : 
Info(new InfoTy(std::move(Info))) {} StackSafetyInfo::~StackSafetyInfo() = default; -void StackSafetyInfo::print(raw_ostream &O) const { Info->print(O); } +void StackSafetyInfo::print(raw_ostream &O) const { Info->Info.print(O); } AnalysisKey StackSafetyAnalysis::Key; StackSafetyInfo StackSafetyAnalysis::run(Function &F, FunctionAnalysisManager &AM) { StackSafetyLocalAnalysis SSLA(F, AM.getResult(F)); - return SSLA.run(); + return makeSSI(SSLA.run()); } PreservedAnalyses StackSafetyPrinterPass::run(Function &F, @@ -643,13 +643,13 @@ void StackSafetyInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { } void StackSafetyInfoWrapperPass::print(raw_ostream &O, const Module *M) const { - SSI.print(O); + SSI->print(O); } bool StackSafetyInfoWrapperPass::runOnFunction(Function &F) { StackSafetyLocalAnalysis SSLA( F, getAnalysis().getSE()); - SSI = StackSafetyInfo(SSLA.run()); + SSI = makeSSI(SSLA.run()); return false; } @@ -661,8 +661,8 @@ StackSafetyGlobalAnalysis::run(Module &M, ModuleAnalysisManager &AM) { AM.getResult(M).getManager(); StackSafetyDataFlowAnalysis SSDFA( - M, [&FAM](Function &F) -> const StackSafetyInfo & { - return FAM.getResult(F); + M, [&FAM](Function &F) -> const FunctionInfo & { + return FAM.getResult(F).getInfo().Info; }); return SSDFA.run(); } @@ -702,8 +702,11 @@ void StackSafetyGlobalInfoWrapperPass::getAnalysisUsage( bool StackSafetyGlobalInfoWrapperPass::runOnModule(Module &M) { StackSafetyDataFlowAnalysis SSDFA( - M, [this](Function &F) -> const StackSafetyInfo & { - return getAnalysis(F).getResult(); + M, [this](Function &F) -> const FunctionInfo & { + return getAnalysis(F) + .getResult() + .getInfo() + .Info; }); SSGI = SSDFA.run(); return SetMetadata ? 
setStackSafetyMetadata(M, SSGI) : false; From 9abb0e8d5be2ffad06ccfcc2d5530997ad093b81 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Sun, 24 May 2020 02:44:31 -0700 Subject: [PATCH 152/770] [NFC, StackSafety] Remove unnecessary data --- .../llvm/Analysis/StackSafetyAnalysis.h | 3 +- llvm/lib/Analysis/StackSafetyAnalysis.cpp | 164 +++++++++--------- 2 files changed, 82 insertions(+), 85 deletions(-) diff --git a/llvm/include/llvm/Analysis/StackSafetyAnalysis.h b/llvm/include/llvm/Analysis/StackSafetyAnalysis.h index 9158f42481bf1..33a4b2c149c36 100644 --- a/llvm/include/llvm/Analysis/StackSafetyAnalysis.h +++ b/llvm/include/llvm/Analysis/StackSafetyAnalysis.h @@ -35,7 +35,7 @@ class StackSafetyInfo { const InfoTy &getInfo() const { return *Info; } // TODO: Add useful for client methods. - void print(raw_ostream &O) const; + void print(raw_ostream &O, const GlobalValue &F) const; }; /// StackSafetyInfo wrapper for the new pass manager. @@ -60,6 +60,7 @@ class StackSafetyPrinterPass : public PassInfoMixin { /// StackSafetyInfo wrapper for the legacy pass manager class StackSafetyInfoWrapperPass : public FunctionPass { Optional SSI; + const Function *F = nullptr; public: static char ID; diff --git a/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/llvm/lib/Analysis/StackSafetyAnalysis.cpp index b98a0e5880463..31f30d4b5d56f 100644 --- a/llvm/lib/Analysis/StackSafetyAnalysis.cpp +++ b/llvm/lib/Analysis/StackSafetyAnalysis.cpp @@ -13,6 +13,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/InitializePasses.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" #include @@ -98,35 +99,6 @@ raw_ostream &operator<<(raw_ostream &OS, const UseInfo &U) { return OS; } -struct AllocaInfo { - AllocaInst *AI = nullptr; - uint64_t Size = 0; - UseInfo Use; - - AllocaInfo(unsigned PointerSize, AllocaInst *AI, uint64_t Size) - : AI(AI), Size(Size), Use(PointerSize) {} - - StringRef getName() 
const { return AI->getName(); } -}; - -raw_ostream &operator<<(raw_ostream &OS, const AllocaInfo &A) { - return OS << A.getName() << "[" << A.Size << "]: " << A.Use; -} - -struct ParamInfo { - const Argument *Arg = nullptr; - UseInfo Use; - - explicit ParamInfo(unsigned PointerSize, const Argument *Arg) - : Arg(Arg), Use(PointerSize) {} - - StringRef getName() const { return Arg ? Arg->getName() : ""; } -}; - -raw_ostream &operator<<(raw_ostream &OS, const ParamInfo &P) { - return OS << P.getName() << "[]: " << P.Use; -} - /// Calculate the allocation size of a given alloca. Returns 0 if the /// size can not be statically determined. uint64_t getStaticAllocaAllocationSize(const AllocaInst *AI) { @@ -146,20 +118,16 @@ uint64_t getStaticAllocaAllocationSize(const AllocaInst *AI) { /// Describes uses of allocas and parameters inside of a single function. struct FunctionInfo { - // May be a Function or a GlobalAlias + SmallVector Allocas; + SmallVector Params; const GlobalValue *GV = nullptr; - // Informations about allocas uses. - SmallVector Allocas; - // Informations about parameters uses. - SmallVector Params; // TODO: describe return value as depending on one or more of its arguments. // StackSafetyDataFlowAnalysis counter stored here for faster access. int UpdateCount = 0; FunctionInfo() = default; - explicit FunctionInfo(const Function *F) : GV(F){}; - // Creates FunctionInfo that forwards all the parameters to the aliasee. + FunctionInfo(const Function *F) : GV(F){}; explicit FunctionInfo(const GlobalAlias *A); bool IsDSOLocal() const { return GV->isDSOLocal(); }; @@ -168,17 +136,36 @@ struct FunctionInfo { StringRef getName() const { return GV->getName(); } - void print(raw_ostream &O) const { + void print(raw_ostream &O, StringRef Name, const Function *F) const { // TODO: Consider different printout format after // StackSafetyDataFlowAnalysis. Calls and parameters are irrelevant then. - O << " @" << getName() << (IsDSOLocal() ? 
"" : " dso_preemptable") + O << " @" << Name << (IsDSOLocal() ? "" : " dso_preemptable") << (IsInterposable() ? " interposable" : "") << "\n"; + O << " args uses:\n"; - for (auto &P : Params) - O << " " << P << "\n"; + size_t Pos = 0; + for (auto &P : Params) { + StringRef Name = ""; + if (F) + Name = F->getArg(Pos)->getName(); + O << " " << Name << "[]: " << P << "\n"; + ++Pos; + } + O << " allocas uses:\n"; - for (auto &AS : Allocas) - O << " " << AS << "\n"; + if (F) { + size_t Pos = 0; + for (auto &I : instructions(F)) { + if (auto AI = dyn_cast(&I)) { + auto &AS = Allocas[Pos]; + O << " " << AI->getName() << "[" + << getStaticAllocaAllocationSize(AI) << "]: " << AS << "\n"; + ++Pos; + } + } + } else { + assert(Allocas.empty()); + } } }; @@ -188,8 +175,8 @@ FunctionInfo::FunctionInfo(const GlobalAlias *A) : GV(A) { const FunctionType *Type = cast(Aliasee->getValueType()); // 'Forward' all parameters to this alias to the aliasee for (unsigned ArgNo = 0; ArgNo < Type->getNumParams(); ArgNo++) { - Params.emplace_back(PointerSize, nullptr); - UseInfo &US = Params.back().Use; + Params.emplace_back(PointerSize); + UseInfo &US = Params.back(); US.Calls.emplace_back(Aliasee, ArgNo, ConstantRange(APInt(PointerSize, 0))); } } @@ -394,21 +381,20 @@ FunctionInfo StackSafetyLocalAnalysis::run() { for (auto &I : instructions(F)) { if (auto AI = dyn_cast(&I)) { - Info.Allocas.emplace_back(PointerSize, AI, - getStaticAllocaAllocationSize(AI)); - AllocaInfo &AS = Info.Allocas.back(); - analyzeAllUses(AI, AS.Use); + Info.Allocas.emplace_back(PointerSize); + UseInfo &AS = Info.Allocas.back(); + analyzeAllUses(AI, AS); } } for (const Argument &A : make_range(F.arg_begin(), F.arg_end())) { - Info.Params.emplace_back(PointerSize, &A); - ParamInfo &PS = Info.Params.back(); - analyzeAllUses(&A, PS.Use); + Info.Params.emplace_back(PointerSize); + UseInfo &PS = Info.Params.back(); + analyzeAllUses(&A, PS); } + LLVM_DEBUG(Info.print(dbgs(), F.getName(), &F)); LLVM_DEBUG(dbgs() << 
"[StackSafety] done\n"); - LLVM_DEBUG(Info.print(dbgs())); return Info; } @@ -473,7 +459,7 @@ StackSafetyDataFlowAnalysis::getArgumentAccessRange(const GlobalValue *Callee, return UnknownRange; if (ParamNo >= FS.Params.size()) // possibly vararg return UnknownRange; - return FS.Params[ParamNo].Use.Range; + return FS.Params[ParamNo].Range; } bool StackSafetyDataFlowAnalysis::updateOneUse(UseInfo &US, @@ -501,14 +487,14 @@ void StackSafetyDataFlowAnalysis::updateOneNode(const GlobalValue *Callee, bool UpdateToFullSet = FS.UpdateCount > StackSafetyMaxIterations; bool Changed = false; for (auto &AS : FS.Allocas) - Changed |= updateOneUse(AS.Use, UpdateToFullSet); + Changed |= updateOneUse(AS, UpdateToFullSet); for (auto &PS : FS.Params) - Changed |= updateOneUse(PS.Use, UpdateToFullSet); + Changed |= updateOneUse(PS, UpdateToFullSet); if (Changed) { LLVM_DEBUG(dbgs() << "=== update [" << FS.UpdateCount - << (UpdateToFullSet ? ", full-set" : "") << "] " - << FS.getName() << "\n"); + << (UpdateToFullSet ? ", full-set" : "") << "] " << &FS + << "\n"); // Callers of this function may need updating. 
for (auto &CallerID : Callers[Callee]) WorkList.insert(CallerID); @@ -526,10 +512,10 @@ void StackSafetyDataFlowAnalysis::runDataFlow() { Callees.clear(); FunctionInfo &FS = F.second; for (auto &AS : FS.Allocas) - for (auto &CS : AS.Use.Calls) + for (auto &CS : AS.Calls) Callees.push_back(CS.Callee); for (auto &PS : FS.Params) - for (auto &CS : PS.Use.Calls) + for (auto &CS : PS.Calls) Callees.push_back(CS.Callee); llvm::sort(Callees); @@ -566,22 +552,6 @@ StackSafetyGlobalInfo StackSafetyDataFlowAnalysis::run() { return SSI; } -void print(const StackSafetyGlobalInfo &SSI, raw_ostream &O, const Module &M) { - size_t Count = 0; - for (auto &F : M.functions()) - if (!F.isDeclaration()) { - SSI.find(&F)->second.print(O); - O << "\n"; - ++Count; - } - for (auto &A : M.aliases()) { - SSI.find(&A)->second.print(O); - O << "\n"; - ++Count; - } - assert(Count == SSI.size() && "Unexpected functions in the result"); -} - bool setStackSafetyMetadata(Module &M, const StackSafetyGlobalInfo &SSGI) { bool Changed = false; unsigned Width = M.getDataLayout().getPointerSizeInBits(); @@ -592,12 +562,18 @@ bool setStackSafetyMetadata(Module &M, const StackSafetyGlobalInfo &SSGI) { if (Iter == SSGI.end()) continue; const FunctionInfo &Summary = Iter->second.getInfo().Info; - for (auto &AS : Summary.Allocas) { - ConstantRange AllocaRange{APInt(Width, 0), APInt(Width, AS.Size)}; - if (AllocaRange.contains(AS.Use.Range)) { - AS.AI->setMetadata(M.getMDKindID("stack-safe"), - MDNode::get(M.getContext(), None)); - Changed = true; + size_t Pos = 0; + for (auto &I : instructions(F)) { + if (auto AI = dyn_cast(&I)) { + auto &AS = Summary.Allocas[Pos]; + ConstantRange AllocaRange{ + APInt(Width, 0), APInt(Width, getStaticAllocaAllocationSize(AI))}; + if (AllocaRange.contains(AS.Range)) { + AI->setMetadata(M.getMDKindID("stack-safe"), + MDNode::get(M.getContext(), None)); + Changed = true; + } + ++Pos; } } } @@ -614,7 +590,26 @@ StackSafetyInfo::StackSafetyInfo(InfoTy Info) 
StackSafetyInfo::~StackSafetyInfo() = default; -void StackSafetyInfo::print(raw_ostream &O) const { Info->Info.print(O); } +void StackSafetyInfo::print(raw_ostream &O, const GlobalValue &F) const { + Info->Info.print(O, F.getName(), dyn_cast(&F)); +} + +static void print(const StackSafetyGlobalInfo &SSI, raw_ostream &O, + const Module &M) { + size_t Count = 0; + for (auto &F : M.functions()) + if (!F.isDeclaration()) { + SSI.find(&F)->second.print(O, F); + O << "\n"; + ++Count; + } + for (auto &A : M.aliases()) { + SSI.find(&A)->second.print(O, A); + O << "\n"; + ++Count; + } + assert(Count == SSI.size() && "Unexpected functions in the result"); +} AnalysisKey StackSafetyAnalysis::Key; @@ -627,7 +622,7 @@ StackSafetyInfo StackSafetyAnalysis::run(Function &F, PreservedAnalyses StackSafetyPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { OS << "'Stack Safety Local Analysis' for function '" << F.getName() << "'\n"; - AM.getResult(F).print(OS); + AM.getResult(F).print(OS, F); return PreservedAnalyses::all(); } @@ -643,13 +638,14 @@ void StackSafetyInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { } void StackSafetyInfoWrapperPass::print(raw_ostream &O, const Module *M) const { - SSI->print(O); + SSI->print(O, *F); } bool StackSafetyInfoWrapperPass::runOnFunction(Function &F) { StackSafetyLocalAnalysis SSLA( F, getAnalysis().getSE()); SSI = makeSSI(SSLA.run()); + this->F = &F; return false; } From 6e39379bbbe1d8aba658f638dfc42f0ba0cbb926 Mon Sep 17 00:00:00 2001 From: Vedant Kumar Date: Wed, 20 May 2020 15:30:58 -0700 Subject: [PATCH 153/770] [DwarfExpression] Support entry values for indirect parameters Summary: A struct argument can be passed-by-value to a callee via a pointer to a temporary stack copy. Add support for emitting an entry value DBG_VALUE when an indirect parameter DBG_VALUE becomes unavailable. This is done by omitting DW_OP_stack_value from the entry value expression, to make the expression describe the location of an object. 
rdar://63373691 Reviewers: djtodoro, aprantl, dstenb Subscribers: hiraditya, lldb-commits, llvm-commits Tags: #lldb, #llvm Differential Revision: https://reviews.llvm.org/D80345 --- .../basic_entry_values/main.cpp | 28 +++++ llvm/docs/LangRef.rst | 8 +- .../CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 7 +- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 7 +- .../CodeGen/AsmPrinter/DwarfExpression.cpp | 22 +++- llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h | 15 ++- llvm/lib/CodeGen/LiveDebugValues.cpp | 6 +- ...bgcall-site-indirect-param-with-offset.mir | 102 +++++++++++++++ .../AArch64/dbgcall-site-indirect-param.mir | 117 ++++++++++++++++++ 9 files changed, 289 insertions(+), 23 deletions(-) create mode 100644 llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-indirect-param-with-offset.mir create mode 100644 llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-indirect-param.mir diff --git a/lldb/test/API/functionalities/param_entry_vals/basic_entry_values/main.cpp b/lldb/test/API/functionalities/param_entry_vals/basic_entry_values/main.cpp index c739a05f421e3..83f622cadf146 100644 --- a/lldb/test/API/functionalities/param_entry_vals/basic_entry_values/main.cpp +++ b/lldb/test/API/functionalities/param_entry_vals/basic_entry_values/main.cpp @@ -137,6 +137,30 @@ func14(int &sink, void (*target_no_tailcall)(int &, int)) { target_no_tailcall(sink, 123); } +/// A structure that is guaranteed -- when passed to a callee by value -- to be +/// passed via a pointer to a temporary copy in the caller. On x86_64 & aarch64 +/// only. +struct StructPassedViaPointerToTemporaryCopy { + // Under the 64-bit AAPCS, a struct larger than 16 bytes is not SROA'd, and + // is instead passed via pointer to a temporary copy. + long a, b, c; + StructPassedViaPointerToTemporaryCopy() : a(1), b(2), c(3) {} + + // Failing that, a virtual method forces passing via pointer to a temporary + // copy under the common calling conventions (e.g. 
32/64-bit x86, Linux/Win, + // according to https://www.agner.org/optimize/calling_conventions.pdf). + virtual void add_vtable() {} +}; + +__attribute__((noinline)) void func15(StructPassedViaPointerToTemporaryCopy S) { + use(S); + use(dummy); + + ++global; + //% self.filecheck("expr S", "main.cpp", "-check-prefix=FUNC15-EXPR") + // FUNC15-EXPR: (a = 1, b = 2, c = 3) +} + __attribute__((disable_tail_calls)) int main() { int sink = 0; S1 s1; @@ -169,5 +193,9 @@ __attribute__((disable_tail_calls)) int main() { // Test that evaluation can "see through" an indirect tail call. func14(sink, func13); + // Test evaluation of an entry value that dereferences a temporary stack + // slot set up by the caller for a StructPassedViaPointerToTemporaryCopy. + func15(StructPassedViaPointerToTemporaryCopy()); + return 0; } diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 01f41a7ea3f17..0891392b1e61e 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -5104,9 +5104,11 @@ The current supported opcode vocabulary is limited: ``DW_OP_LLVM_entry_value`` is only legal in MIR. The operation is introduced by the ``LiveDebugValues`` pass; currently only for function parameters that - are unmodified throughout a function and that are described as simple - register location descriptions. The operation is also introduced by the - ``AsmPrinter`` pass when a call site parameter value + are unmodified throughout a function. Support is limited to function + parameters that are described as simple register location descriptions, or as + indirect locations (e.g. when a struct is passed-by-value to a callee via a + pointer to a temporary copy made in the caller). The entry value op is also + introduced by the ``AsmPrinter`` pass when a call site parameter value (``DW_AT_call_site_parameter_value``) is represented as entry value of the parameter.
- ``DW_OP_breg`` (or ``DW_OP_bregx``) represents a content on the provided diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 93bf9d6c2f715..dce90b3c17c0d 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -1285,15 +1285,12 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); const DIExpression *DIExpr = DV.getSingleExpression(); DwarfExpr.addFragmentOffset(DIExpr); - if (Location.isIndirect()) - DwarfExpr.setMemoryLocationKind(); + DwarfExpr.setLocation(Location, DIExpr); DIExpressionCursor Cursor(DIExpr); - if (DIExpr->isEntryValue()) { - DwarfExpr.setEntryValueFlag(); + if (DIExpr->isEntryValue()) DwarfExpr.beginEntryValueExpression(Cursor); - } const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo(); if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg())) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 17fcf692d913d..953154f0b10b6 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -2399,14 +2399,11 @@ void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, DwarfExpr.addUnsignedConstant(Value.getInt()); } else if (Value.isLocation()) { MachineLocation Location = Value.getLoc(); - if (Location.isIndirect()) - DwarfExpr.setMemoryLocationKind(); + DwarfExpr.setLocation(Location, DIExpr); DIExpressionCursor Cursor(DIExpr); - if (DIExpr->isEntryValue()) { - DwarfExpr.setEntryValueFlag(); + if (DIExpr->isEntryValue()) DwarfExpr.beginEntryValueExpression(Cursor); - } const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo(); if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg())) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp 
b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index 69bc06cb94676..7b64c2238bd6a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -259,7 +259,8 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, if (isEntryValue()) finalizeEntryValue(); - if (isEntryValue() && !isParameterValue() && DwarfVersion >= 4) + if (isEntryValue() && !isIndirect() && !isParameterValue() && + DwarfVersion >= 4) emitOp(dwarf::DW_OP_stack_value); DwarfRegs.clear(); @@ -318,6 +319,25 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, return true; } +void DwarfExpression::setEntryValueFlags(const MachineLocation &Loc) { + LocationFlags |= EntryValue; + if (Loc.isIndirect()) + LocationFlags |= Indirect; +} + +void DwarfExpression::setLocation(const MachineLocation &Loc, + const DIExpression *DIExpr) { + if (Loc.isIndirect()) + // Do not treat entry value descriptions of indirect parameters as memory + // locations. This allows DwarfExpression::addReg() to add DW_OP_regN to an + // entry value description. + if (!DIExpr->isEntryValue()) + setMemoryLocationKind(); + + if (DIExpr->isEntryValue()) + setEntryValueFlags(Loc); +} + void DwarfExpression::beginEntryValueExpression( DIExpressionCursor &ExprCursor) { auto Op = ExprCursor.take(); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h index 5d43862827873..42be827cd5a09 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -30,6 +30,7 @@ class APInt; class DwarfCompileUnit; class DIELoc; class TargetRegisterInfo; +class MachineLocation; /// Holds a DIExpression and keeps track of how many operands have been consumed /// so far. @@ -142,14 +143,18 @@ class DwarfExpression { /// The kind of location description being produced. 
enum { Unknown = 0, Register, Memory, Implicit }; - /// The flags of location description being produced. - enum { EntryValue = 1, CallSiteParamValue }; + /// Additional location flags which may be combined with any location kind. + /// Currently, entry values are not supported for the Memory location kind. + enum { EntryValue = 1 << 0, Indirect = 1 << 1, CallSiteParamValue = 1 << 2 }; unsigned LocationKind : 3; - unsigned LocationFlags : 2; + unsigned LocationFlags : 3; unsigned DwarfVersion : 4; public: + /// Set the location (\p Loc) and \ref DIExpression (\p DIExpr) to describe. + void setLocation(const MachineLocation &Loc, const DIExpression *DIExpr); + bool isUnknownLocation() const { return LocationKind == Unknown; } bool isMemoryLocation() const { return LocationKind == Memory; } @@ -160,6 +165,8 @@ class DwarfExpression { bool isEntryValue() const { return LocationFlags & EntryValue; } + bool isIndirect() const { return LocationFlags & Indirect; } + bool isParameterValue() { return LocationFlags & CallSiteParamValue; } Optional TagOffset; @@ -296,7 +303,7 @@ class DwarfExpression { } /// Lock this down to become an entry value location. - void setEntryValueFlag() { LocationFlags |= EntryValue; } + void setEntryValueFlags(const MachineLocation &Loc); /// Lock this down to become a call site parameter location. void setCallSiteParamValueFlag() { LocationFlags |= CallSiteParamValue; } diff --git a/llvm/lib/CodeGen/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues.cpp index 470cb227fe7c8..00a6149a05404 100644 --- a/llvm/lib/CodeGen/LiveDebugValues.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues.cpp @@ -1613,10 +1613,6 @@ bool LiveDebugValues::isEntryValueCandidate( if (MI.getDebugLoc()->getInlinedAt()) return false; - // Do not consider indirect debug values (TODO: explain why). - if (MI.isIndirectDebugValue()) - return false; - // Only consider parameters that are described using registers. 
Parameters // that are passed on the stack are not yet supported, so ignore debug // values that are described by the frame or stack pointer. @@ -1631,7 +1627,7 @@ bool LiveDebugValues::isEntryValueCandidate( return false; // TODO: Add support for parameters that have a pre-existing debug expressions - // (e.g. fragments, or indirect parameters using DW_OP_deref). + // (e.g. fragments). if (MI.getDebugExpression()->getNumElements() > 0) return false; diff --git a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-indirect-param-with-offset.mir b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-indirect-param-with-offset.mir new file mode 100644 index 0000000000000..ee3a8e8ae5211 --- /dev/null +++ b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-indirect-param-with-offset.mir @@ -0,0 +1,102 @@ +# RUN: llc -emit-call-site-info -start-before=livedebugvalues -stop-after=machineverifier -o - %s \ +# RUN: | FileCheck %s -check-prefix=MIR + +# Copied from dbgcall-site-indirect-param.mir, with hand modifications: +# an offset is added to the indirect parameter DBG_VALUE. +# +# We do not support emitting an entry value in this case. 
+ +# MIR: renamable $w0 = LDRWui killed renamable $x8 +# MIR-NOT: DBG_VALUE $x0, 0, {{.*}}, !DIExpression(DW_OP_LLVM_entry_value +# MIR-NEXT: BL @baz +# MIR-NEXT: frame-destroy LDPXpost +# MIR-NEXT: TCRETURNdi @baz + +--- | + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + target triple = "arm64-apple-ios10.0.0" + + %struct.fat_ptr = type { i32*, i32*, i32* } + + define i32 @bar(%struct.fat_ptr* nocapture readonly %f) local_unnamed_addr !dbg !13 { + entry: + call void @llvm.dbg.declare(metadata %struct.fat_ptr* %f, metadata !23, metadata !DIExpression()), !dbg !24 + %ptr2 = bitcast %struct.fat_ptr* %f to i32**, !dbg !25 + %0 = load i32*, i32** %ptr2, align 8, !dbg !25 + %1 = load i32, i32* %0, align 4, !dbg !31 + %call = tail call i32 @baz(i32 %1), !dbg !34 + %call1 = tail call i32 @baz(i32 %call), !dbg !35 + ret i32 %call1, !dbg !36 + } + + declare void @llvm.dbg.declare(metadata, metadata, metadata) + + declare !dbg !4 i32 @baz(i32) local_unnamed_addr optsize + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!8, !9, !10, !11} + !llvm.ident = !{!12} + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, nameTableKind: None, sysroot: "/") + !1 = !DIFile(filename: "indirect.c", directory: "/tmp/fatptr") + !2 = !{} + !3 = !{!4} + !4 = !DISubprogram(name: "baz", scope: !1, file: !1, line: 4, type: !5, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !2) + !5 = !DISubroutineType(types: !6) + !6 = !{!7, !7} + !7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !8 = !{i32 7, !"Dwarf Version", i32 4} + !9 = !{i32 2, !"Debug Info Version", i32 3} + !10 = !{i32 1, !"wchar_size", i32 4} + !11 = !{i32 7, !"PIC Level", i32 2} + !12 = !{!"clang"} + !13 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 5, type: !14, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: 
DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !22) + !14 = !DISubroutineType(types: !15) + !15 = !{!7, !16} + !16 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "fat_ptr", file: !1, line: 1, size: 192, elements: !17) + !17 = !{!18, !20, !21} + !18 = !DIDerivedType(tag: DW_TAG_member, name: "ptr", scope: !16, file: !1, line: 2, baseType: !19, size: 64) + !19 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 64) + !20 = !DIDerivedType(tag: DW_TAG_member, name: "low", scope: !16, file: !1, line: 2, baseType: !19, size: 64, offset: 64) + !21 = !DIDerivedType(tag: DW_TAG_member, name: "high", scope: !16, file: !1, line: 2, baseType: !19, size: 64, offset: 128) + !22 = !{!23} + !23 = !DILocalVariable(name: "f", arg: 1, scope: !13, file: !1, line: 5, type: !16) + !24 = !DILocation(line: 5, column: 24, scope: !13) + !25 = !DILocation(line: 6, column: 23, scope: !13) + !31 = !DILocation(line: 6, column: 20, scope: !13) + !34 = !DILocation(line: 6, column: 16, scope: !13) + !35 = !DILocation(line: 6, column: 12, scope: !13) + !36 = !DILocation(line: 6, column: 5, scope: !13) + +... 
+--- +name: bar +stack: + - { id: 0, name: '', type: spill-slot, offset: -8, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -16, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$fp', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: + - { bb: 0, offset: 8, fwdArgRegs: + - { arg: 0, reg: '$w0' } } + - { bb: 0, offset: 10, fwdArgRegs: + - { arg: 0, reg: '$w0' } } +body: | + bb.0.entry: + liveins: $x0, $lr + + DBG_VALUE $x0, 0, !23, !DIExpression(DW_OP_plus_uconst, 12), debug-location !24 + early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2 :: (store 8 into %stack.1), (store 8 into %stack.0) + $fp = frame-setup ADDXri $sp, 0, 0 + frame-setup CFI_INSTRUCTION def_cfa $w29, 16 + frame-setup CFI_INSTRUCTION offset $w30, -8, debug-location !25 + frame-setup CFI_INSTRUCTION offset $w29, -16, debug-location !25 + renamable $x8 = LDRXui killed renamable $x0, 0, debug-location !25 :: (load 8 from %ir.ptr2) + renamable $w0 = LDRWui killed renamable $x8, 0, debug-location !31 :: (load 4 from %ir.0) + BL @baz, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0, debug-location !34 + early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2, debug-location !35 :: (load 8 from %stack.1), (load 8 from %stack.0) + TCRETURNdi @baz, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, debug-location !35 + +... 
diff --git a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-indirect-param.mir b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-indirect-param.mir new file mode 100644 index 0000000000000..d7edd1c654adf --- /dev/null +++ b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-indirect-param.mir @@ -0,0 +1,117 @@ +# RUN: llc -emit-call-site-info -start-before=livedebugvalues -stop-after=machineverifier -o - %s \ +# RUN: | FileCheck %s -check-prefix=MIR + +# RUN: llc -emit-call-site-info -start-before=livedebugvalues -filetype=obj -o - %s \ +# RUN: | llvm-dwarfdump - | FileCheck %s -check-prefix=DWARF -implicit-check-not=DW_OP_entry_value + +# // Original Source +# struct fat_ptr { +# int *ptr, *low, *high; +# }; +# extern int baz(int x); +# int bar(struct fat_ptr f) { +# return baz(baz(*f.ptr)); +# } + +# MIR: renamable $w0 = LDRWui killed renamable $x8 +# MIR-NEXT: DBG_VALUE $x0, 0, {{.*}}, !DIExpression(DW_OP_LLVM_entry_value, 1) +# MIR-NEXT: BL @baz +# MIR-NEXT: frame-destroy LDPXpost +# MIR-NEXT: TCRETURNdi @baz + +# After w0 is clobbered, we should get an indirect parameter entry value for "f". 
+ +# DWARF-LABEL: DW_TAG_formal_parameter +# DWARF-NEXT: DW_AT_location +# DWARF-NEXT: [0x0000000000000000, 0x0000000000000010): DW_OP_breg0 W0+0 +# DWARF-NEXT: [0x0000000000000010, 0x000000000000001c): DW_OP_entry_value(DW_OP_reg0 W0)) +# DWARF-NEXT: DW_AT_name ("f") + +--- | + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + target triple = "arm64-apple-ios10.0.0" + + %struct.fat_ptr = type { i32*, i32*, i32* } + + define i32 @bar(%struct.fat_ptr* nocapture readonly %f) local_unnamed_addr !dbg !13 { + entry: + call void @llvm.dbg.declare(metadata %struct.fat_ptr* %f, metadata !23, metadata !DIExpression()), !dbg !24 + %ptr2 = bitcast %struct.fat_ptr* %f to i32**, !dbg !25 + %0 = load i32*, i32** %ptr2, align 8, !dbg !25 + %1 = load i32, i32* %0, align 4, !dbg !31 + %call = tail call i32 @baz(i32 %1), !dbg !34 + %call1 = tail call i32 @baz(i32 %call), !dbg !35 + ret i32 %call1, !dbg !36 + } + + declare void @llvm.dbg.declare(metadata, metadata, metadata) + + declare !dbg !4 i32 @baz(i32) local_unnamed_addr optsize + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!8, !9, !10, !11} + !llvm.ident = !{!12} + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, nameTableKind: None, sysroot: "/") + !1 = !DIFile(filename: "indirect.c", directory: "/tmp/fatptr") + !2 = !{} + !3 = !{!4} + !4 = !DISubprogram(name: "baz", scope: !1, file: !1, line: 4, type: !5, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !2) + !5 = !DISubroutineType(types: !6) + !6 = !{!7, !7} + !7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !8 = !{i32 7, !"Dwarf Version", i32 4} + !9 = !{i32 2, !"Debug Info Version", i32 3} + !10 = !{i32 1, !"wchar_size", i32 4} + !11 = !{i32 7, !"PIC Level", i32 2} + !12 = !{!"clang"} + !13 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 5, type: !14, scopeLine: 5, flags: 
DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !22) + !14 = !DISubroutineType(types: !15) + !15 = !{!7, !16} + !16 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "fat_ptr", file: !1, line: 1, size: 192, elements: !17) + !17 = !{!18, !20, !21} + !18 = !DIDerivedType(tag: DW_TAG_member, name: "ptr", scope: !16, file: !1, line: 2, baseType: !19, size: 64) + !19 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 64) + !20 = !DIDerivedType(tag: DW_TAG_member, name: "low", scope: !16, file: !1, line: 2, baseType: !19, size: 64, offset: 64) + !21 = !DIDerivedType(tag: DW_TAG_member, name: "high", scope: !16, file: !1, line: 2, baseType: !19, size: 64, offset: 128) + !22 = !{!23} + !23 = !DILocalVariable(name: "f", arg: 1, scope: !13, file: !1, line: 5, type: !16) + !24 = !DILocation(line: 5, column: 24, scope: !13) + !25 = !DILocation(line: 6, column: 23, scope: !13) + !31 = !DILocation(line: 6, column: 20, scope: !13) + !34 = !DILocation(line: 6, column: 16, scope: !13) + !35 = !DILocation(line: 6, column: 12, scope: !13) + !36 = !DILocation(line: 6, column: 5, scope: !13) + +... 
+--- +name: bar +stack: + - { id: 0, name: '', type: spill-slot, offset: -8, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -16, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$fp', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: + - { bb: 0, offset: 8, fwdArgRegs: + - { arg: 0, reg: '$w0' } } + - { bb: 0, offset: 10, fwdArgRegs: + - { arg: 0, reg: '$w0' } } +body: | + bb.0.entry: + liveins: $x0, $lr + + DBG_VALUE $x0, 0, !23, !DIExpression(), debug-location !24 + early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2 :: (store 8 into %stack.1), (store 8 into %stack.0) + $fp = frame-setup ADDXri $sp, 0, 0 + frame-setup CFI_INSTRUCTION def_cfa $w29, 16 + frame-setup CFI_INSTRUCTION offset $w30, -8, debug-location !25 + frame-setup CFI_INSTRUCTION offset $w29, -16, debug-location !25 + renamable $x8 = LDRXui killed renamable $x0, 0, debug-location !25 :: (load 8 from %ir.ptr2) + renamable $w0 = LDRWui killed renamable $x8, 0, debug-location !31 :: (load 4 from %ir.0) + BL @baz, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0, debug-location !34 + early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2, debug-location !35 :: (load 8 from %stack.1), (load 8 from %stack.0) + TCRETURNdi @baz, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, debug-location !35 + +... 
From 6a74ad6baad45b8572d196f7f290593ed62075b5 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 26 May 2020 14:27:22 -0700 Subject: [PATCH 154/770] [sancov] Accommodate sancov and coverage report server for use under Windows Summary: This patch makes the following changes to SanCov and its complementary Python script in order to resolve issues pertaining to non-UNIX file paths in JSON symbolization information: * Convert all paths to use forward slash. * Update `coverage-report-server.py` to correctly handle paths to sources which contain spaces. * Remove Linux platform restriction for all SanCov unit tests. All SanCov tests passed when run on my local Windows machine. Patch by Douglas Gliner. Reviewers: kcc, filcab, phosek, morehouse, vitalybuka, metzman Reviewed By: vitalybuka Subscribers: vsk, Dor1s, llvm-commits Tags: #sanitizers, #llvm Differential Revision: https://reviews.llvm.org/D51018 --- llvm/test/tools/sancov/blacklist.test | 2 +- llvm/test/tools/sancov/covered_functions.test | 2 +- llvm/test/tools/sancov/merge.test | 2 +- llvm/test/tools/sancov/not_covered_functions.test | 2 +- llvm/test/tools/sancov/print.test | 2 +- llvm/test/tools/sancov/stats.test | 2 +- llvm/test/tools/sancov/symbolize.test | 8 +++++--- llvm/test/tools/sancov/symbolize_noskip_dead_files.test | 2 +- llvm/test/tools/sancov/validation.test | 2 +- llvm/tools/sancov/coverage-report-server.py | 6 ++++-- llvm/tools/sancov/sancov.cpp | 2 +- 11 files changed, 18 insertions(+), 14 deletions(-) diff --git a/llvm/test/tools/sancov/blacklist.test b/llvm/test/tools/sancov/blacklist.test index 53f48534dc955..a9cf47f53cdb5 100644 --- a/llvm/test/tools/sancov/blacklist.test +++ b/llvm/test/tools/sancov/blacklist.test @@ -1,4 +1,4 @@ -REQUIRES: x86_64-linux +REQUIRES: x86-registered-target && host-byteorder-little-endian RUN: sancov -covered-functions %p/Inputs/test-linux_x86_64 %p/Inputs/test-linux_x86_64.0.sancov | FileCheck %s --check-prefix=ALL RUN: sancov -covered-functions -blacklist 
%p/Inputs/fun_blacklist.txt %p/Inputs/test-linux_x86_64 %p/Inputs/test-linux_x86_64.0.sancov | FileCheck %s RUN: sancov -covered-functions -blacklist %p/Inputs/src_blacklist.txt %p/Inputs/test-linux_x86_64 %p/Inputs/test-linux_x86_64.1.sancov | FileCheck --check-prefix=CHECK1 %s diff --git a/llvm/test/tools/sancov/covered_functions.test b/llvm/test/tools/sancov/covered_functions.test index 8126049a0ca1d..bcdfaf8879d41 100644 --- a/llvm/test/tools/sancov/covered_functions.test +++ b/llvm/test/tools/sancov/covered_functions.test @@ -1,4 +1,4 @@ -REQUIRES: x86_64-linux +REQUIRES: x86-registered-target && host-byteorder-little-endian RUN: sancov -covered-functions %p/Inputs/test-linux_x86_64 %p/Inputs/test-linux_x86_64.0.sancov | FileCheck %s RUN: sancov -covered-functions -strip_path_prefix=Inputs/ %p/Inputs/test-linux_x86_64 %p/Inputs/test-linux_x86_64.0.sancov | FileCheck --check-prefix=STRIP_PATH %s RUN: sancov -demangle=0 -covered-functions %p/Inputs/test-linux_x86_64 %p/Inputs/test-linux_x86_64.0.sancov | FileCheck --check-prefix=NO_DEMANGLE %s diff --git a/llvm/test/tools/sancov/merge.test b/llvm/test/tools/sancov/merge.test index 9c5ca9e6244ca..6c867654583ca 100644 --- a/llvm/test/tools/sancov/merge.test +++ b/llvm/test/tools/sancov/merge.test @@ -1,4 +1,4 @@ -REQUIRES: x86_64-linux +REQUIRES: x86-registered-target RUN: sancov -merge %p/Inputs/test-linux_x86_64.0.symcov| FileCheck --check-prefix=MERGE1 %s RUN: sancov -merge %p/Inputs/test-linux_x86_64.0.symcov %p/Inputs/test-linux_x86_64.1.symcov| FileCheck --check-prefix=MERGE2 %s diff --git a/llvm/test/tools/sancov/not_covered_functions.test b/llvm/test/tools/sancov/not_covered_functions.test index 4e0e81a52c2c8..d1b91f6e56820 100644 --- a/llvm/test/tools/sancov/not_covered_functions.test +++ b/llvm/test/tools/sancov/not_covered_functions.test @@ -1,4 +1,4 @@ -REQUIRES: x86_64-linux +REQUIRES: x86-registered-target && host-byteorder-little-endian RUN: sancov -skip-dead-files=0 -not-covered-functions 
%p/Inputs/test-linux_x86_64 %p/Inputs/test-linux_x86_64.0.sancov | FileCheck %s RUN: sancov -not-covered-functions %p/Inputs/test-linux_x86_64 %p/Inputs/test-linux_x86_64.1.sancov | FileCheck --check-prefix=CHECK1 --allow-empty %s diff --git a/llvm/test/tools/sancov/print.test b/llvm/test/tools/sancov/print.test index fe94216b051a2..62ab3d991b8e3 100644 --- a/llvm/test/tools/sancov/print.test +++ b/llvm/test/tools/sancov/print.test @@ -1,4 +1,4 @@ -REQUIRES: x86_64-linux +REQUIRES: x86-registered-target && host-byteorder-little-endian RUN: sancov -print %p/Inputs/test-linux_x86_64.0.sancov | FileCheck %s CHECK: 0x4e132b diff --git a/llvm/test/tools/sancov/stats.test b/llvm/test/tools/sancov/stats.test index 030d16a9dc616..46ff6e5e5db10 100644 --- a/llvm/test/tools/sancov/stats.test +++ b/llvm/test/tools/sancov/stats.test @@ -1,4 +1,4 @@ -REQUIRES: x86_64-linux +REQUIRES: x86-registered-target && host-byteorder-little-endian RUN: sancov -print-coverage-stats %p/Inputs/test-linux_x86_64 %p/Inputs/test-linux_x86_64.0.sancov | FileCheck %s CHECK: all-edges: 8 diff --git a/llvm/test/tools/sancov/symbolize.test b/llvm/test/tools/sancov/symbolize.test index 3cc426f919b96..acf58ae117123 100644 --- a/llvm/test/tools/sancov/symbolize.test +++ b/llvm/test/tools/sancov/symbolize.test @@ -1,5 +1,6 @@ -REQUIRES: x86_64-linux -RUN: sancov -symbolize -strip_path_prefix="llvm/" %p/Inputs/test-linux_x86_64 %p/Inputs/test-linux_x86_64.0.sancov | FileCheck %s +REQUIRES: x86-registered-target && host-byteorder-little-endian +RUN: sancov -symbolize -strip_path_prefix="llvm/" %p/Inputs/test-linux_x86_64 %p/Inputs/test-linux_x86_64.0.sancov | FileCheck %s --check-prefixes=CHECK,STRIP +RUN: sancov -symbolize %p/Inputs/test-linux_x86_64 %p/Inputs/test-linux_x86_64.0.sancov | FileCheck %s --check-prefixes=CHECK,NOSTRIP CHECK: { CHECK-NEXT: "covered-points": [ @@ -11,7 +12,8 @@ CHECK-NEXT: "4e1586" CHECK-NEXT: ], CHECK-NEXT: "binary-hash": "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5", 
CHECK-NEXT: "point-symbol-info": { -CHECK-NEXT: "test/tools/sancov/Inputs/test.cpp": { +STRIP-NEXT: "test/tools/sancov/Inputs/test.cpp": { +NOSTRIP-NEXT: "/usr/local/google/home/aizatsky/src/llvm/test/tools/sancov/Inputs/test.cpp": { CHECK-NEXT: "bar(std::string)": { CHECK-NEXT: "4e132b": "12:0" CHECK-NEXT: }, diff --git a/llvm/test/tools/sancov/symbolize_noskip_dead_files.test b/llvm/test/tools/sancov/symbolize_noskip_dead_files.test index 9ddf89cbf56d2..0038ea197735a 100644 --- a/llvm/test/tools/sancov/symbolize_noskip_dead_files.test +++ b/llvm/test/tools/sancov/symbolize_noskip_dead_files.test @@ -1,4 +1,4 @@ -REQUIRES: x86_64-linux +REQUIRES: x86-registered-target && host-byteorder-little-endian RUN: sancov -symbolize -skip-dead-files=0 -strip_path_prefix="llvm/" %p/Inputs/test-linux_x86_64 %p/Inputs/test-linux_x86_64.0.sancov | FileCheck %s CHECK: { diff --git a/llvm/test/tools/sancov/validation.test b/llvm/test/tools/sancov/validation.test index 437870cf597bf..fdcfd0610f82e 100644 --- a/llvm/test/tools/sancov/validation.test +++ b/llvm/test/tools/sancov/validation.test @@ -1,4 +1,4 @@ -REQUIRES: x86_64-linux +REQUIRES: x86-registered-target RUN: not sancov -covered-functions %p/Inputs/test-linux_x86_64 2>&1 | FileCheck --check-prefix=NOCFILE %s NOCFILE: WARNING: No coverage file for {{.*}}test-linux_x86_64 diff --git a/llvm/tools/sancov/coverage-report-server.py b/llvm/tools/sancov/coverage-report-server.py index 251d8f1b77bac..5ea978fae642a 100755 --- a/llvm/tools/sancov/coverage-report-server.py +++ b/llvm/tools/sancov/coverage-report-server.py @@ -32,6 +32,7 @@ import os import string import math +import urllib INDEX_PAGE_TMPL = """ @@ -128,6 +129,7 @@ class ServerHandler(http.server.BaseHTTPRequestHandler): src_path = None def do_GET(self): + norm_path = os.path.normpath(urllib.parse.unquote(self.path[1:])) if self.path == '/': self.send_response(200) self.send_header("Content-type", "text/html; charset=utf-8") @@ -147,8 +149,8 @@ def do_GET(self): 
response = string.Template(INDEX_PAGE_TMPL).safe_substitute( filenames='\n'.join(filelist)) self.wfile.write(response.encode('UTF-8', 'replace')) - elif self.symcov_data.has_file(self.path[1:]): - filename = self.path[1:] + elif self.symcov_data.has_file(norm_path): + filename = norm_path filepath = os.path.join(self.src_path, filename) if not os.path.exists(filepath): self.send_response(404) diff --git a/llvm/tools/sancov/sancov.cpp b/llvm/tools/sancov/sancov.cpp index ed384a2710072..6f949f2963658 100644 --- a/llvm/tools/sancov/sancov.cpp +++ b/llvm/tools/sancov/sancov.cpp @@ -471,7 +471,7 @@ static std::unique_ptr createSymbolizer() { static std::string normalizeFilename(const std::string &FileName) { SmallString<256> S(FileName); sys::path::remove_dots(S, /* remove_dot_dot */ true); - return stripPathPrefix(std::string(S)); + return stripPathPrefix(sys::path::convert_to_slash(std::string(S))); } class Blacklists { From 1e06b169be3e59799b8dcaf16d1d03bd4c12da42 Mon Sep 17 00:00:00 2001 From: Jon Roelofs Date: Fri, 22 May 2020 06:53:55 -0600 Subject: [PATCH 155/770] [clang][docs] Document additional bits of libc that -ffreestanding envs must provide Differential Revision: https://reviews.llvm.org/D80436 --- clang/docs/CommandGuide/clang.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/docs/CommandGuide/clang.rst b/clang/docs/CommandGuide/clang.rst index 6947450beb43d..de0e0eda90974 100644 --- a/clang/docs/CommandGuide/clang.rst +++ b/clang/docs/CommandGuide/clang.rst @@ -246,7 +246,9 @@ Language Selection and Mode Options .. option:: -ffreestanding Indicate that the file should be compiled for a freestanding, not a hosted, - environment. + environment. Note that it is assumed that a freestanding environment will + additionally provide `memcpy`, `memmove`, `memset` and `memcmp` + implementations, as these are needed for efficient codegen for many programs. .. 
option:: -fno-builtin From 9eacda51fa23abf4f6503ff533dcb70071cbe569 Mon Sep 17 00:00:00 2001 From: Chris Jackson Date: Tue, 26 May 2020 22:33:59 +0100 Subject: [PATCH 156/770] [debuginfo] Fix broken tests from MachineLICM salvaging fix Previous commit: bd7ff5d94f - Added missing x86 triples - Added missing asserts --- llvm/test/DebugInfo/MIR/X86/mlicm-hoist-post-regalloc.mir | 4 +++- llvm/test/DebugInfo/MIR/X86/mlicm-hoist-pre-regalloc.mir | 3 ++- llvm/test/DebugInfo/MIR/X86/mlicm-sink.mir | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-post-regalloc.mir b/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-post-regalloc.mir index 97cdea090c9c5..8b6e160cd92ae 100644 --- a/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-post-regalloc.mir +++ b/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-post-regalloc.mir @@ -1,5 +1,6 @@ --- | - ; RUN: llc -start-before=phi-node-elimination -stop-after=machinelicm -o - %s | FileCheck %s + ; REQUIRES: asserts + ; RUN: llc -start-before=phi-node-elimination -stop-after=machinelicm -debug-only=machinelicm -o - %s | FileCheck %s ; Ensure we execute machinelicm post register allocation. ; Line numbers should not be retained when loop invariant instructions are hoisted. ; @@ -7,6 +8,7 @@ ; CHECK: MOV64rm $rip, 1, $noreg, target-flags(x86-gotpcrel) @x, $noreg :: (load 8 from got) ; CHECK-LABEL: bb.1.while.body: ; + target triple = "x86_64-unknown-linux-gnu" @x = common local_unnamed_addr global i32 0, align 4, !dbg !0 diff --git a/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-pre-regalloc.mir b/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-pre-regalloc.mir index 8c0eb376eb408..fa5da8f1fe4c8 100644 --- a/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-pre-regalloc.mir +++ b/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-pre-regalloc.mir @@ -1,4 +1,4 @@ ---- | +--- | ; RUN: llc -run-pass=machinelicm -o - %s | FileCheck %s ; Line numbers should not be retained when loop invariant instructions are hoisted. 
; Doing so causes poor stepping bevavior. @@ -23,6 +23,7 @@ ; ; ModuleID = 'tx.ll' source_filename = "t.c" + target triple = "x86_64-unknown-linux-gnu" @x = common local_unnamed_addr global i32 0, align 4, !dbg !0 diff --git a/llvm/test/DebugInfo/MIR/X86/mlicm-sink.mir b/llvm/test/DebugInfo/MIR/X86/mlicm-sink.mir index 7b5a19ffa9e7f..24fbe71b2a349 100644 --- a/llvm/test/DebugInfo/MIR/X86/mlicm-sink.mir +++ b/llvm/test/DebugInfo/MIR/X86/mlicm-sink.mir @@ -4,6 +4,7 @@ ; CHECK: %0:gr64 = nuw ADD64ri8 %9, 4, implicit-def dead $eflags ; ; When instructions are sunk to prevent register spills, line numbers should not be retained. + target triple = "x86_64-unknown-linux-gnu" %struct.A = type { i32, i32, i32, i32, i32, i32 } From 5192783bb29c32196f87044de113fc43d7dfaae8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirst=C3=B3f=20Umann?= Date: Mon, 13 Apr 2020 20:51:27 +0200 Subject: [PATCH 157/770] [analyzer][RetainCount] Tie diagnostics to osx.cocoa.RetainCount rather than RetainCountBase, for the most part Similarly to other patches of mine, I'm trying to uniformize the checker interface so that dependency checkers don't emit diagnostics. The checker that made me most anxious so far was definitely RetainCount, because it is definitely impacted by backward compatibility concerns, and implements a checker hierarchy that is a lot different to other examples of similar size. Also, I don't have authority, nor expertise regarding ObjC related code, so I welcome any objection/discussion! 
Differential Revision: https://reviews.llvm.org/D78099 --- .../RetainCountChecker/RetainCountChecker.cpp | 71 ++- .../RetainCountChecker/RetainCountChecker.h | 32 +- .../RetainCountDiagnostics.cpp | 19 +- .../RetainCountDiagnostics.h | 17 +- .../Inputs/expected-plists/edges-new.mm.plist | 12 +- .../Inputs/expected-plists/objc-arc.m.plist | 24 +- .../objc-radar17039661.m.plist | 4 +- .../plist-output-alternate.m.plist | 4 +- .../expected-plists/plist-output.m.plist | 4 +- .../retain-release-path-notes.m.plist | 108 ++--- .../retain-release.m.objc.plist | 420 +++++++++--------- .../retain-release.m.objcpp.plist | 420 +++++++++--------- .../test/Analysis/incorrect-checker-names.mm | 2 +- .../Inputs/expected-plists/path-notes.m.plist | 4 +- .../Analysis/test-separate-retaincount.cpp | 14 +- 15 files changed, 591 insertions(+), 564 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp index 280d511e87c56..3f3267ff93916 100644 --- a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp @@ -12,12 +12,12 @@ //===----------------------------------------------------------------------===// #include "RetainCountChecker.h" +#include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" using namespace clang; using namespace ento; using namespace retaincountchecker; -using llvm::StrInStrNoCase; REGISTER_MAP_WITH_PROGRAMSTATE(RefBindings, SymbolRef, RefVal) @@ -701,7 +701,7 @@ void RetainCountChecker::checkSummary(const RetainSummary &Summ, for (ProgramStateRef St : Out) { if (DeallocSent) { - C.addTransition(St, C.getPredecessor(), &DeallocSentTag); + C.addTransition(St, C.getPredecessor(), &getDeallocSentTag()); } else { C.addTransition(St); } @@ -844,13 +844,13 @@ 
RetainCountChecker::errorKindToBugKind(RefVal::Kind ErrorKind, SymbolRef Sym) const { switch (ErrorKind) { case RefVal::ErrorUseAfterRelease: - return useAfterRelease; + return *UseAfterRelease; case RefVal::ErrorReleaseNotOwned: - return releaseNotOwned; + return *ReleaseNotOwned; case RefVal::ErrorDeallocNotOwned: if (Sym->getType()->getPointeeCXXRecordDecl()) - return freeNotOwned; - return deallocNotOwned; + return *FreeNotOwned; + return *DeallocNotOwned; default: llvm_unreachable("Unhandled error."); } @@ -946,7 +946,7 @@ bool RetainCountChecker::evalCall(const CallEvent &Call, // Assume that output is zero on the other branch. NullOutputState = NullOutputState->BindExpr( CE, LCtx, C.getSValBuilder().makeNull(), /*Invalidate=*/false); - C.addTransition(NullOutputState, &CastFailTag); + C.addTransition(NullOutputState, &getCastFailTag()); // And on the original branch assume that both input and // output are non-zero. @@ -1095,7 +1095,7 @@ ExplodedNode * RetainCountChecker::checkReturnWithRetEffect(const ReturnStmt *S, if (N) { const LangOptions &LOpts = C.getASTContext().getLangOpts(); auto R = - std::make_unique(leakAtReturn, LOpts, N, Sym, C); + std::make_unique(*LeakAtReturn, LOpts, N, Sym, C); C.emitReport(std::move(R)); } return N; @@ -1120,7 +1120,7 @@ ExplodedNode * RetainCountChecker::checkReturnWithRetEffect(const ReturnStmt *S, ExplodedNode *N = C.addTransition(state, Pred, &ReturnNotOwnedTag); if (N) { auto R = std::make_unique( - returnNotOwnedForOwned, C.getASTContext().getLangOpts(), N, Sym); + *ReturnNotOwnedForOwned, C.getASTContext().getLangOpts(), N, Sym); C.emitReport(std::move(R)); } return N; @@ -1273,8 +1273,8 @@ RetainCountChecker::handleAutoreleaseCounts(ProgramStateRef state, os << "has a +" << V.getCount() << " retain count"; const LangOptions &LOpts = Ctx.getASTContext().getLangOpts(); - auto R = std::make_unique(overAutorelease, LOpts, N, Sym, - os.str()); + auto R = std::make_unique(*OverAutorelease, LOpts, N, Sym, + os.str()); 
Ctx.emitReport(std::move(R)); } @@ -1320,7 +1320,7 @@ RetainCountChecker::processLeaks(ProgramStateRef state, if (N) { for (SymbolRef L : Leaked) { - const RefCountBug &BT = Pred ? leakWithinFunction : leakAtReturn; + const RefCountBug &BT = Pred ? *LeakWithinFunction : *LeakAtReturn; Ctx.emitReport(std::make_unique(BT, LOpts, N, L, Ctx)); } } @@ -1473,19 +1473,39 @@ void RetainCountChecker::printState(raw_ostream &Out, ProgramStateRef State, // Checker registration. //===----------------------------------------------------------------------===// +std::unique_ptr RetainCountChecker::DeallocSentTag; +std::unique_ptr RetainCountChecker::CastFailTag; + void ento::registerRetainCountBase(CheckerManager &Mgr) { - Mgr.registerChecker(); + auto *Chk = Mgr.registerChecker(); + Chk->DeallocSentTag = + std::make_unique(Chk, "DeallocSent"); + Chk->CastFailTag = + std::make_unique(Chk, "DynamicCastFail"); } bool ento::shouldRegisterRetainCountBase(const CheckerManager &mgr) { return true; } - void ento::registerRetainCountChecker(CheckerManager &Mgr) { auto *Chk = Mgr.getChecker(); Chk->TrackObjCAndCFObjects = true; Chk->TrackNSCFStartParam = Mgr.getAnalyzerOptions().getCheckerBooleanOption( Mgr.getCurrentCheckerName(), "TrackNSCFStartParam"); + +#define INIT_BUGTYPE(KIND) \ + Chk->KIND = std::make_unique(Mgr.getCurrentCheckerName(), \ + RefCountBug::KIND); + // TODO: Ideally, we should have a checker for each of these bug types. 
+ INIT_BUGTYPE(UseAfterRelease) + INIT_BUGTYPE(ReleaseNotOwned) + INIT_BUGTYPE(DeallocNotOwned) + INIT_BUGTYPE(FreeNotOwned) + INIT_BUGTYPE(OverAutorelease) + INIT_BUGTYPE(ReturnNotOwnedForOwned) + INIT_BUGTYPE(LeakWithinFunction) + INIT_BUGTYPE(LeakAtReturn) +#undef INIT_BUGTYPE } bool ento::shouldRegisterRetainCountChecker(const CheckerManager &mgr) { @@ -1495,6 +1515,29 @@ bool ento::shouldRegisterRetainCountChecker(const CheckerManager &mgr) { void ento::registerOSObjectRetainCountChecker(CheckerManager &Mgr) { auto *Chk = Mgr.getChecker(); Chk->TrackOSObjects = true; + + // FIXME: We want bug reports to always have the same checker name associated + // with them, yet here, if RetainCountChecker is disabled but + // OSObjectRetainCountChecker is enabled, the checker names will be different. + // This hack will make it so that the checker name depends on which checker is + // enabled rather than on the registration order. + // For the most part, we want **non-hidden checkers** to be associated with + // diagnostics, and **hidden checker options** with the fine-tuning of + // modeling. Following this logic, OSObjectRetainCountChecker should be the + // latter, but we can't just remove it for backward compatibility reasons. 
+#define LAZY_INIT_BUGTYPE(KIND) \ + if (!Chk->KIND) \ + Chk->KIND = std::make_unique(Mgr.getCurrentCheckerName(), \ + RefCountBug::KIND); + LAZY_INIT_BUGTYPE(UseAfterRelease) + LAZY_INIT_BUGTYPE(ReleaseNotOwned) + LAZY_INIT_BUGTYPE(DeallocNotOwned) + LAZY_INIT_BUGTYPE(FreeNotOwned) + LAZY_INIT_BUGTYPE(OverAutorelease) + LAZY_INIT_BUGTYPE(ReturnNotOwnedForOwned) + LAZY_INIT_BUGTYPE(LeakWithinFunction) + LAZY_INIT_BUGTYPE(LeakAtReturn) +#undef LAZY_INIT_BUGTYPE } bool ento::shouldRegisterOSObjectRetainCountChecker(const CheckerManager &mgr) { diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.h b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.h index dd79bbef321c3..223e28c2c5b86 100644 --- a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.h +++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.h @@ -251,20 +251,20 @@ class RetainCountChecker eval::Assume, eval::Call > { - RefCountBug useAfterRelease{this, RefCountBug::UseAfterRelease}; - RefCountBug releaseNotOwned{this, RefCountBug::ReleaseNotOwned}; - RefCountBug deallocNotOwned{this, RefCountBug::DeallocNotOwned}; - RefCountBug freeNotOwned{this, RefCountBug::FreeNotOwned}; - RefCountBug overAutorelease{this, RefCountBug::OverAutorelease}; - RefCountBug returnNotOwnedForOwned{this, RefCountBug::ReturnNotOwnedForOwned}; - RefCountBug leakWithinFunction{this, RefCountBug::LeakWithinFunction}; - RefCountBug leakAtReturn{this, RefCountBug::LeakAtReturn}; - - CheckerProgramPointTag DeallocSentTag{this, "DeallocSent"}; - CheckerProgramPointTag CastFailTag{this, "DynamicCastFail"}; +public: + std::unique_ptr UseAfterRelease; + std::unique_ptr ReleaseNotOwned; + std::unique_ptr DeallocNotOwned; + std::unique_ptr FreeNotOwned; + std::unique_ptr OverAutorelease; + std::unique_ptr ReturnNotOwnedForOwned; + std::unique_ptr LeakWithinFunction; + std::unique_ptr LeakAtReturn; mutable std::unique_ptr Summaries; -public: + 
+ static std::unique_ptr DeallocSentTag; + static std::unique_ptr CastFailTag; /// Track Objective-C and CoreFoundation objects. bool TrackObjCAndCFObjects = false; @@ -360,13 +360,11 @@ class RetainCountChecker CheckerContext &Ctx, ExplodedNode *Pred = nullptr) const; - const CheckerProgramPointTag &getDeallocSentTag() const { - return DeallocSentTag; + static const CheckerProgramPointTag &getDeallocSentTag() { + return *DeallocSentTag; } - const CheckerProgramPointTag &getCastFailTag() const { - return CastFailTag; - } + static const CheckerProgramPointTag &getCastFailTag() { return *CastFailTag; } private: /// Perform the necessary checks and state adjustments at the end of the diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp index cfad47626354a..1d8ed90f7590c 100644 --- a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp @@ -18,7 +18,7 @@ using namespace clang; using namespace ento; using namespace retaincountchecker; -StringRef RefCountBug::bugTypeToName(RefCountBug::RefCountBugType BT) { +StringRef RefCountBug::bugTypeToName(RefCountBug::RefCountBugKind BT) { switch (BT) { case UseAfterRelease: return "Use-after-release"; @@ -37,7 +37,7 @@ StringRef RefCountBug::bugTypeToName(RefCountBug::RefCountBugType BT) { case LeakAtReturn: return "Leak of returned object"; } - llvm_unreachable("Unknown RefCountBugType"); + llvm_unreachable("Unknown RefCountBugKind"); } StringRef RefCountBug::getDescription() const { @@ -60,13 +60,14 @@ StringRef RefCountBug::getDescription() const { case LeakAtReturn: return ""; } - llvm_unreachable("Unknown RefCountBugType"); + llvm_unreachable("Unknown RefCountBugKind"); } -RefCountBug::RefCountBug(const CheckerBase *Checker, RefCountBugType BT) +RefCountBug::RefCountBug(CheckerNameRef Checker, 
RefCountBugKind BT) : BugType(Checker, bugTypeToName(BT), categories::MemoryRefCount, - /*SuppressOnSink=*/BT == LeakWithinFunction || BT == LeakAtReturn), - BT(BT), Checker(Checker) {} + /*SuppressOnSink=*/BT == LeakWithinFunction || + BT == LeakAtReturn), + BT(BT) {} static bool isNumericLiteralExpression(const Expr *E) { // FIXME: This set of cases was copied from SemaExprObjC. @@ -453,8 +454,6 @@ RefCountReportVisitor::VisitNode(const ExplodedNode *N, BugReporterContext &BRC, PathSensitiveBugReport &BR) { const auto &BT = static_cast(BR.getBugType()); - const auto *Checker = - static_cast(BT.getChecker()); bool IsFreeUnowned = BT.getBugType() == RefCountBug::FreeNotOwned || BT.getBugType() == RefCountBug::DeallocNotOwned; @@ -545,11 +544,11 @@ RefCountReportVisitor::VisitNode(const ExplodedNode *N, BugReporterContext &BRC, const ProgramPointTag *Tag = N->getLocation().getTag(); - if (Tag == &Checker->getCastFailTag()) { + if (Tag == &RetainCountChecker::getCastFailTag()) { os << "Assuming dynamic cast returns null due to type mismatch"; } - if (Tag == &Checker->getDeallocSentTag()) { + if (Tag == &RetainCountChecker::getDeallocSentTag()) { // We only have summaries attached to nodes after evaluating CallExpr and // ObjCMessageExprs. 
const Stmt *S = N->getLocation().castAs().getStmt(); diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.h b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.h index e9e2777540548..286a8ae2ef7d7 100644 --- a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.h +++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.h @@ -26,7 +26,7 @@ namespace retaincountchecker { class RefCountBug : public BugType { public: - enum RefCountBugType { + enum RefCountBugKind { UseAfterRelease, ReleaseNotOwned, DeallocNotOwned, @@ -36,21 +36,14 @@ class RefCountBug : public BugType { LeakWithinFunction, LeakAtReturn, }; - RefCountBug(const CheckerBase *checker, RefCountBugType BT); + RefCountBug(CheckerNameRef Checker, RefCountBugKind BT); StringRef getDescription() const; - RefCountBugType getBugType() const { - return BT; - } - - const CheckerBase *getChecker() const { - return Checker; - } + RefCountBugKind getBugType() const { return BT; } private: - RefCountBugType BT; - const CheckerBase *Checker; - static StringRef bugTypeToName(RefCountBugType BT); + RefCountBugKind BT; + static StringRef bugTypeToName(RefCountBugKind BT); }; class RefCountReport : public PathSensitiveBugReport { diff --git a/clang/test/Analysis/Inputs/expected-plists/edges-new.mm.plist b/clang/test/Analysis/Inputs/expected-plists/edges-new.mm.plist index b949e20ebbe86..74e11075fe3d4 100644 --- a/clang/test/Analysis/Inputs/expected-plists/edges-new.mm.plist +++ b/clang/test/Analysis/Inputs/expected-plists/edges-new.mm.plist @@ -2119,9 +2119,9 @@ descriptionPotential leak of an object stored into 'value' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextb2b15a95787e594ff79f02c600e9d357 + issue_hash_content_of_line_in_context29a10ca4af622b6146ca082e49d919d6 
issue_context_kindfunction issue_contextrdar8331641 issue_hash_function_offset2 @@ -11612,9 +11612,9 @@ descriptionPotential leak of an object stored into 'foo' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextef342aeb2f2719117ddd4ef1b72f5ba7 + issue_hash_content_of_line_in_contextf533db5cbb9c20d171f9f92105789dc4 issue_context_kindObjective-C method issue_contexttest2 issue_hash_function_offset2 @@ -21954,9 +21954,9 @@ descriptionPotential leak of an object stored into 'foo' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextf81f51dd154d0a11cab412a1cd1cd095 + issue_hash_content_of_line_in_context5616a7601faa1a8c2ac56fa1b595b172 issue_context_kindfunction issue_contextlongLines issue_hash_function_offset1 diff --git a/clang/test/Analysis/Inputs/expected-plists/objc-arc.m.plist b/clang/test/Analysis/Inputs/expected-plists/objc-arc.m.plist index 574575b6d25a3..d3a1a5c6c47fd 100644 --- a/clang/test/Analysis/Inputs/expected-plists/objc-arc.m.plist +++ b/clang/test/Analysis/Inputs/expected-plists/objc-arc.m.plist @@ -312,9 +312,9 @@ descriptionPotential leak of an object stored into 'date' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context7bd4a6e187407677b2d9e717576818bf + issue_hash_content_of_line_in_context61d185b2522d15fb327f6784e0217adf issue_context_kindfunction issue_contexttest_cf_leak issue_hash_function_offset2 @@ -843,9 +843,9 @@ descriptionPotential leak of an object stored into 'obj5' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context0aed4f65cb3dba7331f9319fd1ceb003 + 
issue_hash_content_of_line_in_context5baa7d5f38420d0a035aa61607675f3e issue_context_kindfunction issue_contextfrom_cf issue_hash_function_offset7 @@ -989,9 +989,9 @@ descriptionPotential leak of an object stored into 'obj6' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context0851961d40a4c8331ebe713f4a3e05f4 + issue_hash_content_of_line_in_context4665e04694fd55e7c4ed7a67860b3b74 issue_context_kindfunction issue_contextfrom_cf issue_hash_function_offset8 @@ -1423,9 +1423,9 @@ descriptionPotential leak of an object stored into 'date' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context00045bff3b7c26fe7cb80a71f512575c + issue_hash_content_of_line_in_context798e65f80df0526369f9bb240e3d91fd issue_context_kindfunction issue_contexttest_objc_unretainedObject issue_hash_function_offset2 @@ -1734,9 +1734,9 @@ descriptionPotential leak of an object of type 'CFStringRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context9f258122568ea8763047e98db8a52647 + issue_hash_content_of_line_in_contexte1fbcc142b678b3c2c43737ee35b64d9 issue_context_kindfunction issue_contexttest_objc_arrays issue_hash_function_offset24 @@ -1928,9 +1928,9 @@ descriptionPotential leak of an object stored into 'o' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context8187b0ba5cadd42594120fe05d871502 + issue_hash_content_of_line_in_contexte300a279615a384d2b310329651d3978 issue_context_kindfunction issue_contextrdar11059275_positive issue_hash_function_offset1 diff --git 
a/clang/test/Analysis/Inputs/expected-plists/objc-radar17039661.m.plist b/clang/test/Analysis/Inputs/expected-plists/objc-radar17039661.m.plist index 3c87e3909bec5..23bd69851c0be 100644 --- a/clang/test/Analysis/Inputs/expected-plists/objc-radar17039661.m.plist +++ b/clang/test/Analysis/Inputs/expected-plists/objc-radar17039661.m.plist @@ -1329,9 +1329,9 @@ descriptionPotential leak of an object of type 'NSNumber *' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextc204ce6cce660a7714c801bdf9183431 + issue_hash_content_of_line_in_context500e2bbda41c8086771ad98b6bcfdc50 location line53 diff --git a/clang/test/Analysis/Inputs/expected-plists/plist-output-alternate.m.plist b/clang/test/Analysis/Inputs/expected-plists/plist-output-alternate.m.plist index 53bc4cb66ef91..1c8d962100c1f 100644 --- a/clang/test/Analysis/Inputs/expected-plists/plist-output-alternate.m.plist +++ b/clang/test/Analysis/Inputs/expected-plists/plist-output-alternate.m.plist @@ -1485,9 +1485,9 @@ descriptionPotential leak of an object stored into 'value' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextb2b15a95787e594ff79f02c600e9d357 + issue_hash_content_of_line_in_context29a10ca4af622b6146ca082e49d919d6 issue_context_kindfunction issue_contextrdar8331641 issue_hash_function_offset2 diff --git a/clang/test/Analysis/Inputs/expected-plists/plist-output.m.plist b/clang/test/Analysis/Inputs/expected-plists/plist-output.m.plist index 9203e48c46835..76fec546267cd 100644 --- a/clang/test/Analysis/Inputs/expected-plists/plist-output.m.plist +++ b/clang/test/Analysis/Inputs/expected-plists/plist-output.m.plist @@ -2372,9 +2372,9 @@ descriptionPotential leak of an object stored into 'foo' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - 
check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextef342aeb2f2719117ddd4ef1b72f5ba7 + issue_hash_content_of_line_in_contextf533db5cbb9c20d171f9f92105789dc4 issue_context_kindObjective-C method issue_contexttest2 issue_hash_function_offset2 diff --git a/clang/test/Analysis/Inputs/expected-plists/retain-release-path-notes.m.plist b/clang/test/Analysis/Inputs/expected-plists/retain-release-path-notes.m.plist index 2d67e6e34e123..71ccd79bf3a7b 100644 --- a/clang/test/Analysis/Inputs/expected-plists/retain-release-path-notes.m.plist +++ b/clang/test/Analysis/Inputs/expected-plists/retain-release-path-notes.m.plist @@ -104,9 +104,9 @@ descriptionPotential leak of an object stored into 'leaked' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextfc2476fe550128eebe2a0a8fa4299a59 + issue_hash_content_of_line_in_contextd21e9660cc6434ef84a51f39ffcdce86 issue_context_kindfunction issue_contextcreationViaAlloc issue_hash_function_offset1 @@ -225,9 +225,9 @@ descriptionPotential leak of an object stored into 'leaked' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context31ad4a19f94c8994ebf7e887ed4ab840 + issue_hash_content_of_line_in_contextf8ec2601a04113e567aa1d09c9902c91 issue_context_kindfunction issue_contextcreationViaCFCreate issue_hash_function_offset1 @@ -571,9 +571,9 @@ descriptionPotential leak of an object stored into 'leaked' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context1b654ea7bbef1493beda9e0a667dd859 + issue_hash_content_of_line_in_contextdd26a8ad9a7a057feaa636974b43ccb0 issue_context_kindfunction issue_contextacquisitionViaMethod 
issue_hash_function_offset1 @@ -770,9 +770,9 @@ descriptionPotential leak of an object stored into 'leaked' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context3fc42b0b859923347e789ad601d29b2a + issue_hash_content_of_line_in_context2f2de5d7fe728958585598b619069e5a issue_context_kindfunction issue_contextacquisitionViaProperty issue_hash_function_offset1 @@ -967,9 +967,9 @@ descriptionPotential leak of an object stored into 'leaked' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context0b4d42c9cc01d55bc281c067f1cc1c3d + issue_hash_content_of_line_in_context1c02b65e83dad1b22270ff5a71de3118 issue_context_kindfunction issue_contextacquisitionViaCFFunction issue_hash_function_offset1 @@ -1164,9 +1164,9 @@ descriptionReference-counted object is used after it is released categoryMemory (Core Foundation/Objective-C/OSObject) typeUse-after-release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextbaa3d5ecb7824a6997e0734ad148ec55 + issue_hash_content_of_line_in_context03c23f0f82d7f2fd880a22e0d9cf14b9 issue_context_kindfunction issue_contextexplicitDealloc issue_hash_function_offset3 @@ -1361,9 +1361,9 @@ descriptionReference-counted object is used after it is released categoryMemory (Core Foundation/Objective-C/OSObject) typeUse-after-release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextce73a05e0a1055b4b451f5015edbd6ec + issue_hash_content_of_line_in_context6f1b3f0c6c7f79f1af9b313273a01e92 issue_context_kindfunction issue_contextimplicitDealloc issue_hash_function_offset3 @@ -1633,9 +1633,9 @@ descriptionObject autoreleased too many times categoryMemory (Core Foundation/Objective-C/OSObject) typeObject 
autoreleased too many times - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextb8cbd4dae812cd8d8faaf3b48dad2021 + issue_hash_content_of_line_in_contextcb5e4205a8f925230a70715914a2e3d2 issue_context_kindfunction issue_contextoverAutorelease issue_hash_function_offset4 @@ -1831,9 +1831,9 @@ descriptionObject autoreleased too many times categoryMemory (Core Foundation/Objective-C/OSObject) typeObject autoreleased too many times - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextee96f7e22e32b24d677efa45b2395915 + issue_hash_content_of_line_in_context1edd178e5ad76c79ce9812f519e8f467 issue_context_kindfunction issue_contextautoreleaseUnowned issue_hash_function_offset3 @@ -1953,9 +1953,9 @@ descriptionPotential leak of an object stored into 'leaked' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context12887d3520c4c9fd03995feeb69967ec + issue_hash_content_of_line_in_context3f08690fae9687c29bb23b7a7cb7995b issue_context_kindfunction issue_contextmakeCollectableIgnored issue_hash_function_offset1 @@ -2076,9 +2076,9 @@ descriptionObject with a +0 retain count returned to caller where a +1 (owning) retain count is expected categoryMemory (Core Foundation/Objective-C/OSObject) typeMethod should return an owned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextd715154641c7b248d401df12c1ce0808 + issue_hash_content_of_line_in_context4b621ab5f8f2ef9240699119f4d874cb issue_context_kindfunction issue_contextCFCopyRuleViolation issue_hash_function_offset2 @@ -2197,9 +2197,9 @@ descriptionPotential leak of an object stored into 'object' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak of returned object - check_nameosx.cocoa.RetainCountBase + 
check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context58d56f1d5982f5923ab07900852ea30c + issue_hash_content_of_line_in_context5248d2310322982d02e5f3d564249b4f issue_context_kindfunction issue_contextCFGetRuleViolation issue_hash_function_offset1 @@ -2318,9 +2318,9 @@ descriptionObject with a +0 retain count returned to caller where a +1 (owning) retain count is expected categoryMemory (Core Foundation/Objective-C/OSObject) typeMethod should return an owned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextcc20c23c14b2363ca453c24ede3bc38d + issue_hash_content_of_line_in_context4f23ad2725fb68134cec8b8354cd295c issue_context_kindObjective-C method issue_contextcopyViolation issue_hash_function_offset2 @@ -2439,9 +2439,9 @@ descriptionObject with a +0 retain count returned to caller where a +1 (owning) retain count is expected categoryMemory (Core Foundation/Objective-C/OSObject) typeMethod should return an owned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context4eefa164042de89f947573c1df2fce03 + issue_hash_content_of_line_in_contextda1dab126ed46b144040160ae8628460 issue_context_kindObjective-C method issue_contextcopyViolationIndexedSubscript issue_hash_function_offset2 @@ -2560,9 +2560,9 @@ descriptionObject with a +0 retain count returned to caller where a +1 (owning) retain count is expected categoryMemory (Core Foundation/Objective-C/OSObject) typeMethod should return an owned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contexte8ad4d8a073872a91d2b0225319cd521 + issue_hash_content_of_line_in_context52877f9471b1ecdaf213b39016b84e52 issue_context_kindObjective-C method issue_contextcopyViolationKeyedSubscript issue_hash_function_offset2 @@ -2681,9 +2681,9 @@ descriptionPotential leak of an object stored into 'result' categoryMemory (Core 
Foundation/Objective-C/OSObject) typeLeak of returned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextf858bd7c1720b43bd464bbec97a1cb6b + issue_hash_content_of_line_in_contextcf8c65a18ad9982cb9848a266cd9c61b issue_context_kindObjective-C method issue_contextgetViolation issue_hash_function_offset1 @@ -2877,9 +2877,9 @@ descriptionObject with a +0 retain count returned to caller where a +1 (owning) retain count is expected categoryMemory (Core Foundation/Objective-C/OSObject) typeMethod should return an owned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context4da16a9c4c9d9587418f276359c5f098 + issue_hash_content_of_line_in_contexte7b798151545b45a994592df0d27d250 issue_context_kindObjective-C method issue_contextcopyAutorelease issue_hash_function_offset3 @@ -2999,9 +2999,9 @@ descriptionIncorrect decrement of the reference count of an object that is not owned at this point by the caller categoryMemory (Core Foundation/Objective-C/OSObject) typeBad release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context18ba6f4fe59b182bee196c1a976e3aa2 + issue_hash_content_of_line_in_context4e0c810e2b301aca3f636ad7e3d6b0b8 issue_context_kindfunction issue_contexttestNumericLiteral issue_hash_function_offset2 @@ -3120,9 +3120,9 @@ descriptionIncorrect decrement of the reference count of an object that is not owned at this point by the caller categoryMemory (Core Foundation/Objective-C/OSObject) typeBad release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextac4375d1ab6887c27055ee00b20a212e + issue_hash_content_of_line_in_context1d054002016aa4360aaf23a4c4d8fbb7 issue_context_kindfunction issue_contexttestBoxedInt issue_hash_function_offset2 @@ -3241,9 +3241,9 @@ descriptionIncorrect decrement of the reference count of an object 
that is not owned at this point by the caller categoryMemory (Core Foundation/Objective-C/OSObject) typeBad release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextcd2f260edad8ce1826b21acc49cba277 + issue_hash_content_of_line_in_context67ca92144b05322ee4569aea88d08595 issue_context_kindfunction issue_contexttestBoxedString issue_hash_function_offset2 @@ -3362,9 +3362,9 @@ descriptionIncorrect decrement of the reference count of an object that is not owned at this point by the caller categoryMemory (Core Foundation/Objective-C/OSObject) typeBad release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contexte60765ef00b3af982aacd5471a2cdb21 + issue_hash_content_of_line_in_context32fcec71872b8f62d8d7b1b05284b0fe issue_context_kindfunction issue_contexttestArray issue_hash_function_offset2 @@ -3483,9 +3483,9 @@ descriptionIncorrect decrement of the reference count of an object that is not owned at this point by the caller categoryMemory (Core Foundation/Objective-C/OSObject) typeBad release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context42da4f0388822b235ed56427f2e1ac1b + issue_hash_content_of_line_in_contextd9584825bb1e62066879949e3ade8570 issue_context_kindfunction issue_contexttestDictionary issue_hash_function_offset2 @@ -3841,9 +3841,9 @@ descriptionPotential leak of an object of type 'MyObj *' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextb5589615cea2321192e477d2011edf09 + issue_hash_content_of_line_in_contexteef2aef4b58abf21fcfa4bbf69e19c02 issue_context_kindObjective-C method issue_contexttest issue_hash_function_offset2 @@ -4240,9 +4240,9 @@ descriptionPotential leak of an object stored into 'y' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak 
- check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextb319657460942b0e8deafb79876d5479 + issue_hash_content_of_line_in_context8c27524f691296551f9e52856b824326 issue_context_kindObjective-C method issue_contexttest issue_hash_function_offset8 @@ -4518,9 +4518,9 @@ descriptionObject autoreleased too many times categoryMemory (Core Foundation/Objective-C/OSObject) typeObject autoreleased too many times - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context8e06af66dd0b414c095c951ac1f2cc68 + issue_hash_content_of_line_in_context4fc36e73ba317d307dc9cc4b3d62fd0a issue_context_kindfunction issue_contextCFOverAutorelease issue_hash_function_offset4 @@ -4716,9 +4716,9 @@ descriptionObject autoreleased too many times categoryMemory (Core Foundation/Objective-C/OSObject) typeObject autoreleased too many times - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context06eeb988e43f885cb575eba46e7ccf8f + issue_hash_content_of_line_in_context08e6a3931d34cda45c09dfda76976e17 issue_context_kindfunction issue_contextCFAutoreleaseUnowned issue_hash_function_offset3 @@ -4988,9 +4988,9 @@ descriptionObject autoreleased too many times categoryMemory (Core Foundation/Objective-C/OSObject) typeObject autoreleased too many times - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contexte1b335bbbaad2a9c427e681a6fac6562 + issue_hash_content_of_line_in_contextd9bb23a5435fe15df9d7ffdc27a8a072 issue_context_kindfunction issue_contextCFAutoreleaseUnownedMixed issue_hash_function_offset4 diff --git a/clang/test/Analysis/Inputs/expected-plists/retain-release.m.objc.plist b/clang/test/Analysis/Inputs/expected-plists/retain-release.m.objc.plist index 74e8dd606a2d9..8b5ab23df9ed6 100644 --- a/clang/test/Analysis/Inputs/expected-plists/retain-release.m.objc.plist +++ 
b/clang/test/Analysis/Inputs/expected-plists/retain-release.m.objc.plist @@ -397,9 +397,9 @@ descriptionReference-counted object is used after it is released categoryMemory (Core Foundation/Objective-C/OSObject) typeUse-after-release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context1089a297e77ff0c9d2d55cfb3aae26d3 + issue_hash_content_of_line_in_context5928b2a4699cbae0686391c20e639007 issue_context_kindfunction issue_contextf1 issue_hash_function_offset7 @@ -816,9 +816,9 @@ descriptionReference-counted object is used after it is released categoryMemory (Core Foundation/Objective-C/OSObject) typeUse-after-release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextbb12c99d56657635b20d4a0801590eed + issue_hash_content_of_line_in_context6b2e175938153ac041f52ebbf50b1f43 issue_context_kindfunction issue_contextf2 issue_hash_function_offset7 @@ -1107,9 +1107,9 @@ descriptionPotential leak of an object stored into 'date' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context0e9bb151f425535a0ec1b0bf0574dd7d + issue_hash_content_of_line_in_context3fdbd844ddb925306ba2bb1b3626f310 issue_context_kindfunction issue_contextf5 issue_hash_function_offset2 @@ -1305,9 +1305,9 @@ descriptionPotential leak of an object stored into 'date' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextad4b758c93bbe7feeee349a526293527 + issue_hash_content_of_line_in_context8529da75e357c59fb0a7fefb0b6e0952 issue_context_kindfunction issue_contextf6 issue_hash_function_offset1 @@ -1502,9 +1502,9 @@ descriptionPotential leak of an object stored into 'date' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - 
check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context2a319c210c1c5b4274e3f28931ead03b + issue_hash_content_of_line_in_contexteb0faa12081b1e28b218e4c6e53d57ec issue_context_kindfunction issue_contextf7 issue_hash_function_offset1 @@ -1659,9 +1659,9 @@ descriptionPotential leak of an object stored into 'date' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak of returned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context2c347e0a0af508867a6d854a3fc8f690 + issue_hash_content_of_line_in_context404d4de8faa444bc52fd510380bd0a63 issue_context_kindfunction issue_contextf7 issue_hash_function_offset3 @@ -1857,9 +1857,9 @@ descriptionPotential leak of an object stored into 'date' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context0be746eb38e868156f7f57ea95735f4e + issue_hash_content_of_line_in_context251dff6727b3d99ec95caa28672669ea issue_context_kindfunction issue_contextf8 issue_hash_function_offset1 @@ -2562,9 +2562,9 @@ descriptionPotential leak of an object stored into 'disk' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context3e83186b5b944ef7a3ec026d469d5ad7 + issue_hash_content_of_line_in_context69ae08a90fe52a921ed423df38ed7480 issue_context_kindfunction issue_contextf10 issue_hash_function_offset1 @@ -3045,9 +3045,9 @@ descriptionPotential leak of an object stored into 'dict' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextffc6479dc21fc10cdb83b4392685ed36 + issue_hash_content_of_line_in_contexta7f8c63b1cdc39df79b7457e27ff4930 issue_context_kindfunction issue_contextf10 
issue_hash_function_offset7 @@ -3660,9 +3660,9 @@ descriptionPotential leak of an object stored into 'disk' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context1c06fc99a1d078653ae8e4fe308e09cd + issue_hash_content_of_line_in_contextcace8e35bed93ecdfa0455ac166aaa97 issue_context_kindfunction issue_contextf10 issue_hash_function_offset10 @@ -4345,9 +4345,9 @@ descriptionPotential leak of an object stored into 'disk' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context460f099c6ae21a4b3ae818c9f65df2b0 + issue_hash_content_of_line_in_context778f70549a15e78703b4dcb3a287df33 issue_context_kindfunction issue_contextf10 issue_hash_function_offset4 @@ -5162,9 +5162,9 @@ descriptionPotential leak of an object stored into 'dissenter' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context65004e269b1b5cb5d9b5c6f7a02926e3 + issue_hash_content_of_line_in_context6c188b4716e84cdc55b93d40e6c2daf3 issue_context_kindfunction issue_contextf10 issue_hash_function_offset13 @@ -6044,9 +6044,9 @@ descriptionPotential leak of an object stored into 'session' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contexte9c1be038ef498b7985f5b1ddcb5444f + issue_hash_content_of_line_in_context35b9ac7ff198890c88d5839a898b7fea issue_context_kindfunction issue_contextf10 issue_hash_function_offset17 @@ -6161,9 +6161,9 @@ descriptionPotential leak of an object stored into 'f' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - 
issue_hash_content_of_line_in_context9c7c3b2bf298c7d046fd6fc7f6fe688e + issue_hash_content_of_line_in_context17d84d673b35235b52d8f8f00c1d1eea issue_context_kindfunction issue_contexttestLeakCoreMediaReferenceType issue_hash_function_offset1 @@ -6282,9 +6282,9 @@ descriptionIncorrect decrement of the reference count of an object that is not owned at this point by the caller categoryMemory (Core Foundation/Objective-C/OSObject) typeBad release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context69932084739a429d667d8de6de42af0b + issue_hash_content_of_line_in_context1702285448a953b02ab74a8eb9a610d9 issue_context_kindfunction issue_contexttestOverReleaseMediaReferenceType issue_hash_function_offset2 @@ -6674,9 +6674,9 @@ descriptionPotential leak of an object stored into 'buffer' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context0f30258c45ed9ecd8646db90eaf20c4a + issue_hash_content_of_line_in_context402566b4ddf1683dac1aefc1ab3e76e9 issue_context_kindfunction issue_contexttestCMBufferQueueDequeueAndRetain issue_hash_function_offset1 @@ -6829,9 +6829,9 @@ descriptionIncorrect decrement of the reference count of an object that is not owned at this point by the caller categoryMemory (Core Foundation/Objective-C/OSObject) typeBad release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context13e672795c0e57433c642c84f26f6c9b + issue_hash_content_of_line_in_context143ef5974bfece95e9894da5250aaff0 issue_context_kindfunction issue_contextf11 issue_hash_function_offset21 @@ -6941,9 +6941,9 @@ descriptionPotential leak of an object stored into 'o' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - 
issue_hash_content_of_line_in_contexteeff9e133573bdbc1aeb633284cbdb2b + issue_hash_content_of_line_in_contextaf4ad99c5fb565d82e1b4848aaca4e24 issue_context_kindfunction issue_contextf12 issue_hash_function_offset1 @@ -7197,9 +7197,9 @@ descriptionObject autoreleased too many times categoryMemory (Core Foundation/Objective-C/OSObject) typeObject autoreleased too many times - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context620a4245edc8df18036da34702ca01c8 + issue_hash_content_of_line_in_context58a0b3f8332f42561f89b11f6eb5e91f issue_context_kindfunction issue_contextf13_autorelease_b issue_hash_function_offset4 @@ -7470,9 +7470,9 @@ descriptionObject autoreleased too many times categoryMemory (Core Foundation/Objective-C/OSObject) typeObject autoreleased too many times - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context1a87a5f904c165069a731b0325d45edf + issue_hash_content_of_line_in_context612dc6574d54c8010703a9776d8a4a0a issue_context_kindfunction issue_contextf13_autorelease_c issue_hash_function_offset4 @@ -7777,9 +7777,9 @@ descriptionObject autoreleased too many times categoryMemory (Core Foundation/Objective-C/OSObject) typeObject autoreleased too many times - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context6ed645efdfe968f31d4356610bb6dd02 + issue_hash_content_of_line_in_contextc57037289bc3acc586de325df25951ed issue_context_kindfunction issue_contextf13_autorelease_d issue_hash_function_offset4 @@ -7885,9 +7885,9 @@ descriptionPotential leak of an object of type 'CFMutableArrayRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context5295be41524e9e28f4b1a608006801fe + issue_hash_content_of_line_in_context6abb479bc4c7782a125d680fddf825ef issue_context_kindfunction 
issue_contextf14_leakimmediately issue_hash_function_offset1 @@ -8891,9 +8891,9 @@ descriptionPotential leak of an object stored into 'bmap' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context2e5affde083280f6d31ed412ac8c2396 + issue_hash_content_of_line_in_context2cfebefee7b63ce3954419e571be4f63 issue_context_kindfunction issue_contextf18 issue_hash_function_offset2 @@ -9012,9 +9012,9 @@ descriptionObject with a +0 retain count returned to caller where a +1 (owning) retain count is expected categoryMemory (Core Foundation/Objective-C/OSObject) typeMethod should return an owned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextfdd0cb02c08c718da2686b6e0f04aad7 + issue_hash_content_of_line_in_contextdcd3becc58a149abe6ade5598138d3dd issue_context_kindObjective-C method issue_contextnewString issue_hash_function_offset2 @@ -9230,9 +9230,9 @@ descriptionPotential leak of an object stored into 'kind' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context03f39b74e1ccafa9c613ba4bb71de560 + issue_hash_content_of_line_in_context6688c9cb12f0c76ec80eb03b1d2eddf8 issue_context_kindfunction issue_contextrdar_6659160 issue_hash_function_offset5 @@ -10529,9 +10529,9 @@ descriptionIncorrect decrement of the reference count of an object that is not owned at this point by the caller categoryMemory (Core Foundation/Objective-C/OSObject) typeBad release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextc8a4713a734a4f6e747423ef88af6bf8 + issue_hash_content_of_line_in_contextd04966e9b8e981d8f69bf03823253033 issue_context_kindfunction issue_contextrdar_6659160 issue_hash_function_offset33 @@ -10737,9 +10737,9 @@ 
descriptionReference-counted object is used after it is released categoryMemory (Core Foundation/Objective-C/OSObject) typeUse-after-release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context83c7891609f8efb616060d0c6ae6bb43 + issue_hash_content_of_line_in_context1b35183a6aca4df5a8732c8da94e3205 issue_context_kindfunction issue_contextpr3820_ReleaseAfterDealloc issue_hash_function_offset3 @@ -10969,9 +10969,9 @@ descriptionReference-counted object is used after it is released categoryMemory (Core Foundation/Objective-C/OSObject) typeUse-after-release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context9fe338c720f25b3b1d5a68930d3ae4b8 + issue_hash_content_of_line_in_context54f2bd1534fa675b58c4f8eef3120373 issue_context_kindfunction issue_contextpr3820_DeallocAfterRelease issue_hash_function_offset4 @@ -11221,9 +11221,9 @@ descriptionPotential leak of an object stored into 'dict' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextdf3400f53fc437aede21f685ca1955d4 + issue_hash_content_of_line_in_context055e6f3413539276fedeac241fccd9b8 issue_context_kindObjective-C method issue_contextapplicationDidFinishLaunching: issue_hash_function_offset1 @@ -11535,9 +11535,9 @@ descriptionPotential leak of an object stored into 'dict' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context5104ca579763af0f8c66da3fdc42b95f + issue_hash_content_of_line_in_context444f6019b048a95dd71c6be49ecb73ff issue_context_kindObjective-C method issue_contextradar10102244 issue_hash_function_offset1 @@ -11691,9 +11691,9 @@ descriptionIncorrect decrement of the reference count of an object that is not owned at this point by the caller 
categoryMemory (Core Foundation/Objective-C/OSObject) typeBad release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contexta4a85a3991cb3888217d5c62346107dc + issue_hash_content_of_line_in_context641de26edd3d85ca241de577afbcda86 issue_context_kindfunction issue_contextrdar_6257780_Case1 issue_hash_function_offset3 @@ -11847,9 +11847,9 @@ descriptionPotential leak of an object of type 'RDar6320065Subclass *' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context75b7ad344b1d4665d918188bd10429df + issue_hash_content_of_line_in_context8e8ae80fd006f27a952f77494bd1c05f issue_context_kindObjective-C method issue_context_initReturningNewClassBad issue_hash_function_offset2 @@ -12044,9 +12044,9 @@ descriptionObject with a +0 retain count returned to caller where a +1 (owning) retain count is expected categoryMemory (Core Foundation/Objective-C/OSObject) typeMethod should return an owned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context791e285d27d610c4c016065dd5addd37 + issue_hash_content_of_line_in_context625e26ef3ae9de238f30175e4e9f4937 issue_context_kindObjective-C method issue_contextinitReturningNewClassBad2 issue_hash_function_offset3 @@ -12132,9 +12132,9 @@ descriptionPotential leak of an object of type 'NSString *' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak of returned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context58cf9e4228ab9cbe375ddf37d04d45f1 + issue_hash_content_of_line_in_context666dce676597e2cfa3199521864f7b96 issue_context_kindObjective-C method issue_contextNoCopyString issue_hash_function_offset0 @@ -12217,9 +12217,9 @@ descriptionPotential leak of an object of type 'NSString *' categoryMemory (Core 
Foundation/Objective-C/OSObject) typeLeak of returned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contexte1b0176b31382e7e75129dd78883c91b + issue_hash_content_of_line_in_context31104cdb408dbc3faf693a5c31973486 issue_context_kindObjective-C method issue_contextnoCopyString issue_hash_function_offset0 @@ -12442,9 +12442,9 @@ descriptionPotential leak of an object of type 'NSString *' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context5ff4d17e82026ccd84121b0a361fc135 + issue_hash_content_of_line_in_context909638940b4d7020f51062089653b231 issue_context_kindfunction issue_contexttest_RDar6859457 issue_hash_function_offset1 @@ -12704,9 +12704,9 @@ descriptionPotential leak of an object of type 'NSString *' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context964683651b544d6c1cce0c4ae6961936 + issue_hash_content_of_line_in_context2a37743e32cfa0a86958fed215c30e87 issue_context_kindfunction issue_contexttest_RDar6859457 issue_hash_function_offset2 @@ -12794,9 +12794,9 @@ descriptionPotential leak of an object of type 'NSString *' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak of returned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextca046c4c96c27a0e8c84dd707563bba9 + issue_hash_content_of_line_in_context20b25f0ba6268e055d8491c67c6a26bd issue_context_kindObjective-C method issue_context: issue_hash_function_offset1 @@ -12914,9 +12914,9 @@ descriptionPotential leak of an object of type 'id' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - 
issue_hash_content_of_line_in_context12515c1f2d3343496d32a54ef376347d + issue_hash_content_of_line_in_context706b9d732ece93a88487dbbf0b82fd23 issue_context_kindfunction issue_contextrdar6902710 issue_hash_function_offset1 @@ -13071,9 +13071,9 @@ descriptionPotential leak of an object of type 'id' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contexte10d7d441805b9f66c118bfeccf32f29 + issue_hash_content_of_line_in_context631eebb0c921191c24734f98fe93f6bf issue_context_kindfunction issue_contextrdar6902710 issue_hash_function_offset2 @@ -13229,9 +13229,9 @@ descriptionPotential leak of an object of type 'CGImageRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context3ae54947ad02e14773ac126982de301d + issue_hash_content_of_line_in_contextee36a48521a32c183a086066d3c5ae1f issue_context_kindfunction issue_contextrdar6902710 issue_hash_function_offset3 @@ -13373,9 +13373,9 @@ descriptionPotential leak of an object of type 'CGImageRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context6dba0d2672617f7eb2c512129fb17bb3 + issue_hash_content_of_line_in_context70a2dd4ee6b6f7caad87a46dc6dd3580 issue_context_kindfunction issue_contextrdar6902710 issue_hash_function_offset4 @@ -13484,9 +13484,9 @@ descriptionPotential leak of an object of type 'CGLayerRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextb065641c4257dac33ff15b08859d09e2 + issue_hash_content_of_line_in_contexta82448687d1cbf5cb517914dbe6de4fe issue_context_kindfunction issue_contextrdar6945561 issue_hash_function_offset1 @@ -13590,9 
+13590,9 @@ descriptionPotential leak of an object of type 'CFMutableDictionaryRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context7cbb4f547b5c1fb1a456ecc47f27d853 + issue_hash_content_of_line_in_context540e0145994c1e14ea750fe91a497855 issue_context_kindfunction issue_contextIOBSDNameMatching_wrapper issue_hash_function_offset1 @@ -13696,9 +13696,9 @@ descriptionPotential leak of an object of type 'CFMutableDictionaryRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context0b329ce97e1baf94f89590888a4af794 + issue_hash_content_of_line_in_context99d7012d797e181ef8e9a289ee9099eb issue_context_kindfunction issue_contextIOServiceMatching_wrapper issue_hash_function_offset1 @@ -13802,9 +13802,9 @@ descriptionPotential leak of an object of type 'CFMutableDictionaryRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contexte207241fbe4666cffeeca3f47966425f + issue_hash_content_of_line_in_context5d956e58f05bcc1b67ff65e02cbba302 issue_context_kindfunction issue_contextIOServiceNameMatching_wrapper issue_hash_function_offset1 @@ -13998,9 +13998,9 @@ descriptionReference-counted object is used after it is released categoryMemory (Core Foundation/Objective-C/OSObject) typeUse-after-release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextae61d11111bc6c9f049a5ca8935b7bae + issue_hash_content_of_line_in_context84a53bfb58a3a929535b47e28b997382 issue_context_kindfunction issue_contextIOServiceAddNotification_wrapper issue_hash_function_offset4 @@ -14107,9 +14107,9 @@ descriptionPotential leak of an object of type 'CFMutableDictionaryRef' categoryMemory (Core 
Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context62fc802833a96d44d2fa008826c46c64 + issue_hash_content_of_line_in_context36337ff486f6a8b702e68d13393bc975 issue_context_kindfunction issue_contextIORegistryEntryIDMatching_wrapper issue_hash_function_offset1 @@ -14213,9 +14213,9 @@ descriptionPotential leak of an object of type 'CFMutableDictionaryRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context644a1e5f3d844a5d9b140de26e6e5645 + issue_hash_content_of_line_in_contextee83ca968ddc2ecad7ae4318ce7d1d95 issue_context_kindfunction issue_contextIOOpenFirmwarePathMatching_wrapper issue_hash_function_offset1 @@ -14410,9 +14410,9 @@ descriptionReference-counted object is used after it is released categoryMemory (Core Foundation/Objective-C/OSObject) typeUse-after-release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context904a99d378144e5aa011649cec493695 + issue_hash_content_of_line_in_contexte8c08b2b3d53f5890907888e16927805 issue_context_kindfunction issue_contextIOServiceGetMatchingService_wrapper issue_hash_function_offset3 @@ -14607,9 +14607,9 @@ descriptionReference-counted object is used after it is released categoryMemory (Core Foundation/Objective-C/OSObject) typeUse-after-release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context23c94c459003beb49ea078f75a86ccc5 + issue_hash_content_of_line_in_context31664b5acc7980da73f5545fb16b0910 issue_context_kindfunction issue_contextIOServiceGetMatchingServices_wrapper issue_hash_function_offset3 @@ -14804,9 +14804,9 @@ descriptionReference-counted object is used after it is released categoryMemory (Core Foundation/Objective-C/OSObject) typeUse-after-release - 
check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context06e6fa1f7f96818fbd619dfe8b210b0d + issue_hash_content_of_line_in_context6edae46016a9671e2d5400b100d5efb5 issue_context_kindfunction issue_contextIOServiceAddMatchingNotification_wrapper issue_hash_function_offset4 @@ -15111,9 +15111,9 @@ descriptionPotential leak of an object stored into 'number' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context1692047c1a2ab283584ae01c84e3ae35 + issue_hash_content_of_line_in_contextdcec4e2bd254a3c24e84e598b5a827bf issue_context_kindfunction issue_contextrdar_7152619 issue_hash_function_offset4 @@ -15311,9 +15311,9 @@ descriptionPotential leak of an object of type 'CGColorSpaceRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context17e5c3184216ca3aef86288dc1f41d8d + issue_hash_content_of_line_in_context9317a6bf07dd10dc988f2415cc2c4ef7 issue_context_kindfunction issue_contextrdar_7184450 issue_hash_function_offset13 @@ -15511,9 +15511,9 @@ descriptionPotential leak of an object of type 'CGColorSpaceRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextc2225660bdec84d2ae183eda303a1abb + issue_hash_content_of_line_in_contextec3e6216b279aa48d8403c6aab30d996 issue_context_kindfunction issue_contextrdar_7184450_pos issue_hash_function_offset13 @@ -15729,9 +15729,9 @@ descriptionPotential leak of an object stored into 'myGradient' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context6415d6b7dd7d48a2ef27f4c4d0168c64 + 
issue_hash_content_of_line_in_context4b3d6bb6b8dc5c51b7dfa8554b24eb66 issue_context_kindfunction issue_contextrdar_7184450_pos issue_hash_function_offset13 @@ -15848,9 +15848,9 @@ descriptionPotential leak of an object stored into 'number' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context08a69979bb4fa932512da1327fbf3b23 + issue_hash_content_of_line_in_context42a83016e862ec323e24920873073a5a issue_context_kindfunction issue_contextrdar_7299394_positive issue_hash_function_offset1 @@ -15988,9 +15988,9 @@ descriptionPotential leak of an object of type 'CGContextRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context32b76a1b35c681cad8093c7e79e36388 + issue_hash_content_of_line_in_contexta416473fed3a9dbc6bfee885bee38216 issue_context_kindfunction issue_contextrdar_7358899 issue_hash_function_offset7 @@ -16099,9 +16099,9 @@ descriptionPotential leak of an object stored into 'y' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context7e6172f0b4b6af27712153519e1934e1 + issue_hash_content_of_line_in_context980dd45e9cf6581dbc2be9ebfc500b7f issue_context_kindfunction issue_contextrdar7265711_a issue_hash_function_offset1 @@ -16239,9 +16239,9 @@ descriptionPotential leak of an object stored into 'number' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context5eb97f906bb3af4befe63c891484f791 + issue_hash_content_of_line_in_contextebf51fb2b16499cf3a5c57d251a91061 issue_context_kindfunction issue_contextrdar7306898 issue_hash_function_offset4 @@ -16682,9 +16682,9 @@ descriptionPotential leak of an 
object stored into 'str' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context6b9b51ce7b68ca0ba6a85e8924601a96 + issue_hash_content_of_line_in_context1174ccc2a30887ebf80fe25fc6722b1a issue_context_kindfunction issue_contexttest_attr_1 issue_hash_function_offset1 @@ -16788,9 +16788,9 @@ descriptionPotential leak of an object stored into 'str' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contexteb040d5ec198d092ec9894af4dce6af8 + issue_hash_content_of_line_in_contextce9963dd1c85ac22cea4e4fef615354e issue_context_kindfunction issue_contexttest_attr_1b issue_hash_function_offset1 @@ -16977,9 +16977,9 @@ descriptionPotential leak of an object stored into 'str2' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context21b45a41bb0c3c70a0efe89359ff3385 + issue_hash_content_of_line_in_context0183088266857082f35eb17f1377fd69 issue_context_kindfunction issue_contexttest_attr1c issue_hash_function_offset2 @@ -17227,9 +17227,9 @@ descriptionPotential leak of an object stored into 'str4' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context60396abae77bacd747ea9081b63a32db + issue_hash_content_of_line_in_context352a17ef8eddd3aa5f7f6e74a74a4df3 issue_context_kindfunction issue_contexttest_attr1c issue_hash_function_offset4 @@ -17336,9 +17336,9 @@ descriptionPotential leak of an object stored into 'x' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contexte258a710e07550a3dc5f47361a7380e1 + 
issue_hash_content_of_line_in_contextd0e564404585060990202acb33f0bb1e issue_context_kindfunction issue_contexttestattr2_a issue_hash_function_offset1 @@ -17442,9 +17442,9 @@ descriptionPotential leak of an object stored into 'x' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextdc245145c78c3421392a20775cdd6f23 + issue_hash_content_of_line_in_context567dfcbc22471ca4ba9f2fccd9ff14fb issue_context_kindfunction issue_contexttestattr2_b issue_hash_function_offset1 @@ -17582,9 +17582,9 @@ descriptionPotential leak of an object stored into 'x' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context77b970319b12b0c189e46ad65fa848c7 + issue_hash_content_of_line_in_context83cd2670977d513443836653fee8147b issue_context_kindfunction issue_contexttestattr2_b_11358224_self_assign_looses_the_leak issue_hash_function_offset1 @@ -17670,9 +17670,9 @@ descriptionPotential leak of an object of type 'NSString *' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak of returned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context4a8d774d2b821ce1601df7edabf66097 + issue_hash_content_of_line_in_contextf83246e7e738918426df1adc915f4eca issue_context_kindObjective-C method issue_contextnewString issue_hash_function_offset1 @@ -18179,9 +18179,9 @@ descriptionObject with a +0 retain count returned to caller where a +1 (owning) retain count is expected categoryMemory (Core Foundation/Objective-C/OSObject) typeMethod should return an owned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context2a609b8807dab6d3cb1a1db524094f2f + issue_hash_content_of_line_in_context5f233261d96f1d461af36fc3e0efc8eb 
issue_context_kindObjective-C method issue_contextnewCFRetainedAsCFNoAttr issue_hash_function_offset1 @@ -18444,9 +18444,9 @@ descriptionPotential leak of an object of type 'CFDateRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak of returned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context944f189da47b1406f9cca6f17ad9f77c + issue_hash_content_of_line_in_context7ee55b74b5ee01c6ffa2a3d83c8cf88b issue_context_kindObjective-C method issue_contextalsoReturnsRetained issue_hash_function_offset1 @@ -18707,9 +18707,9 @@ descriptionPotential leak of an object of type 'CFDateRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak of returned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context30ebf65449c31336f8a97555d79f1943 + issue_hash_content_of_line_in_context177b2cf7eb3d8334393ee0861f5a38ac issue_context_kindObjective-C method issue_contextalsoReturnsRetainedAsCF issue_hash_function_offset1 @@ -18849,9 +18849,9 @@ descriptionPotential leak of an object stored into 'value' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context2ab1a2345ddfa1fd48777c7c179d4e33 + issue_hash_content_of_line_in_context85e9d8130a1f1ec37f0ba26746abd749 issue_context_kindfunction issue_contexttest_panic_negative issue_hash_function_offset2 @@ -19087,9 +19087,9 @@ descriptionPotential leak of an object stored into 'value' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextf96bb4f5c1af6cf932d7ab58b678c235 + issue_hash_content_of_line_in_context4a0b16976e0517b38b2ccc16e2928c2e issue_context_kindfunction issue_contexttest_panic_neg_2 issue_hash_function_offset2 @@ -19210,9 +19210,9 @@ 
descriptionPotential leak of an object stored into 'number' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context14182fb28ed03595f896c2f8536ac111 + issue_hash_content_of_line_in_contextaf73d9c62952a300a7c393ebd5073f75 issue_context_kindfunction issue_contexttest_blocks_1_pos issue_hash_function_offset1 @@ -19497,9 +19497,9 @@ descriptionPotential leak of an object stored into 'number' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextdbf800f836ff675d2f779f7417877c1b + issue_hash_content_of_line_in_context771b2a332053388ffbdd9ba74ea84c5e issue_context_kindfunction issue_contexttest_blocks_1_indirect_retain_via_call issue_hash_function_offset1 @@ -19895,9 +19895,9 @@ descriptionPotential leak of an object stored into 'info' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context64424de797303506a3dfdb52fa765645 + issue_hash_content_of_line_in_context39f8c30f7436f678d5259c0fdd3a0dad issue_context_kindfunction issue_contextrdar_8724287 issue_hash_function_offset7 @@ -19988,9 +19988,9 @@ descriptionPotential leak of an object of type 'CFMutableArrayRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak of returned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context7b7fc0c36e58713202141cb584150903 + issue_hash_content_of_line_in_context107e3efdeb8cdff4bef4c64183c4f6fa issue_context_kindfunction issue_contextcamelcase_createno issue_hash_function_offset1 @@ -20074,9 +20074,9 @@ descriptionPotential leak of an object of type 'CFMutableArrayRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak of returned object - 
check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context32912dd9518de1b3f4cc8ba38368f7e6 + issue_hash_content_of_line_in_context20c973a013858abb0a926276c956f858 issue_context_kindfunction issue_contextcamelcase_copying issue_hash_function_offset1 @@ -20160,9 +20160,9 @@ descriptionPotential leak of an object of type 'CFMutableArrayRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak of returned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context1dccc42846a9ef9bf1a1830e277d5b78 + issue_hash_content_of_line_in_context80ee99e51561a37297429740e3a4da0c issue_context_kindfunction issue_contextcamel_creat issue_hash_function_offset1 @@ -20246,9 +20246,9 @@ descriptionPotential leak of an object of type 'CFMutableArrayRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak of returned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context2a0ba33097f6e9362a79689e2ac0cf4a + issue_hash_content_of_line_in_contexta4e28a04f6a8d87c8aaf4d71c37cac0f issue_context_kindfunction issue_contextcamel_copymachine issue_hash_function_offset1 @@ -20385,9 +20385,9 @@ descriptionPotential leak of an object stored into 'vals' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context43f6c1be372d09a4a4cffaefa69d0148 + issue_hash_content_of_line_in_context6b727a438d8411c058fd32867b9402bc issue_context_kindfunction issue_contextrdar6582778 issue_hash_function_offset2 @@ -20650,9 +20650,9 @@ descriptionReference-counted object is used after it is released categoryMemory (Core Foundation/Objective-C/OSObject) typeUse-after-release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextebe7e868c0075bfa7480e3359e4fbce8 + 
issue_hash_content_of_line_in_contextb39dcf9df7cec8dd73cbbe25b2a7d6c5 issue_context_kindfunction issue_contextrdar10232019_positive issue_hash_function_offset6 @@ -20807,9 +20807,9 @@ descriptionPotential leak of an object stored into 'a' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context507c3679ae27249e01844b7555843688 + issue_hash_content_of_line_in_contexta501f743b22f1feb5dc317fcad4f7556 issue_context_kindfunction issue_contexttest_objc_arrays issue_hash_function_offset3 @@ -21033,9 +21033,9 @@ descriptionPotential leak of an object stored into 'a2' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context821f8268a0b7d3f90e4dd88fa1edf39b + issue_hash_content_of_line_in_contexta141a6ad33e8ff2ae3b13da0ad36ebc5 issue_context_kindfunction issue_contexttest_objc_arrays issue_hash_function_offset12 @@ -21442,9 +21442,9 @@ descriptionPotential leak of an object stored into 'a3' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context37b00e6e0e6b792ea3294a9ffd6f4886 + issue_hash_content_of_line_in_context2b072d75e8da8e3fe8f7968a85efb37c issue_context_kindfunction issue_contexttest_objc_arrays issue_hash_function_offset20 @@ -21815,9 +21815,9 @@ descriptionPotential leak of an object stored into 'a' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context62fc5b80705a03ab1d8b50bdcfbfb179 + issue_hash_content_of_line_in_context0bfdfb7e392626e0fccc6ab9f58f1ca8 issue_context_kindfunction issue_contexttest_objc_arrays issue_hash_function_offset28 @@ -22370,9 +22370,9 @@ descriptionPotential leak of an object 
stored into 'a' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context3eee239ca30a84ef6ecc5d154ae8df28 + issue_hash_content_of_line_in_contextff7c34e661a42d06a7fb3e9669e70339 issue_context_kindfunction issue_contexttest_objc_arrays issue_hash_function_offset37 @@ -22643,9 +22643,9 @@ descriptionPotential leak of an object stored into 'value' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextcb86fdadd2217db6b784b37dc29eba34 + issue_hash_content_of_line_in_context73e84c042932d2e17e00f00dc3d36d5a issue_context_kindfunction issue_contexttest_objc_integer_literals issue_hash_function_offset1 @@ -22874,9 +22874,9 @@ descriptionPotential leak of an object stored into 'value' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context4ad9235c4885452c3034fef815598a63 + issue_hash_content_of_line_in_context465e592d4f7a187717d00b8154a614b5 issue_context_kindfunction issue_contexttest_objc_boxed_expressions issue_hash_function_offset1 @@ -23159,9 +23159,9 @@ descriptionPotential leak of an object stored into 'value' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context9d3a52ee2efe90fef76f91f143f0d9e7 + issue_hash_content_of_line_in_contextc701bd0c60f51d96c047aa78c9e0eb99 issue_context_kindfunction issue_contexttest_objc_boxed_expressions issue_hash_function_offset4 @@ -23523,9 +23523,9 @@ descriptionReference-counted object is used after it is released categoryMemory (Core Foundation/Objective-C/OSObject) typeUse-after-release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - 
issue_hash_content_of_line_in_context0aad7b0550b51ebc0a2323c482d8eefd + issue_hash_content_of_line_in_contexta4cedbb647e9632da7a5072cb839e54a issue_context_kindfunction issue_contextrdar11400885 issue_hash_function_offset9 @@ -23683,9 +23683,9 @@ descriptionIncorrect decrement of the reference count of an object that is not owned at this point by the caller categoryMemory (Core Foundation/Objective-C/OSObject) typeBad release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context3b63deb8c998b2d73dd63da9f89672bb + issue_hash_content_of_line_in_contextfd9427d86a2357fd92478c9c7abbc1f4 issue_context_kindfunction issue_contexttestConsumeAndStopTracking issue_hash_function_offset10 @@ -23842,9 +23842,9 @@ descriptionIncorrect decrement of the reference count of an object that is not owned at this point by the caller categoryMemory (Core Foundation/Objective-C/OSObject) typeBad release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contexta4fe04db2f5fa1aa2b6d8d18ccb5dd02 + issue_hash_content_of_line_in_context0e65e51476e5671dcd37f632806e5147 issue_context_kindfunction issue_contexttestCFConsumeAndStopTracking issue_hash_function_offset10 @@ -23952,9 +23952,9 @@ descriptionPotential leak of an object stored into 'x' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context55f656da79f1b87a4b5618167f68c233 + issue_hash_content_of_line_in_contexta0ba9c47505e923763ea5323ad2f71b7 issue_context_kindfunction issue_contexttest_custom_cf issue_hash_function_offset1 @@ -24058,9 +24058,9 @@ descriptionPotential leak of an object stored into 'obj' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contexta7b4693fabae95c6b2091c7816fb2358 + 
issue_hash_content_of_line_in_context7a6cf8cb3c5e0ca3125d7e27695a810a issue_context_kindfunction issue_contexttestCustomReturnsRetained issue_hash_function_offset1 @@ -24145,9 +24145,9 @@ descriptionIncorrect decrement of the reference count of an object that is not owned at this point by the caller categoryMemory (Core Foundation/Objective-C/OSObject) typeBad release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context51de919c9df9dec2d383d050bf73d2d8 + issue_hash_content_of_line_in_context810fce32373fe40ba8e2d0894d46f667 issue_context_kindfunction issue_contexttestCustomReturnsNotRetained issue_hash_function_offset1 @@ -24502,9 +24502,9 @@ descriptionPotential leak of an object of type 'MyObj12706177 *' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextd8890e44d330279fd91ce8fdb35d7c81 + issue_hash_content_of_line_in_context68ee7961ffb62c575cc2298cb4836090 issue_context_kindObjective-C method issue_contexttest12706177 issue_hash_function_offset1 @@ -24734,9 +24734,9 @@ descriptionObject autoreleased too many times categoryMemory (Core Foundation/Objective-C/OSObject) typeObject autoreleased too many times - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextd4c839aab11cc39188d1054f3270d67f + issue_hash_content_of_line_in_context1dc376fbbe90d14b6766585a0e2b7bee issue_context_kindfunction issue_contextgetIncorrectlyAutoreleasedCFType issue_hash_function_offset2 @@ -24963,9 +24963,9 @@ descriptionObject with a +0 retain count returned to caller where a +1 (owning) retain count is expected categoryMemory (Core Foundation/Objective-C/OSObject) typeMethod should return an owned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextd2d9e8a977772482263591670a124c5d + 
issue_hash_content_of_line_in_context6ae8ea9fe4bf203e6b7bfaf649a6ca6a issue_context_kindfunction issue_contextcreateIncorrectlyAutoreleasedCFType issue_hash_function_offset2 @@ -25158,9 +25158,9 @@ descriptionReference-counted object is used after it is released categoryMemory (Core Foundation/Objective-C/OSObject) typeUse-after-release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextc483bb676bdbea00f7e99b3617b4b6e2 + issue_hash_content_of_line_in_contextd4e28f96fc8610b5b4b849f4760956eb issue_context_kindfunction issue_contextuseAfterRelease issue_hash_function_offset7 @@ -25415,9 +25415,9 @@ descriptionPotential leak of an object stored into 'obj' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context5bbb9b1720912f3fd2c67b3332de793b + issue_hash_content_of_line_in_context7986c4b7fb29301c109343dfe4155202 issue_context_kindfunction issue_contexttestAutoreleaseReturnsInput issue_hash_function_offset2 @@ -25673,9 +25673,9 @@ descriptionPotential leak of an object stored into 'arr' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextea7d6978bcb6da71c23b4bb6fef51a87 + issue_hash_content_of_line_in_context2e0dbfdf379acf2f09e46db47d753e8a issue_context_kindfunction issue_contextautoreleaseReturningTypedObject issue_hash_function_offset1 @@ -25890,9 +25890,9 @@ descriptionObject autoreleased too many times categoryMemory (Core Foundation/Objective-C/OSObject) typeObject autoreleased too many times - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context1f4f3ca2f399a94e54304b4a0dcb1e85 + issue_hash_content_of_line_in_context41a2d6f91fdfa9b5f396102a60571e21 issue_context_kindfunction issue_contextautoreleaseObjC 
issue_hash_function_offset6 @@ -26048,9 +26048,9 @@ descriptionIncorrect decrement of the reference count of an object that is not owned at this point by the caller categoryMemory (Core Foundation/Objective-C/OSObject) typeBad release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextced44137127627330194b72c97aef162 + issue_hash_content_of_line_in_context95dd5581ae4195b71e9a11f34290af5d issue_context_kindfunction issue_contexttestCFReturnsNotRetained issue_hash_function_offset4 @@ -26204,9 +26204,9 @@ descriptionIncorrect decrement of the reference count of an object that is not owned at this point by the caller categoryMemory (Core Foundation/Objective-C/OSObject) typeBad release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contexte7615a640885cbd55bc856bfc07d7123 + issue_hash_content_of_line_in_context014103674df4a8a65a96bcdf936637a2 issue_context_kindfunction issue_contexttestCFReturnsNotRetainedAnnotated issue_hash_function_offset4 diff --git a/clang/test/Analysis/Inputs/expected-plists/retain-release.m.objcpp.plist b/clang/test/Analysis/Inputs/expected-plists/retain-release.m.objcpp.plist index 79145156f2649..d797626af86df 100644 --- a/clang/test/Analysis/Inputs/expected-plists/retain-release.m.objcpp.plist +++ b/clang/test/Analysis/Inputs/expected-plists/retain-release.m.objcpp.plist @@ -397,9 +397,9 @@ descriptionReference-counted object is used after it is released categoryMemory (Core Foundation/Objective-C/OSObject) typeUse-after-release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context1089a297e77ff0c9d2d55cfb3aae26d3 + issue_hash_content_of_line_in_context5928b2a4699cbae0686391c20e639007 issue_context_kindfunction issue_contextf1 issue_hash_function_offset7 @@ -816,9 +816,9 @@ descriptionReference-counted object is used after it is released categoryMemory (Core 
Foundation/Objective-C/OSObject) typeUse-after-release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextbb12c99d56657635b20d4a0801590eed + issue_hash_content_of_line_in_context6b2e175938153ac041f52ebbf50b1f43 issue_context_kindfunction issue_contextf2 issue_hash_function_offset7 @@ -1107,9 +1107,9 @@ descriptionPotential leak of an object stored into 'date' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context0e9bb151f425535a0ec1b0bf0574dd7d + issue_hash_content_of_line_in_context3fdbd844ddb925306ba2bb1b3626f310 issue_context_kindfunction issue_contextf5 issue_hash_function_offset2 @@ -1305,9 +1305,9 @@ descriptionPotential leak of an object stored into 'date' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextad4b758c93bbe7feeee349a526293527 + issue_hash_content_of_line_in_context8529da75e357c59fb0a7fefb0b6e0952 issue_context_kindfunction issue_contextf6 issue_hash_function_offset1 @@ -1502,9 +1502,9 @@ descriptionPotential leak of an object stored into 'date' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context2a319c210c1c5b4274e3f28931ead03b + issue_hash_content_of_line_in_contexteb0faa12081b1e28b218e4c6e53d57ec issue_context_kindfunction issue_contextf7 issue_hash_function_offset1 @@ -1659,9 +1659,9 @@ descriptionPotential leak of an object stored into 'date' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak of returned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context2c347e0a0af508867a6d854a3fc8f690 + 
issue_hash_content_of_line_in_context404d4de8faa444bc52fd510380bd0a63 issue_context_kindfunction issue_contextf7 issue_hash_function_offset3 @@ -1857,9 +1857,9 @@ descriptionPotential leak of an object stored into 'date' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context0be746eb38e868156f7f57ea95735f4e + issue_hash_content_of_line_in_context251dff6727b3d99ec95caa28672669ea issue_context_kindfunction issue_contextf8 issue_hash_function_offset1 @@ -2562,9 +2562,9 @@ descriptionPotential leak of an object stored into 'disk' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context3e83186b5b944ef7a3ec026d469d5ad7 + issue_hash_content_of_line_in_context69ae08a90fe52a921ed423df38ed7480 issue_context_kindfunction issue_contextf10 issue_hash_function_offset1 @@ -3045,9 +3045,9 @@ descriptionPotential leak of an object stored into 'dict' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextffc6479dc21fc10cdb83b4392685ed36 + issue_hash_content_of_line_in_contexta7f8c63b1cdc39df79b7457e27ff4930 issue_context_kindfunction issue_contextf10 issue_hash_function_offset7 @@ -3660,9 +3660,9 @@ descriptionPotential leak of an object stored into 'disk' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context1c06fc99a1d078653ae8e4fe308e09cd + issue_hash_content_of_line_in_contextcace8e35bed93ecdfa0455ac166aaa97 issue_context_kindfunction issue_contextf10 issue_hash_function_offset10 @@ -4345,9 +4345,9 @@ descriptionPotential leak of an object stored into 'disk' categoryMemory (Core Foundation/Objective-C/OSObject) 
typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context460f099c6ae21a4b3ae818c9f65df2b0 + issue_hash_content_of_line_in_context778f70549a15e78703b4dcb3a287df33 issue_context_kindfunction issue_contextf10 issue_hash_function_offset4 @@ -5162,9 +5162,9 @@ descriptionPotential leak of an object stored into 'dissenter' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context65004e269b1b5cb5d9b5c6f7a02926e3 + issue_hash_content_of_line_in_context6c188b4716e84cdc55b93d40e6c2daf3 issue_context_kindfunction issue_contextf10 issue_hash_function_offset13 @@ -6044,9 +6044,9 @@ descriptionPotential leak of an object stored into 'session' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contexte9c1be038ef498b7985f5b1ddcb5444f + issue_hash_content_of_line_in_context35b9ac7ff198890c88d5839a898b7fea issue_context_kindfunction issue_contextf10 issue_hash_function_offset17 @@ -6161,9 +6161,9 @@ descriptionPotential leak of an object stored into 'f' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context9c7c3b2bf298c7d046fd6fc7f6fe688e + issue_hash_content_of_line_in_context17d84d673b35235b52d8f8f00c1d1eea issue_context_kindfunction issue_contexttestLeakCoreMediaReferenceType issue_hash_function_offset1 @@ -6282,9 +6282,9 @@ descriptionIncorrect decrement of the reference count of an object that is not owned at this point by the caller categoryMemory (Core Foundation/Objective-C/OSObject) typeBad release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context69932084739a429d667d8de6de42af0b + 
issue_hash_content_of_line_in_context1702285448a953b02ab74a8eb9a610d9 issue_context_kindfunction issue_contexttestOverReleaseMediaReferenceType issue_hash_function_offset2 @@ -6674,9 +6674,9 @@ descriptionPotential leak of an object stored into 'buffer' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context0f30258c45ed9ecd8646db90eaf20c4a + issue_hash_content_of_line_in_context402566b4ddf1683dac1aefc1ab3e76e9 issue_context_kindfunction issue_contexttestCMBufferQueueDequeueAndRetain issue_hash_function_offset1 @@ -6829,9 +6829,9 @@ descriptionIncorrect decrement of the reference count of an object that is not owned at this point by the caller categoryMemory (Core Foundation/Objective-C/OSObject) typeBad release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context13e672795c0e57433c642c84f26f6c9b + issue_hash_content_of_line_in_context143ef5974bfece95e9894da5250aaff0 issue_context_kindfunction issue_contextf11 issue_hash_function_offset21 @@ -6941,9 +6941,9 @@ descriptionPotential leak of an object stored into 'o' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contexteeff9e133573bdbc1aeb633284cbdb2b + issue_hash_content_of_line_in_contextaf4ad99c5fb565d82e1b4848aaca4e24 issue_context_kindfunction issue_contextf12 issue_hash_function_offset1 @@ -7197,9 +7197,9 @@ descriptionObject autoreleased too many times categoryMemory (Core Foundation/Objective-C/OSObject) typeObject autoreleased too many times - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context620a4245edc8df18036da34702ca01c8 + issue_hash_content_of_line_in_context58a0b3f8332f42561f89b11f6eb5e91f issue_context_kindfunction issue_contextf13_autorelease_b 
issue_hash_function_offset4 @@ -7470,9 +7470,9 @@ descriptionObject autoreleased too many times categoryMemory (Core Foundation/Objective-C/OSObject) typeObject autoreleased too many times - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context1a87a5f904c165069a731b0325d45edf + issue_hash_content_of_line_in_context612dc6574d54c8010703a9776d8a4a0a issue_context_kindfunction issue_contextf13_autorelease_c issue_hash_function_offset4 @@ -7777,9 +7777,9 @@ descriptionObject autoreleased too many times categoryMemory (Core Foundation/Objective-C/OSObject) typeObject autoreleased too many times - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context6ed645efdfe968f31d4356610bb6dd02 + issue_hash_content_of_line_in_contextc57037289bc3acc586de325df25951ed issue_context_kindfunction issue_contextf13_autorelease_d issue_hash_function_offset4 @@ -7885,9 +7885,9 @@ descriptionPotential leak of an object of type 'CFMutableArrayRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context5295be41524e9e28f4b1a608006801fe + issue_hash_content_of_line_in_context6abb479bc4c7782a125d680fddf825ef issue_context_kindfunction issue_contextf14_leakimmediately issue_hash_function_offset1 @@ -8891,9 +8891,9 @@ descriptionPotential leak of an object stored into 'bmap' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context2e5affde083280f6d31ed412ac8c2396 + issue_hash_content_of_line_in_context2cfebefee7b63ce3954419e571be4f63 issue_context_kindfunction issue_contextf18 issue_hash_function_offset2 @@ -9012,9 +9012,9 @@ descriptionObject with a +0 retain count returned to caller where a +1 (owning) retain count is expected categoryMemory (Core 
Foundation/Objective-C/OSObject) typeMethod should return an owned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextfdd0cb02c08c718da2686b6e0f04aad7 + issue_hash_content_of_line_in_contextdcd3becc58a149abe6ade5598138d3dd issue_context_kindObjective-C method issue_contextnewString issue_hash_function_offset2 @@ -9230,9 +9230,9 @@ descriptionPotential leak of an object stored into 'kind' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context03f39b74e1ccafa9c613ba4bb71de560 + issue_hash_content_of_line_in_context6688c9cb12f0c76ec80eb03b1d2eddf8 issue_context_kindfunction issue_contextrdar_6659160 issue_hash_function_offset5 @@ -10529,9 +10529,9 @@ descriptionIncorrect decrement of the reference count of an object that is not owned at this point by the caller categoryMemory (Core Foundation/Objective-C/OSObject) typeBad release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextc8a4713a734a4f6e747423ef88af6bf8 + issue_hash_content_of_line_in_contextd04966e9b8e981d8f69bf03823253033 issue_context_kindfunction issue_contextrdar_6659160 issue_hash_function_offset33 @@ -10737,9 +10737,9 @@ descriptionReference-counted object is used after it is released categoryMemory (Core Foundation/Objective-C/OSObject) typeUse-after-release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context83c7891609f8efb616060d0c6ae6bb43 + issue_hash_content_of_line_in_context1b35183a6aca4df5a8732c8da94e3205 issue_context_kindfunction issue_contextpr3820_ReleaseAfterDealloc issue_hash_function_offset3 @@ -10969,9 +10969,9 @@ descriptionReference-counted object is used after it is released categoryMemory (Core Foundation/Objective-C/OSObject) typeUse-after-release - 
check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context9fe338c720f25b3b1d5a68930d3ae4b8 + issue_hash_content_of_line_in_context54f2bd1534fa675b58c4f8eef3120373 issue_context_kindfunction issue_contextpr3820_DeallocAfterRelease issue_hash_function_offset4 @@ -11221,9 +11221,9 @@ descriptionPotential leak of an object stored into 'dict' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextdf3400f53fc437aede21f685ca1955d4 + issue_hash_content_of_line_in_context055e6f3413539276fedeac241fccd9b8 issue_context_kindObjective-C method issue_contextapplicationDidFinishLaunching: issue_hash_function_offset1 @@ -11535,9 +11535,9 @@ descriptionPotential leak of an object stored into 'dict' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context5104ca579763af0f8c66da3fdc42b95f + issue_hash_content_of_line_in_context444f6019b048a95dd71c6be49ecb73ff issue_context_kindObjective-C method issue_contextradar10102244 issue_hash_function_offset1 @@ -11691,9 +11691,9 @@ descriptionIncorrect decrement of the reference count of an object that is not owned at this point by the caller categoryMemory (Core Foundation/Objective-C/OSObject) typeBad release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contexta4a85a3991cb3888217d5c62346107dc + issue_hash_content_of_line_in_context641de26edd3d85ca241de577afbcda86 issue_context_kindfunction issue_contextrdar_6257780_Case1 issue_hash_function_offset3 @@ -11847,9 +11847,9 @@ descriptionPotential leak of an object of type 'RDar6320065Subclass *' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - 
issue_hash_content_of_line_in_context75b7ad344b1d4665d918188bd10429df + issue_hash_content_of_line_in_context8e8ae80fd006f27a952f77494bd1c05f issue_context_kindObjective-C method issue_context_initReturningNewClassBad issue_hash_function_offset2 @@ -12044,9 +12044,9 @@ descriptionObject with a +0 retain count returned to caller where a +1 (owning) retain count is expected categoryMemory (Core Foundation/Objective-C/OSObject) typeMethod should return an owned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context791e285d27d610c4c016065dd5addd37 + issue_hash_content_of_line_in_context625e26ef3ae9de238f30175e4e9f4937 issue_context_kindObjective-C method issue_contextinitReturningNewClassBad2 issue_hash_function_offset3 @@ -12132,9 +12132,9 @@ descriptionPotential leak of an object of type 'NSString *' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak of returned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context58cf9e4228ab9cbe375ddf37d04d45f1 + issue_hash_content_of_line_in_context666dce676597e2cfa3199521864f7b96 issue_context_kindObjective-C method issue_contextNoCopyString issue_hash_function_offset0 @@ -12217,9 +12217,9 @@ descriptionPotential leak of an object of type 'NSString *' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak of returned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contexte1b0176b31382e7e75129dd78883c91b + issue_hash_content_of_line_in_context31104cdb408dbc3faf693a5c31973486 issue_context_kindObjective-C method issue_contextnoCopyString issue_hash_function_offset0 @@ -12442,9 +12442,9 @@ descriptionPotential leak of an object of type 'NSString *' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - 
issue_hash_content_of_line_in_context5ff4d17e82026ccd84121b0a361fc135 + issue_hash_content_of_line_in_context909638940b4d7020f51062089653b231 issue_context_kindfunction issue_contexttest_RDar6859457 issue_hash_function_offset1 @@ -12704,9 +12704,9 @@ descriptionPotential leak of an object of type 'NSString *' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context964683651b544d6c1cce0c4ae6961936 + issue_hash_content_of_line_in_context2a37743e32cfa0a86958fed215c30e87 issue_context_kindfunction issue_contexttest_RDar6859457 issue_hash_function_offset2 @@ -12794,9 +12794,9 @@ descriptionPotential leak of an object of type 'NSString *' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak of returned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextca046c4c96c27a0e8c84dd707563bba9 + issue_hash_content_of_line_in_context20b25f0ba6268e055d8491c67c6a26bd issue_context_kindObjective-C method issue_context: issue_hash_function_offset1 @@ -12914,9 +12914,9 @@ descriptionPotential leak of an object of type 'id' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context12515c1f2d3343496d32a54ef376347d + issue_hash_content_of_line_in_context706b9d732ece93a88487dbbf0b82fd23 issue_context_kindfunction issue_contextrdar6902710 issue_hash_function_offset1 @@ -13105,9 +13105,9 @@ descriptionPotential leak of an object of type 'id' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contexte10d7d441805b9f66c118bfeccf32f29 + issue_hash_content_of_line_in_context631eebb0c921191c24734f98fe93f6bf issue_context_kindfunction issue_contextrdar6902710 
issue_hash_function_offset2 @@ -13297,9 +13297,9 @@ descriptionPotential leak of an object of type 'CGImageRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context3ae54947ad02e14773ac126982de301d + issue_hash_content_of_line_in_contextee36a48521a32c183a086066d3c5ae1f issue_context_kindfunction issue_contextrdar6902710 issue_hash_function_offset3 @@ -13441,9 +13441,9 @@ descriptionPotential leak of an object of type 'CGImageRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context6dba0d2672617f7eb2c512129fb17bb3 + issue_hash_content_of_line_in_context70a2dd4ee6b6f7caad87a46dc6dd3580 issue_context_kindfunction issue_contextrdar6902710 issue_hash_function_offset4 @@ -13552,9 +13552,9 @@ descriptionPotential leak of an object of type 'CGLayerRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextb065641c4257dac33ff15b08859d09e2 + issue_hash_content_of_line_in_contexta82448687d1cbf5cb517914dbe6de4fe issue_context_kindfunction issue_contextrdar6945561 issue_hash_function_offset1 @@ -13658,9 +13658,9 @@ descriptionPotential leak of an object of type 'CFMutableDictionaryRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context7cbb4f547b5c1fb1a456ecc47f27d853 + issue_hash_content_of_line_in_context540e0145994c1e14ea750fe91a497855 issue_context_kindfunction issue_contextIOBSDNameMatching_wrapper issue_hash_function_offset1 @@ -13764,9 +13764,9 @@ descriptionPotential leak of an object of type 'CFMutableDictionaryRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - 
check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context0b329ce97e1baf94f89590888a4af794 + issue_hash_content_of_line_in_context99d7012d797e181ef8e9a289ee9099eb issue_context_kindfunction issue_contextIOServiceMatching_wrapper issue_hash_function_offset1 @@ -13870,9 +13870,9 @@ descriptionPotential leak of an object of type 'CFMutableDictionaryRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contexte207241fbe4666cffeeca3f47966425f + issue_hash_content_of_line_in_context5d956e58f05bcc1b67ff65e02cbba302 issue_context_kindfunction issue_contextIOServiceNameMatching_wrapper issue_hash_function_offset1 @@ -14066,9 +14066,9 @@ descriptionReference-counted object is used after it is released categoryMemory (Core Foundation/Objective-C/OSObject) typeUse-after-release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextae61d11111bc6c9f049a5ca8935b7bae + issue_hash_content_of_line_in_context84a53bfb58a3a929535b47e28b997382 issue_context_kindfunction issue_contextIOServiceAddNotification_wrapper issue_hash_function_offset4 @@ -14175,9 +14175,9 @@ descriptionPotential leak of an object of type 'CFMutableDictionaryRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context62fc802833a96d44d2fa008826c46c64 + issue_hash_content_of_line_in_context36337ff486f6a8b702e68d13393bc975 issue_context_kindfunction issue_contextIORegistryEntryIDMatching_wrapper issue_hash_function_offset1 @@ -14281,9 +14281,9 @@ descriptionPotential leak of an object of type 'CFMutableDictionaryRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - 
issue_hash_content_of_line_in_context644a1e5f3d844a5d9b140de26e6e5645 + issue_hash_content_of_line_in_contextee83ca968ddc2ecad7ae4318ce7d1d95 issue_context_kindfunction issue_contextIOOpenFirmwarePathMatching_wrapper issue_hash_function_offset1 @@ -14478,9 +14478,9 @@ descriptionReference-counted object is used after it is released categoryMemory (Core Foundation/Objective-C/OSObject) typeUse-after-release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context904a99d378144e5aa011649cec493695 + issue_hash_content_of_line_in_contexte8c08b2b3d53f5890907888e16927805 issue_context_kindfunction issue_contextIOServiceGetMatchingService_wrapper issue_hash_function_offset3 @@ -14675,9 +14675,9 @@ descriptionReference-counted object is used after it is released categoryMemory (Core Foundation/Objective-C/OSObject) typeUse-after-release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context23c94c459003beb49ea078f75a86ccc5 + issue_hash_content_of_line_in_context31664b5acc7980da73f5545fb16b0910 issue_context_kindfunction issue_contextIOServiceGetMatchingServices_wrapper issue_hash_function_offset3 @@ -14872,9 +14872,9 @@ descriptionReference-counted object is used after it is released categoryMemory (Core Foundation/Objective-C/OSObject) typeUse-after-release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context06e6fa1f7f96818fbd619dfe8b210b0d + issue_hash_content_of_line_in_context6edae46016a9671e2d5400b100d5efb5 issue_context_kindfunction issue_contextIOServiceAddMatchingNotification_wrapper issue_hash_function_offset4 @@ -15179,9 +15179,9 @@ descriptionPotential leak of an object stored into 'number' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context1692047c1a2ab283584ae01c84e3ae35 + 
issue_hash_content_of_line_in_contextdcec4e2bd254a3c24e84e598b5a827bf issue_context_kindfunction issue_contextrdar_7152619 issue_hash_function_offset4 @@ -15380,9 +15380,9 @@ descriptionPotential leak of an object of type 'CGColorSpaceRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context17e5c3184216ca3aef86288dc1f41d8d + issue_hash_content_of_line_in_context9317a6bf07dd10dc988f2415cc2c4ef7 issue_context_kindfunction issue_contextrdar_7184450 issue_hash_function_offset13 @@ -15580,9 +15580,9 @@ descriptionPotential leak of an object of type 'CGColorSpaceRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextc2225660bdec84d2ae183eda303a1abb + issue_hash_content_of_line_in_contextec3e6216b279aa48d8403c6aab30d996 issue_context_kindfunction issue_contextrdar_7184450_pos issue_hash_function_offset13 @@ -15798,9 +15798,9 @@ descriptionPotential leak of an object stored into 'myGradient' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context6415d6b7dd7d48a2ef27f4c4d0168c64 + issue_hash_content_of_line_in_context4b3d6bb6b8dc5c51b7dfa8554b24eb66 issue_context_kindfunction issue_contextrdar_7184450_pos issue_hash_function_offset13 @@ -15917,9 +15917,9 @@ descriptionPotential leak of an object stored into 'number' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context08a69979bb4fa932512da1327fbf3b23 + issue_hash_content_of_line_in_context42a83016e862ec323e24920873073a5a issue_context_kindfunction issue_contextrdar_7299394_positive issue_hash_function_offset1 @@ -16057,9 +16057,9 @@ 
descriptionPotential leak of an object of type 'CGContextRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context32b76a1b35c681cad8093c7e79e36388 + issue_hash_content_of_line_in_contexta416473fed3a9dbc6bfee885bee38216 issue_context_kindfunction issue_contextrdar_7358899 issue_hash_function_offset7 @@ -16168,9 +16168,9 @@ descriptionPotential leak of an object stored into 'y' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context7e6172f0b4b6af27712153519e1934e1 + issue_hash_content_of_line_in_context980dd45e9cf6581dbc2be9ebfc500b7f issue_context_kindfunction issue_contextrdar7265711_a issue_hash_function_offset1 @@ -16308,9 +16308,9 @@ descriptionPotential leak of an object stored into 'number' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context5eb97f906bb3af4befe63c891484f791 + issue_hash_content_of_line_in_contextebf51fb2b16499cf3a5c57d251a91061 issue_context_kindfunction issue_contextrdar7306898 issue_hash_function_offset4 @@ -16751,9 +16751,9 @@ descriptionPotential leak of an object stored into 'str' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context6b9b51ce7b68ca0ba6a85e8924601a96 + issue_hash_content_of_line_in_context1174ccc2a30887ebf80fe25fc6722b1a issue_context_kindfunction issue_contexttest_attr_1 issue_hash_function_offset1 @@ -16857,9 +16857,9 @@ descriptionPotential leak of an object stored into 'str' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - 
issue_hash_content_of_line_in_contexteb040d5ec198d092ec9894af4dce6af8 + issue_hash_content_of_line_in_contextce9963dd1c85ac22cea4e4fef615354e issue_context_kindfunction issue_contexttest_attr_1b issue_hash_function_offset1 @@ -17046,9 +17046,9 @@ descriptionPotential leak of an object stored into 'str2' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context21b45a41bb0c3c70a0efe89359ff3385 + issue_hash_content_of_line_in_context0183088266857082f35eb17f1377fd69 issue_context_kindfunction issue_contexttest_attr1c issue_hash_function_offset2 @@ -17296,9 +17296,9 @@ descriptionPotential leak of an object stored into 'str4' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context60396abae77bacd747ea9081b63a32db + issue_hash_content_of_line_in_context352a17ef8eddd3aa5f7f6e74a74a4df3 issue_context_kindfunction issue_contexttest_attr1c issue_hash_function_offset4 @@ -17405,9 +17405,9 @@ descriptionPotential leak of an object stored into 'x' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contexte258a710e07550a3dc5f47361a7380e1 + issue_hash_content_of_line_in_contextd0e564404585060990202acb33f0bb1e issue_context_kindfunction issue_contexttestattr2_a issue_hash_function_offset1 @@ -17511,9 +17511,9 @@ descriptionPotential leak of an object stored into 'x' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextdc245145c78c3421392a20775cdd6f23 + issue_hash_content_of_line_in_context567dfcbc22471ca4ba9f2fccd9ff14fb issue_context_kindfunction issue_contexttestattr2_b issue_hash_function_offset1 @@ -17651,9 
+17651,9 @@ descriptionPotential leak of an object stored into 'x' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context77b970319b12b0c189e46ad65fa848c7 + issue_hash_content_of_line_in_context83cd2670977d513443836653fee8147b issue_context_kindfunction issue_contexttestattr2_b_11358224_self_assign_looses_the_leak issue_hash_function_offset1 @@ -17739,9 +17739,9 @@ descriptionPotential leak of an object of type 'NSString *' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak of returned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context4a8d774d2b821ce1601df7edabf66097 + issue_hash_content_of_line_in_contextf83246e7e738918426df1adc915f4eca issue_context_kindObjective-C method issue_contextnewString issue_hash_function_offset1 @@ -18248,9 +18248,9 @@ descriptionObject with a +0 retain count returned to caller where a +1 (owning) retain count is expected categoryMemory (Core Foundation/Objective-C/OSObject) typeMethod should return an owned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context2a609b8807dab6d3cb1a1db524094f2f + issue_hash_content_of_line_in_context5f233261d96f1d461af36fc3e0efc8eb issue_context_kindObjective-C method issue_contextnewCFRetainedAsCFNoAttr issue_hash_function_offset1 @@ -18513,9 +18513,9 @@ descriptionPotential leak of an object of type 'CFDateRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak of returned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context944f189da47b1406f9cca6f17ad9f77c + issue_hash_content_of_line_in_context7ee55b74b5ee01c6ffa2a3d83c8cf88b issue_context_kindObjective-C method issue_contextalsoReturnsRetained issue_hash_function_offset1 @@ -18776,9 +18776,9 @@ descriptionPotential 
leak of an object of type 'CFDateRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak of returned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context30ebf65449c31336f8a97555d79f1943 + issue_hash_content_of_line_in_context177b2cf7eb3d8334393ee0861f5a38ac issue_context_kindObjective-C method issue_contextalsoReturnsRetainedAsCF issue_hash_function_offset1 @@ -18918,9 +18918,9 @@ descriptionPotential leak of an object stored into 'value' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context2ab1a2345ddfa1fd48777c7c179d4e33 + issue_hash_content_of_line_in_context85e9d8130a1f1ec37f0ba26746abd749 issue_context_kindfunction issue_contexttest_panic_negative issue_hash_function_offset2 @@ -19156,9 +19156,9 @@ descriptionPotential leak of an object stored into 'value' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextf96bb4f5c1af6cf932d7ab58b678c235 + issue_hash_content_of_line_in_context4a0b16976e0517b38b2ccc16e2928c2e issue_context_kindfunction issue_contexttest_panic_neg_2 issue_hash_function_offset2 @@ -19279,9 +19279,9 @@ descriptionPotential leak of an object stored into 'number' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context14182fb28ed03595f896c2f8536ac111 + issue_hash_content_of_line_in_contextaf73d9c62952a300a7c393ebd5073f75 issue_context_kindfunction issue_contexttest_blocks_1_pos issue_hash_function_offset1 @@ -19566,9 +19566,9 @@ descriptionPotential leak of an object stored into 'number' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + 
check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextdbf800f836ff675d2f779f7417877c1b + issue_hash_content_of_line_in_context771b2a332053388ffbdd9ba74ea84c5e issue_context_kindfunction issue_contexttest_blocks_1_indirect_retain_via_call issue_hash_function_offset1 @@ -19964,9 +19964,9 @@ descriptionPotential leak of an object stored into 'info' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context64424de797303506a3dfdb52fa765645 + issue_hash_content_of_line_in_context39f8c30f7436f678d5259c0fdd3a0dad issue_context_kindfunction issue_contextrdar_8724287 issue_hash_function_offset7 @@ -20057,9 +20057,9 @@ descriptionPotential leak of an object of type 'CFMutableArrayRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak of returned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context7b7fc0c36e58713202141cb584150903 + issue_hash_content_of_line_in_context107e3efdeb8cdff4bef4c64183c4f6fa issue_context_kindfunction issue_contextcamelcase_createno issue_hash_function_offset1 @@ -20143,9 +20143,9 @@ descriptionPotential leak of an object of type 'CFMutableArrayRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak of returned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context32912dd9518de1b3f4cc8ba38368f7e6 + issue_hash_content_of_line_in_context20c973a013858abb0a926276c956f858 issue_context_kindfunction issue_contextcamelcase_copying issue_hash_function_offset1 @@ -20229,9 +20229,9 @@ descriptionPotential leak of an object of type 'CFMutableArrayRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak of returned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context1dccc42846a9ef9bf1a1830e277d5b78 + 
issue_hash_content_of_line_in_context80ee99e51561a37297429740e3a4da0c issue_context_kindfunction issue_contextcamel_creat issue_hash_function_offset1 @@ -20315,9 +20315,9 @@ descriptionPotential leak of an object of type 'CFMutableArrayRef' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak of returned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context2a0ba33097f6e9362a79689e2ac0cf4a + issue_hash_content_of_line_in_contexta4e28a04f6a8d87c8aaf4d71c37cac0f issue_context_kindfunction issue_contextcamel_copymachine issue_hash_function_offset1 @@ -20454,9 +20454,9 @@ descriptionPotential leak of an object stored into 'vals' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context43f6c1be372d09a4a4cffaefa69d0148 + issue_hash_content_of_line_in_context6b727a438d8411c058fd32867b9402bc issue_context_kindfunction issue_contextrdar6582778 issue_hash_function_offset2 @@ -20719,9 +20719,9 @@ descriptionReference-counted object is used after it is released categoryMemory (Core Foundation/Objective-C/OSObject) typeUse-after-release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextebe7e868c0075bfa7480e3359e4fbce8 + issue_hash_content_of_line_in_contextb39dcf9df7cec8dd73cbbe25b2a7d6c5 issue_context_kindfunction issue_contextrdar10232019_positive issue_hash_function_offset6 @@ -20876,9 +20876,9 @@ descriptionPotential leak of an object stored into 'a' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context507c3679ae27249e01844b7555843688 + issue_hash_content_of_line_in_contexta501f743b22f1feb5dc317fcad4f7556 issue_context_kindfunction issue_contexttest_objc_arrays issue_hash_function_offset3 @@ -21102,9 
+21102,9 @@ descriptionPotential leak of an object stored into 'a2' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context821f8268a0b7d3f90e4dd88fa1edf39b + issue_hash_content_of_line_in_contexta141a6ad33e8ff2ae3b13da0ad36ebc5 issue_context_kindfunction issue_contexttest_objc_arrays issue_hash_function_offset12 @@ -21511,9 +21511,9 @@ descriptionPotential leak of an object stored into 'a3' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context37b00e6e0e6b792ea3294a9ffd6f4886 + issue_hash_content_of_line_in_context2b072d75e8da8e3fe8f7968a85efb37c issue_context_kindfunction issue_contexttest_objc_arrays issue_hash_function_offset20 @@ -21884,9 +21884,9 @@ descriptionPotential leak of an object stored into 'a' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context62fc5b80705a03ab1d8b50bdcfbfb179 + issue_hash_content_of_line_in_context0bfdfb7e392626e0fccc6ab9f58f1ca8 issue_context_kindfunction issue_contexttest_objc_arrays issue_hash_function_offset28 @@ -22439,9 +22439,9 @@ descriptionPotential leak of an object stored into 'a' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context3eee239ca30a84ef6ecc5d154ae8df28 + issue_hash_content_of_line_in_contextff7c34e661a42d06a7fb3e9669e70339 issue_context_kindfunction issue_contexttest_objc_arrays issue_hash_function_offset37 @@ -22712,9 +22712,9 @@ descriptionPotential leak of an object stored into 'value' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - 
issue_hash_content_of_line_in_contextcb86fdadd2217db6b784b37dc29eba34 + issue_hash_content_of_line_in_context73e84c042932d2e17e00f00dc3d36d5a issue_context_kindfunction issue_contexttest_objc_integer_literals issue_hash_function_offset1 @@ -22943,9 +22943,9 @@ descriptionPotential leak of an object stored into 'value' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context4ad9235c4885452c3034fef815598a63 + issue_hash_content_of_line_in_context465e592d4f7a187717d00b8154a614b5 issue_context_kindfunction issue_contexttest_objc_boxed_expressions issue_hash_function_offset1 @@ -23228,9 +23228,9 @@ descriptionPotential leak of an object stored into 'value' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context9d3a52ee2efe90fef76f91f143f0d9e7 + issue_hash_content_of_line_in_contextc701bd0c60f51d96c047aa78c9e0eb99 issue_context_kindfunction issue_contexttest_objc_boxed_expressions issue_hash_function_offset4 @@ -23592,9 +23592,9 @@ descriptionReference-counted object is used after it is released categoryMemory (Core Foundation/Objective-C/OSObject) typeUse-after-release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context0aad7b0550b51ebc0a2323c482d8eefd + issue_hash_content_of_line_in_contexta4cedbb647e9632da7a5072cb839e54a issue_context_kindfunction issue_contextrdar11400885 issue_hash_function_offset9 @@ -23752,9 +23752,9 @@ descriptionIncorrect decrement of the reference count of an object that is not owned at this point by the caller categoryMemory (Core Foundation/Objective-C/OSObject) typeBad release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context3b63deb8c998b2d73dd63da9f89672bb + 
issue_hash_content_of_line_in_contextfd9427d86a2357fd92478c9c7abbc1f4 issue_context_kindfunction issue_contexttestConsumeAndStopTracking issue_hash_function_offset10 @@ -23911,9 +23911,9 @@ descriptionIncorrect decrement of the reference count of an object that is not owned at this point by the caller categoryMemory (Core Foundation/Objective-C/OSObject) typeBad release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contexta4fe04db2f5fa1aa2b6d8d18ccb5dd02 + issue_hash_content_of_line_in_context0e65e51476e5671dcd37f632806e5147 issue_context_kindfunction issue_contexttestCFConsumeAndStopTracking issue_hash_function_offset10 @@ -24021,9 +24021,9 @@ descriptionPotential leak of an object stored into 'x' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context55f656da79f1b87a4b5618167f68c233 + issue_hash_content_of_line_in_contexta0ba9c47505e923763ea5323ad2f71b7 issue_context_kindfunction issue_contexttest_custom_cf issue_hash_function_offset1 @@ -24127,9 +24127,9 @@ descriptionPotential leak of an object stored into 'obj' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contexta7b4693fabae95c6b2091c7816fb2358 + issue_hash_content_of_line_in_context7a6cf8cb3c5e0ca3125d7e27695a810a issue_context_kindfunction issue_contexttestCustomReturnsRetained issue_hash_function_offset1 @@ -24214,9 +24214,9 @@ descriptionIncorrect decrement of the reference count of an object that is not owned at this point by the caller categoryMemory (Core Foundation/Objective-C/OSObject) typeBad release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context51de919c9df9dec2d383d050bf73d2d8 + 
issue_hash_content_of_line_in_context810fce32373fe40ba8e2d0894d46f667 issue_context_kindfunction issue_contexttestCustomReturnsNotRetained issue_hash_function_offset1 @@ -24571,9 +24571,9 @@ descriptionPotential leak of an object of type 'MyObj12706177 *' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextd8890e44d330279fd91ce8fdb35d7c81 + issue_hash_content_of_line_in_context68ee7961ffb62c575cc2298cb4836090 issue_context_kindObjective-C method issue_contexttest12706177 issue_hash_function_offset1 @@ -24803,9 +24803,9 @@ descriptionObject autoreleased too many times categoryMemory (Core Foundation/Objective-C/OSObject) typeObject autoreleased too many times - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextd4c839aab11cc39188d1054f3270d67f + issue_hash_content_of_line_in_context1dc376fbbe90d14b6766585a0e2b7bee issue_context_kindfunction issue_contextgetIncorrectlyAutoreleasedCFType issue_hash_function_offset2 @@ -25032,9 +25032,9 @@ descriptionObject with a +0 retain count returned to caller where a +1 (owning) retain count is expected categoryMemory (Core Foundation/Objective-C/OSObject) typeMethod should return an owned object - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextd2d9e8a977772482263591670a124c5d + issue_hash_content_of_line_in_context6ae8ea9fe4bf203e6b7bfaf649a6ca6a issue_context_kindfunction issue_contextcreateIncorrectlyAutoreleasedCFType issue_hash_function_offset2 @@ -25227,9 +25227,9 @@ descriptionReference-counted object is used after it is released categoryMemory (Core Foundation/Objective-C/OSObject) typeUse-after-release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextc483bb676bdbea00f7e99b3617b4b6e2 + 
issue_hash_content_of_line_in_contextd4e28f96fc8610b5b4b849f4760956eb issue_context_kindfunction issue_contextuseAfterRelease issue_hash_function_offset7 @@ -25484,9 +25484,9 @@ descriptionPotential leak of an object stored into 'obj' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context5bbb9b1720912f3fd2c67b3332de793b + issue_hash_content_of_line_in_context7986c4b7fb29301c109343dfe4155202 issue_context_kindfunction issue_contexttestAutoreleaseReturnsInput issue_hash_function_offset2 @@ -25742,9 +25742,9 @@ descriptionPotential leak of an object stored into 'arr' categoryMemory (Core Foundation/Objective-C/OSObject) typeLeak - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextea7d6978bcb6da71c23b4bb6fef51a87 + issue_hash_content_of_line_in_context2e0dbfdf379acf2f09e46db47d753e8a issue_context_kindfunction issue_contextautoreleaseReturningTypedObject issue_hash_function_offset1 @@ -25959,9 +25959,9 @@ descriptionObject autoreleased too many times categoryMemory (Core Foundation/Objective-C/OSObject) typeObject autoreleased too many times - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_context1f4f3ca2f399a94e54304b4a0dcb1e85 + issue_hash_content_of_line_in_context41a2d6f91fdfa9b5f396102a60571e21 issue_context_kindfunction issue_contextautoreleaseObjC issue_hash_function_offset6 @@ -26117,9 +26117,9 @@ descriptionIncorrect decrement of the reference count of an object that is not owned at this point by the caller categoryMemory (Core Foundation/Objective-C/OSObject) typeBad release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextced44137127627330194b72c97aef162 + issue_hash_content_of_line_in_context95dd5581ae4195b71e9a11f34290af5d issue_context_kindfunction 
issue_contexttestCFReturnsNotRetained issue_hash_function_offset4 @@ -26273,9 +26273,9 @@ descriptionIncorrect decrement of the reference count of an object that is not owned at this point by the caller categoryMemory (Core Foundation/Objective-C/OSObject) typeBad release - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contexte7615a640885cbd55bc856bfc07d7123 + issue_hash_content_of_line_in_context014103674df4a8a65a96bcdf936637a2 issue_context_kindfunction issue_contexttestCFReturnsNotRetainedAnnotated issue_hash_function_offset4 diff --git a/clang/test/Analysis/incorrect-checker-names.mm b/clang/test/Analysis/incorrect-checker-names.mm index 861f81e98eb1b..bf7c6c071153a 100644 --- a/clang/test/Analysis/incorrect-checker-names.mm +++ b/clang/test/Analysis/incorrect-checker-names.mm @@ -125,7 +125,7 @@ - (void)myMethodWhichMayFail:(NSError **)error { // expected-wa void use_out_param_leak() { OSObject *obj; // FIXME: This shouldn't be tied to a modeling checker. 
- write_into_out_param_on_success(&obj); // expected-warning{{Potential leak of an object stored into 'obj' [osx.cocoa.RetainCountBase]}} + write_into_out_param_on_success(&obj); // expected-warning{{Potential leak of an object stored into 'obj' [osx.cocoa.RetainCount]}} } typedef struct dispatch_queue_s *dispatch_queue_t; diff --git a/clang/test/Analysis/inlining/Inputs/expected-plists/path-notes.m.plist b/clang/test/Analysis/inlining/Inputs/expected-plists/path-notes.m.plist index 6b3f36721fb83..b14ffffbfc231 100644 --- a/clang/test/Analysis/inlining/Inputs/expected-plists/path-notes.m.plist +++ b/clang/test/Analysis/inlining/Inputs/expected-plists/path-notes.m.plist @@ -1965,9 +1965,9 @@ descriptionObject autoreleased too many times categoryMemory (Core Foundation/Objective-C/OSObject) typeObject autoreleased too many times - check_nameosx.cocoa.RetainCountBase + check_nameosx.cocoa.RetainCount - issue_hash_content_of_line_in_contextb6a556c71184371a9567489c8477c2f7 + issue_hash_content_of_line_in_contexta3c91a7a52619d81ebe032dcc49ebb93 issue_context_kindfunction issue_contexttestAutoreleaseTakesEffectInDispatch issue_hash_function_offset11 diff --git a/clang/test/Analysis/test-separate-retaincount.cpp b/clang/test/Analysis/test-separate-retaincount.cpp index 41efad452e5ac..5ca4907e7291c 100644 --- a/clang/test/Analysis/test-separate-retaincount.cpp +++ b/clang/test/Analysis/test-separate-retaincount.cpp @@ -1,8 +1,8 @@ -// RUN: %clang_analyze_cc1 -std=c++14 -DNO_CF_OBJECT -verify %s \ +// RUN: %clang_analyze_cc1 -std=c++14 -verify=no-retain-count %s \ // RUN: -analyzer-checker=core,osx \ // RUN: -analyzer-disable-checker osx.cocoa.RetainCount // -// RUN: %clang_analyze_cc1 -std=c++14 -DNO_OS_OBJECT -verify %s \ +// RUN: %clang_analyze_cc1 -std=c++14 -verify=no-os-object %s \ // RUN: -analyzer-checker=core,osx \ // RUN: -analyzer-disable-checker osx.OSObjectRetainCount @@ -20,17 +20,11 @@ using size_t = decltype(sizeof(int)); void cf_overrelease() { CFTypeRef cf 
= CFCreate(); CFRelease(cf); - CFRelease(cf); -#ifndef NO_CF_OBJECT - // expected-warning@-2{{Reference-counted object is used after it is released}} -#endif + CFRelease(cf); // no-os-object-warning{{Reference-counted object is used after it is released}} } void osobject_overrelease() { OSObject *o = new OSObject; o->release(); - o->release(); -#ifndef NO_OS_OBJECT - // expected-warning@-2{{Reference-counted object is used after it is released}} -#endif + o->release(); // no-retain-count-warning{{Reference-counted object is used after it is released}} } From efd1a8e66eaa13afff709ebf16ff6280caa82ead Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krist=C3=B3f=20Umann?= Date: Tue, 25 Feb 2020 18:02:18 +0100 Subject: [PATCH 158/770] [analyzer][MallocChecker] Make NewDeleteLeaks depend on DynamicMemoryModeling rather than NewDelete If you remember the mail [1] I sent out about how I envision the future of the already existing checkers to look dependencywise, one of my main points was that no checker that emits diagnostics should be a dependency. This is more problematic for some checkers (ahem, RetainCount [2]) than for others, like this one. The MallocChecker family is mostly a big monolithic modeling class with some small reporting checkers that only come into action when we are constructing a warning message, after the actual bug was detected. The implication of this is that NewDeleteChecker doesn't really do anything to depend on, so this change was relatively simple. The only thing that complicates this change is that FreeMemAux (MallocChecker's method that models general memory deallocation) returns after calling a bug reporting method, regardless of whether the report was ever emitted (which may not always happen, for instance, if the checker responsible for the report isn't enabled). This return unfortunately happens before cleaning up the maps in the GDM keeping track of the state of symbols (whether they are released, whether that release was successful, etc). 
What this means is that upon disabling some checkers, we would never clean up the map and that could've led to false positives, e.g.: error: 'warning' diagnostics seen but not expected: File clang/test/Analysis/NewDelete-intersections.mm Line 66: Potential leak of memory pointed to by 'p' File clang/test/Analysis/NewDelete-intersections.mm Line 73: Potential leak of memory pointed to by 'p' File clang/test/Analysis/NewDelete-intersections.mm Line 77: Potential leak of memory pointed to by 'p' error: 'warning' diagnostics seen but not expected: File clang/test/Analysis/NewDelete-checker-test.cpp Line 111: Undefined or garbage value returned to caller File clang/test/Analysis/NewDelete-checker-test.cpp Line 200: Potential leak of memory pointed to by 'p' error: 'warning' diagnostics seen but not expected: File clang/test/Analysis/new.cpp Line 137: Potential leak of memory pointed to by 'x' There were two possible approaches I had in mind: (1) make the bug reporting methods of MallocChecker return whether they succeeded, and proceed with the rest of FreeMemAux if not, or (2) halt execution with a sink node upon failure. I decided to go with the latter, as described in the code. As you can see from the removed/changed test files, before the big checker dependency effort landed, there were tests to check for all the weird configurations of enabled/disabled checkers and their messy interactions; I largely repurposed these. 
[1] http://lists.llvm.org/pipermail/cfe-dev/2019-August/063070.html [2] http://lists.llvm.org/pipermail/cfe-dev/2019-August/063205.html Differential Revision: https://reviews.llvm.org/D77474 --- .../clang/StaticAnalyzer/Checkers/Checkers.td | 4 +- .../StaticAnalyzer/Checkers/MallocChecker.cpp | 153 ++++++++++-------- .../Malloc+NewDelete_intersections.cpp | 15 -- .../test/Analysis/NewDelete-checker-test.cpp | 105 ++++-------- .../test/Analysis/NewDelete-intersections.mm | 47 +++--- clang/test/Analysis/new.cpp | 11 +- 6 files changed, 154 insertions(+), 181 deletions(-) delete mode 100644 clang/test/Analysis/Malloc+NewDelete_intersections.cpp diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td index bc4b7d00e2d40..2ba3881c61351 100644 --- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td +++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td @@ -556,13 +556,13 @@ def NewDeleteChecker : Checker<"NewDelete">, def NewDeleteLeaksChecker : Checker<"NewDeleteLeaks">, HelpText<"Check for memory leaks. 
Traces memory managed by new/delete.">, - Dependencies<[NewDeleteChecker]>, + Dependencies<[DynamicMemoryModeling]>, Documentation; def PlacementNewChecker : Checker<"PlacementNew">, HelpText<"Check if default placement new is provided with pointers to " "sufficient storage capacity">, - Dependencies<[NewDeleteChecker]>, + Dependencies<[DynamicMemoryModeling]>, Documentation; def CXXSelfAssignmentChecker : Checker<"SelfAssignment">, diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp index a7c62a7e8046f..fa69bc253fbd0 100644 --- a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp @@ -684,41 +684,42 @@ class MallocChecker static bool SummarizeValue(raw_ostream &os, SVal V); static bool SummarizeRegion(raw_ostream &os, const MemRegion *MR); - void ReportBadFree(CheckerContext &C, SVal ArgVal, SourceRange Range, - const Expr *DeallocExpr, AllocationFamily Family) const; + void HandleNonHeapDealloc(CheckerContext &C, SVal ArgVal, SourceRange Range, + const Expr *DeallocExpr, + AllocationFamily Family) const; - void ReportFreeAlloca(CheckerContext &C, SVal ArgVal, + void HandleFreeAlloca(CheckerContext &C, SVal ArgVal, SourceRange Range) const; - void ReportMismatchedDealloc(CheckerContext &C, SourceRange Range, + void HandleMismatchedDealloc(CheckerContext &C, SourceRange Range, const Expr *DeallocExpr, const RefState *RS, SymbolRef Sym, bool OwnershipTransferred) const; - void ReportOffsetFree(CheckerContext &C, SVal ArgVal, SourceRange Range, + void HandleOffsetFree(CheckerContext &C, SVal ArgVal, SourceRange Range, const Expr *DeallocExpr, AllocationFamily Family, const Expr *AllocExpr = nullptr) const; - void ReportUseAfterFree(CheckerContext &C, SourceRange Range, + void HandleUseAfterFree(CheckerContext &C, SourceRange Range, SymbolRef Sym) const; - void ReportDoubleFree(CheckerContext &C, SourceRange Range, bool Released, + void 
HandleDoubleFree(CheckerContext &C, SourceRange Range, bool Released, SymbolRef Sym, SymbolRef PrevSym) const; - void ReportDoubleDelete(CheckerContext &C, SymbolRef Sym) const; + void HandleDoubleDelete(CheckerContext &C, SymbolRef Sym) const; - void ReportUseZeroAllocated(CheckerContext &C, SourceRange Range, - SymbolRef Sym) const; + void HandleUseZeroAlloc(CheckerContext &C, SourceRange Range, + SymbolRef Sym) const; - void ReportFunctionPointerFree(CheckerContext &C, SVal ArgVal, - SourceRange Range, const Expr *FreeExpr, - AllocationFamily Family) const; + void HandleFunctionPtrFree(CheckerContext &C, SVal ArgVal, SourceRange Range, + const Expr *FreeExpr, + AllocationFamily Family) const; /// Find the location of the allocation for Sym on the path leading to the /// exploded node N. static LeakInfo getAllocationSite(const ExplodedNode *N, SymbolRef Sym, CheckerContext &C); - void reportLeak(SymbolRef Sym, ExplodedNode *N, CheckerContext &C) const; + void HandleLeak(SymbolRef Sym, ExplodedNode *N, CheckerContext &C) const; /// Test if value in ArgVal equals to value in macro `ZERO_SIZE_PTR`. bool isArgZERO_SIZE_PTR(ProgramStateRef State, CheckerContext &C, @@ -1743,6 +1744,15 @@ ProgramStateRef MallocChecker::FreeMemAux( const MemRegion *R = ArgVal.getAsRegion(); const Expr *ParentExpr = Call.getOriginExpr(); + // NOTE: We detected a bug, but the checker under whose name we would emit the + // error could be disabled. Generally speaking, the MallocChecker family is an + // integral part of the Static Analyzer, and disabling any part of it should + // only be done under exceptional circumstances, such as frequent false + // positives. If this is the case, we can reasonably believe that there are + // serious faults in our understanding of the source code, and even if we + // don't emit an warning, we should terminate further analysis with a sink + // node. + // Nonlocs can't be freed, of course. 
// Non-region locations (labels and fixed addresses) also shouldn't be freed. if (!R) { @@ -1752,7 +1762,8 @@ ProgramStateRef MallocChecker::FreeMemAux( // zero-sized memory block which is allowed to be freed, despite not being a // null pointer. if (Family != AF_Malloc || !isArgZERO_SIZE_PTR(State, C, ArgVal)) - ReportBadFree(C, ArgVal, ArgExpr->getSourceRange(), ParentExpr, Family); + HandleNonHeapDealloc(C, ArgVal, ArgExpr->getSourceRange(), ParentExpr, + Family); return nullptr; } @@ -1760,7 +1771,8 @@ ProgramStateRef MallocChecker::FreeMemAux( // Blocks might show up as heap data, but should not be free()d if (isa(R)) { - ReportBadFree(C, ArgVal, ArgExpr->getSourceRange(), ParentExpr, Family); + HandleNonHeapDealloc(C, ArgVal, ArgExpr->getSourceRange(), ParentExpr, + Family); return nullptr; } @@ -1778,9 +1790,10 @@ ProgramStateRef MallocChecker::FreeMemAux( // False negatives are better than false positives. if (isa(R)) - ReportFreeAlloca(C, ArgVal, ArgExpr->getSourceRange()); + HandleFreeAlloca(C, ArgVal, ArgExpr->getSourceRange()); else - ReportBadFree(C, ArgVal, ArgExpr->getSourceRange(), ParentExpr, Family); + HandleNonHeapDealloc(C, ArgVal, ArgExpr->getSourceRange(), ParentExpr, + Family); return nullptr; } @@ -1802,14 +1815,14 @@ ProgramStateRef MallocChecker::FreeMemAux( // Memory returned by alloca() shouldn't be freed. if (RsBase->getAllocationFamily() == AF_Alloca) { - ReportFreeAlloca(C, ArgVal, ArgExpr->getSourceRange()); + HandleFreeAlloca(C, ArgVal, ArgExpr->getSourceRange()); return nullptr; } // Check for double free first. 
if ((RsBase->isReleased() || RsBase->isRelinquished()) && !didPreviousFreeFail(State, SymBase, PreviousRetStatusSymbol)) { - ReportDoubleFree(C, ParentExpr->getSourceRange(), RsBase->isReleased(), + HandleDoubleFree(C, ParentExpr->getSourceRange(), RsBase->isReleased(), SymBase, PreviousRetStatusSymbol); return nullptr; @@ -1821,8 +1834,8 @@ ProgramStateRef MallocChecker::FreeMemAux( // Check if an expected deallocation function matches the real one. bool DeallocMatchesAlloc = RsBase->getAllocationFamily() == Family; if (!DeallocMatchesAlloc) { - ReportMismatchedDealloc(C, ArgExpr->getSourceRange(), - ParentExpr, RsBase, SymBase, Hold); + HandleMismatchedDealloc(C, ArgExpr->getSourceRange(), ParentExpr, + RsBase, SymBase, Hold); return nullptr; } @@ -1833,7 +1846,7 @@ ProgramStateRef MallocChecker::FreeMemAux( !Offset.hasSymbolicOffset() && Offset.getOffset() != 0) { const Expr *AllocExpr = cast(RsBase->getStmt()); - ReportOffsetFree(C, ArgVal, ArgExpr->getSourceRange(), ParentExpr, + HandleOffsetFree(C, ArgVal, ArgExpr->getSourceRange(), ParentExpr, Family, AllocExpr); return nullptr; } @@ -1841,8 +1854,8 @@ ProgramStateRef MallocChecker::FreeMemAux( } if (SymBase->getType()->isFunctionPointerType()) { - ReportFunctionPointerFree(C, ArgVal, ArgExpr->getSourceRange(), ParentExpr, - Family); + HandleFunctionPtrFree(C, ArgVal, ArgExpr->getSourceRange(), ParentExpr, + Family); return nullptr; } @@ -2009,13 +2022,15 @@ bool MallocChecker::SummarizeRegion(raw_ostream &os, } } -void MallocChecker::ReportBadFree(CheckerContext &C, SVal ArgVal, - SourceRange Range, const Expr *DeallocExpr, - AllocationFamily Family) const { +void MallocChecker::HandleNonHeapDealloc(CheckerContext &C, SVal ArgVal, + SourceRange Range, + const Expr *DeallocExpr, + AllocationFamily Family) const { - if (!ChecksEnabled[CK_MallocChecker] && - !ChecksEnabled[CK_NewDeleteChecker]) + if (!ChecksEnabled[CK_MallocChecker] && !ChecksEnabled[CK_NewDeleteChecker]) { + C.addSink(); return; + } Optional 
CheckKind = getCheckIfTracked(Family); if (!CheckKind.hasValue()) @@ -2055,7 +2070,7 @@ void MallocChecker::ReportBadFree(CheckerContext &C, SVal ArgVal, } } -void MallocChecker::ReportFreeAlloca(CheckerContext &C, SVal ArgVal, +void MallocChecker::HandleFreeAlloca(CheckerContext &C, SVal ArgVal, SourceRange Range) const { Optional CheckKind; @@ -2064,8 +2079,10 @@ void MallocChecker::ReportFreeAlloca(CheckerContext &C, SVal ArgVal, CheckKind = CK_MallocChecker; else if (ChecksEnabled[CK_MismatchedDeallocatorChecker]) CheckKind = CK_MismatchedDeallocatorChecker; - else + else { + C.addSink(); return; + } if (ExplodedNode *N = C.generateErrorNode()) { if (!BT_FreeAlloca[*CheckKind]) @@ -2081,15 +2098,16 @@ void MallocChecker::ReportFreeAlloca(CheckerContext &C, SVal ArgVal, } } -void MallocChecker::ReportMismatchedDealloc(CheckerContext &C, +void MallocChecker::HandleMismatchedDealloc(CheckerContext &C, SourceRange Range, const Expr *DeallocExpr, - const RefState *RS, - SymbolRef Sym, + const RefState *RS, SymbolRef Sym, bool OwnershipTransferred) const { - if (!ChecksEnabled[CK_MismatchedDeallocatorChecker]) + if (!ChecksEnabled[CK_MismatchedDeallocatorChecker]) { + C.addSink(); return; + } if (ExplodedNode *N = C.generateErrorNode()) { if (!BT_MismatchedDealloc) @@ -2137,14 +2155,15 @@ void MallocChecker::ReportMismatchedDealloc(CheckerContext &C, } } -void MallocChecker::ReportOffsetFree(CheckerContext &C, SVal ArgVal, +void MallocChecker::HandleOffsetFree(CheckerContext &C, SVal ArgVal, SourceRange Range, const Expr *DeallocExpr, AllocationFamily Family, const Expr *AllocExpr) const { - if (!ChecksEnabled[CK_MallocChecker] && - !ChecksEnabled[CK_NewDeleteChecker]) + if (!ChecksEnabled[CK_MallocChecker] && !ChecksEnabled[CK_NewDeleteChecker]) { + C.addSink(); return; + } Optional CheckKind = getCheckIfTracked(Family); if (!CheckKind.hasValue()) @@ -2194,13 +2213,14 @@ void MallocChecker::ReportOffsetFree(CheckerContext &C, SVal ArgVal, C.emitReport(std::move(R)); 
} -void MallocChecker::ReportUseAfterFree(CheckerContext &C, SourceRange Range, +void MallocChecker::HandleUseAfterFree(CheckerContext &C, SourceRange Range, SymbolRef Sym) const { - if (!ChecksEnabled[CK_MallocChecker] && - !ChecksEnabled[CK_NewDeleteChecker] && - !ChecksEnabled[CK_InnerPointerChecker]) + if (!ChecksEnabled[CK_MallocChecker] && !ChecksEnabled[CK_NewDeleteChecker] && + !ChecksEnabled[CK_InnerPointerChecker]) { + C.addSink(); return; + } Optional CheckKind = getCheckIfTracked(C, Sym); if (!CheckKind.hasValue()) @@ -2232,13 +2252,14 @@ void MallocChecker::ReportUseAfterFree(CheckerContext &C, SourceRange Range, } } -void MallocChecker::ReportDoubleFree(CheckerContext &C, SourceRange Range, +void MallocChecker::HandleDoubleFree(CheckerContext &C, SourceRange Range, bool Released, SymbolRef Sym, SymbolRef PrevSym) const { - if (!ChecksEnabled[CK_MallocChecker] && - !ChecksEnabled[CK_NewDeleteChecker]) + if (!ChecksEnabled[CK_MallocChecker] && !ChecksEnabled[CK_NewDeleteChecker]) { + C.addSink(); return; + } Optional CheckKind = getCheckIfTracked(C, Sym); if (!CheckKind.hasValue()) @@ -2263,10 +2284,12 @@ void MallocChecker::ReportDoubleFree(CheckerContext &C, SourceRange Range, } } -void MallocChecker::ReportDoubleDelete(CheckerContext &C, SymbolRef Sym) const { +void MallocChecker::HandleDoubleDelete(CheckerContext &C, SymbolRef Sym) const { - if (!ChecksEnabled[CK_NewDeleteChecker]) + if (!ChecksEnabled[CK_NewDeleteChecker]) { + C.addSink(); return; + } Optional CheckKind = getCheckIfTracked(C, Sym); if (!CheckKind.hasValue()) @@ -2287,13 +2310,13 @@ void MallocChecker::ReportDoubleDelete(CheckerContext &C, SymbolRef Sym) const { } } -void MallocChecker::ReportUseZeroAllocated(CheckerContext &C, - SourceRange Range, - SymbolRef Sym) const { +void MallocChecker::HandleUseZeroAlloc(CheckerContext &C, SourceRange Range, + SymbolRef Sym) const { - if (!ChecksEnabled[CK_MallocChecker] && - !ChecksEnabled[CK_NewDeleteChecker]) + if 
(!ChecksEnabled[CK_MallocChecker] && !ChecksEnabled[CK_NewDeleteChecker]) { + C.addSink(); return; + } Optional CheckKind = getCheckIfTracked(C, Sym); @@ -2318,12 +2341,14 @@ void MallocChecker::ReportUseZeroAllocated(CheckerContext &C, } } -void MallocChecker::ReportFunctionPointerFree(CheckerContext &C, SVal ArgVal, - SourceRange Range, - const Expr *FreeExpr, - AllocationFamily Family) const { - if (!ChecksEnabled[CK_MallocChecker]) +void MallocChecker::HandleFunctionPtrFree(CheckerContext &C, SVal ArgVal, + SourceRange Range, + const Expr *FreeExpr, + AllocationFamily Family) const { + if (!ChecksEnabled[CK_MallocChecker]) { + C.addSink(); return; + } Optional CheckKind = getCheckIfTracked(Family); if (!CheckKind.hasValue()) @@ -2521,7 +2546,7 @@ MallocChecker::LeakInfo MallocChecker::getAllocationSite(const ExplodedNode *N, return LeakInfo(AllocNode, ReferenceRegion); } -void MallocChecker::reportLeak(SymbolRef Sym, ExplodedNode *N, +void MallocChecker::HandleLeak(SymbolRef Sym, ExplodedNode *N, CheckerContext &C) const { if (!ChecksEnabled[CK_MallocChecker] && @@ -2637,7 +2662,7 @@ void MallocChecker::checkDeadSymbols(SymbolReaper &SymReaper, if (N) { for (SmallVectorImpl::iterator I = Errors.begin(), E = Errors.end(); I != E; ++I) { - reportLeak(*I, N, C); + HandleLeak(*I, N, C); } } } @@ -2822,7 +2847,7 @@ bool MallocChecker::checkUseAfterFree(SymbolRef Sym, CheckerContext &C, const Stmt *S) const { if (isReleased(Sym, C)) { - ReportUseAfterFree(C, S->getSourceRange(), Sym); + HandleUseAfterFree(C, S->getSourceRange(), Sym); return true; } @@ -2835,17 +2860,17 @@ void MallocChecker::checkUseZeroAllocated(SymbolRef Sym, CheckerContext &C, if (const RefState *RS = C.getState()->get(Sym)) { if (RS->isAllocatedOfSizeZero()) - ReportUseZeroAllocated(C, RS->getStmt()->getSourceRange(), Sym); + HandleUseZeroAlloc(C, RS->getStmt()->getSourceRange(), Sym); } else if (C.getState()->contains(Sym)) { - ReportUseZeroAllocated(C, S->getSourceRange(), Sym); + 
HandleUseZeroAlloc(C, S->getSourceRange(), Sym); } } bool MallocChecker::checkDoubleDelete(SymbolRef Sym, CheckerContext &C) const { if (isReleased(Sym, C)) { - ReportDoubleDelete(C, Sym); + HandleDoubleDelete(C, Sym); return true; } return false; diff --git a/clang/test/Analysis/Malloc+NewDelete_intersections.cpp b/clang/test/Analysis/Malloc+NewDelete_intersections.cpp deleted file mode 100644 index 9140e1f4a372b..0000000000000 --- a/clang/test/Analysis/Malloc+NewDelete_intersections.cpp +++ /dev/null @@ -1,15 +0,0 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,cplusplus.NewDelete -std=c++11 -verify %s -// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,cplusplus.NewDelete,cplusplus.NewDeleteLeaks -std=c++11 -verify %s - -typedef __typeof(sizeof(int)) size_t; -void *malloc(size_t); -void free(void *); - -//------------------------------------------------------------------- -// Check that unix.Malloc + cplusplus.NewDelete does not enable -// warnings produced by unix.MismatchedDeallocator. 
-//------------------------------------------------------------------- -void testMismatchedDeallocator() { - int *p = (int *)malloc(sizeof(int)); - delete p; -} // expected-warning{{Potential leak of memory pointed to by 'p'}} diff --git a/clang/test/Analysis/NewDelete-checker-test.cpp b/clang/test/Analysis/NewDelete-checker-test.cpp index ba179749510cc..f0d42171a8756 100644 --- a/clang/test/Analysis/NewDelete-checker-test.cpp +++ b/clang/test/Analysis/NewDelete-checker-test.cpp @@ -1,42 +1,31 @@ -// RUN: %clang_analyze_cc1 -std=c++11 -fblocks -verify %s \ +// RUN: %clang_analyze_cc1 -std=c++11 -fblocks %s \ +// RUN: -verify=expected,newdelete \ // RUN: -analyzer-checker=core \ // RUN: -analyzer-checker=cplusplus.NewDelete // -// RUN: %clang_analyze_cc1 -DLEAKS -std=c++11 -fblocks -verify %s \ +// RUN: %clang_analyze_cc1 -DLEAKS -std=c++11 -fblocks %s \ +// RUN: -verify=expected,newdelete,leak \ // RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=cplusplus.NewDelete \ // RUN: -analyzer-checker=cplusplus.NewDeleteLeaks // -// RUN: %clang_analyze_cc1 -std=c++11 -fblocks -verify %s \ +// RUN: %clang_analyze_cc1 -std=c++11 -fblocks %s \ +// RUN: -verify=expected,newdelete \ // RUN: -analyzer-checker=core \ // RUN: -analyzer-checker=cplusplus.NewDelete \ // RUN: -analyzer-config c++-allocator-inlining=true // -// RUN: %clang_analyze_cc1 -DLEAKS -std=c++11 -fblocks -verify %s \ +// RUN: %clang_analyze_cc1 -std=c++11 -fblocks -verify %s \ +// RUN: -verify=expected,newdelete,leak \ // RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=cplusplus.NewDelete \ // RUN: -analyzer-checker=cplusplus.NewDeleteLeaks \ // RUN: -analyzer-config c++-allocator-inlining=true // -// RUN: %clang_analyze_cc1 -DTEST_INLINABLE_ALLOCATORS \ -// RUN: -std=c++11 -fblocks -verify %s \ -// RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=cplusplus.NewDelete -// -// RUN: %clang_analyze_cc1 -DLEAKS -DTEST_INLINABLE_ALLOCATORS \ -// RUN: -std=c++11 -fblocks -verify %s \ +// RUN: 
%clang_analyze_cc1 -std=c++11 -fblocks -verify %s \ +// RUN: -verify=expected,leak \ // RUN: -analyzer-checker=core \ // RUN: -analyzer-checker=cplusplus.NewDeleteLeaks -// -// RUN: %clang_analyze_cc1 -DTEST_INLINABLE_ALLOCATORS \ -// RUN: -std=c++11 -fblocks -verify %s \ -// RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=cplusplus.NewDelete \ -// RUN: -analyzer-config c++-allocator-inlining=true -// -// RUN: %clang_analyze_cc1 -DLEAKS -DTEST_INLINABLE_ALLOCATORS \ -// RUN: -std=c++11 -fblocks -verify %s \ -// RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=cplusplus.NewDeleteLeaks \ -// RUN: -analyzer-config c++-allocator-inlining=true #include "Inputs/system-header-simulator-cxx.h" @@ -52,50 +41,28 @@ int *global; //----- Standard non-placement operators void testGlobalOpNew() { void *p = operator new(0); -} -#ifdef LEAKS -// expected-warning@-2{{Potential leak of memory pointed to by 'p'}} -#endif +} // leak-warning{{Potential leak of memory pointed to by 'p'}} void testGlobalOpNewArray() { void *p = operator new[](0); -} -#ifdef LEAKS -// expected-warning@-2{{Potential leak of memory pointed to by 'p'}} -#endif +} // leak-warning{{Potential leak of memory pointed to by 'p'}} void testGlobalNewExpr() { int *p = new int; -} -#ifdef LEAKS -// expected-warning@-2{{Potential leak of memory pointed to by 'p'}} -#endif +} // leak-warning{{Potential leak of memory pointed to by 'p'}} void testGlobalNewExprArray() { int *p = new int[0]; -} -#ifdef LEAKS -// expected-warning@-2{{Potential leak of memory pointed to by 'p'}} -#endif +} // leak-warning{{Potential leak of memory pointed to by 'p'}} //----- Standard nothrow placement operators void testGlobalNoThrowPlacementOpNewBeforeOverload() { void *p = operator new(0, std::nothrow); -} -#ifdef LEAKS -#ifndef TEST_INLINABLE_ALLOCATORS -// expected-warning@-3{{Potential leak of memory pointed to by 'p'}} -#endif -#endif +} // leak-warning{{Potential leak of memory pointed to by 'p'}} void 
testGlobalNoThrowPlacementExprNewBeforeOverload() { int *p = new(std::nothrow) int; -} -#ifdef LEAKS -#ifndef TEST_INLINABLE_ALLOCATORS -// expected-warning@-3{{Potential leak of memory pointed to by 'p'}} -#endif -#endif +} // leak-warning{{Potential leak of memory pointed to by 'p'}} //----- Standard pointer placement operators void testGlobalPointerPlacementNew() { @@ -135,13 +102,13 @@ void testNewInvalidationPlacement(PtrWrapper *w) { void testUseZeroAlloc1() { int *p = (int *)operator new(0); - *p = 1; // expected-warning {{Use of zero-allocated memory}} + *p = 1; // newdelete-warning {{Use of zero-allocated memory}} delete p; } int testUseZeroAlloc2() { int *p = (int *)operator new[](0); - return p[0]; // expected-warning {{Use of zero-allocated memory}} + return p[0]; // newdelete-warning {{Use of zero-allocated memory}} delete[] p; } @@ -149,7 +116,7 @@ void f(int); void testUseZeroAlloc3() { int *p = new int[0]; - f(*p); // expected-warning {{Use of zero-allocated memory}} + f(*p); // newdelete-warning {{Use of zero-allocated memory}} delete[] p; } @@ -168,70 +135,68 @@ void g(SomeClass &c, ...); void testUseFirstArgAfterDelete() { int *p = new int; delete p; - f(p); // expected-warning{{Use of memory after it is freed}} + f(p); // newdelete-warning{{Use of memory after it is freed}} } void testUseMiddleArgAfterDelete(int *p) { delete p; - f(0, p); // expected-warning{{Use of memory after it is freed}} + f(0, p); // newdelete-warning{{Use of memory after it is freed}} } void testUseLastArgAfterDelete(int *p) { delete p; - f(0, 0, p); // expected-warning{{Use of memory after it is freed}} + f(0, 0, p); // newdelete-warning{{Use of memory after it is freed}} } void testUseSeveralArgsAfterDelete(int *p) { delete p; - f(p, p, p); // expected-warning{{Use of memory after it is freed}} + f(p, p, p); // newdelete-warning{{Use of memory after it is freed}} } void testUseRefArgAfterDelete(SomeClass &c) { delete &c; - g(c); // expected-warning{{Use of memory after 
it is freed}} + g(c); // newdelete-warning{{Use of memory after it is freed}} } void testVariadicArgAfterDelete() { SomeClass c; int *p = new int; delete p; - g(c, 0, p); // expected-warning{{Use of memory after it is freed}} + g(c, 0, p); // newdelete-warning{{Use of memory after it is freed}} } void testUseMethodArgAfterDelete(int *p) { SomeClass *c = new SomeClass; delete p; - c->f(p); // expected-warning{{Use of memory after it is freed}} + c->f(p); // newdelete-warning{{Use of memory after it is freed}} } void testUseThisAfterDelete() { SomeClass *c = new SomeClass; delete c; - c->f(0); // expected-warning{{Use of memory after it is freed}} + c->f(0); // newdelete-warning{{Use of memory after it is freed}} } void testDoubleDelete() { int *p = new int; delete p; - delete p; // expected-warning{{Attempt to free released memory}} + delete p; // newdelete-warning{{Attempt to free released memory}} } void testExprDeleteArg() { int i; - delete &i; // expected-warning{{Argument to 'delete' is the address of the local variable 'i', which is not memory allocated by 'new'}} + delete &i; // newdelete-warning{{Argument to 'delete' is the address of the local variable 'i', which is not memory allocated by 'new'}} } void testExprDeleteArrArg() { int i; - delete[] &i; // expected-warning{{Argument to 'delete[]' is the address of the local variable 'i', which is not memory allocated by 'new[]'}} + delete[] & i; // newdelete-warning{{Argument to 'delete[]' is the address of the local variable 'i', which is not memory allocated by 'new[]'}} } void testAllocDeallocNames() { int *p = new(std::nothrow) int[1]; delete[] (++p); -#ifndef TEST_INLINABLE_ALLOCATORS - // expected-warning@-2{{Argument to 'delete[]' is offset by 4 bytes from the start of memory allocated by 'new[]'}} -#endif + // newdelete-warning@-1{{Argument to 'delete[]' is offset by 4 bytes from the start of memory allocated by 'new[]'}} } //-------------------------------- @@ -408,7 +373,7 @@ class DerefClass{ void 
testDoubleDeleteClassInstance() { DerefClass *foo = new DerefClass(); delete foo; - delete foo; // expected-warning {{Attempt to delete released memory}} + delete foo; // newdelete-warning {{Attempt to delete released memory}} } class EmptyClass{ @@ -420,7 +385,7 @@ class EmptyClass{ void testDoubleDeleteEmptyClass() { EmptyClass *foo = new EmptyClass(); delete foo; - delete foo; // expected-warning {{Attempt to delete released memory}} + delete foo; // newdelete-warning {{Attempt to delete released memory}} } struct Base { diff --git a/clang/test/Analysis/NewDelete-intersections.mm b/clang/test/Analysis/NewDelete-intersections.mm index b3707858f00ca..f01d62f8d365a 100644 --- a/clang/test/Analysis/NewDelete-intersections.mm +++ b/clang/test/Analysis/NewDelete-intersections.mm @@ -1,7 +1,20 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDelete -std=c++11 -fblocks -verify %s -// RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDelete,cplusplus.NewDeleteLeaks -std=c++11 -DLEAKS -fblocks -verify %s -// RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDelete -std=c++11 -fblocks -DTEST_INLINABLE_ALLOCATORS -verify %s -// RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDelete,cplusplus.NewDeleteLeaks -std=c++11 -DLEAKS -fblocks -DTEST_INLINABLE_ALLOCATORS -verify %s +// RUN: %clang_analyze_cc1 -std=c++11 -fblocks %s \ +// RUN: -verify=newdelete \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=cplusplus.NewDelete + +// RUN: %clang_analyze_cc1 -std=c++11 -DLEAKS -fblocks %s \ +// RUN: -verify=leak \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=cplusplus.NewDeleteLeaks + +// leak-no-diagnostics + +// RUN: %clang_analyze_cc1 -std=c++11 -DLEAKS -fblocks %s \ +// RUN: -verify=mismatch \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=unix.MismatchedDeallocator + #include "Inputs/system-header-simulator-cxx.h" #include "Inputs/system-header-simulator-objc.h" @@ -10,12 +23,6 @@ extern 
"C" void *alloca(size_t); extern "C" void free(void *); -//---------------------------------------------------------------------------- -// Check for intersections with unix.Malloc and unix.MallocWithAnnotations -// checkers bounded with cplusplus.NewDelete. -//---------------------------------------------------------------------------- - -//----- malloc()/free() are subjects of unix.Malloc and unix.MallocWithAnnotations void testMallocFreeNoWarn() { int i; free(&i); // no warn @@ -39,7 +46,8 @@ void testMallocFreeNoWarn() { void testDeleteMalloced() { int *p1 = (int *)malloc(sizeof(int)); - delete p1; // no warn + delete p1; + // mismatch-warning@-1{{Memory allocated by malloc() should be deallocated by free(), not 'delete'}} int *p2 = (int *)__builtin_alloca(sizeof(int)); delete p2; // no warn @@ -54,35 +62,30 @@ void testUseZeroAllocatedMalloced() { void testFreeOpNew() { void *p = operator new(0); free(p); + // mismatch-warning@-1{{Memory allocated by operator new should be deallocated by 'delete', not free()}} } -#ifdef LEAKS -// expected-warning@-2 {{Potential leak of memory pointed to by 'p'}} -#endif void testFreeNewExpr() { int *p = new int; free(p); + // mismatch-warning@-1{{Memory allocated by 'new' should be deallocated by 'delete', not free()}} + free(p); } -#ifdef LEAKS -// expected-warning@-2 {{Potential leak of memory pointed to by 'p'}} -#endif void testObjcFreeNewed() { int *p = new int; NSData *nsdata = [NSData dataWithBytesNoCopy:p length:sizeof(int) freeWhenDone:1]; -#ifdef LEAKS - // expected-warning@-2 {{Potential leak of memory pointed to by 'p'}} -#endif + // mismatch-warning@-1{{+dataWithBytesNoCopy:length:freeWhenDone: cannot take ownership of memory allocated by 'new'}} } void testFreeAfterDelete() { int *p = new int; delete p; - free(p); // expected-warning{{Use of memory after it is freed}} + free(p); // newdelete-warning{{Use of memory after it is freed}} } void testStandardPlacementNewAfterDelete() { int *p = new int; delete p; - p = 
new(p) int; // expected-warning{{Use of memory after it is freed}} + p = new (p) int; // newdelete-warning{{Use of memory after it is freed}} } diff --git a/clang/test/Analysis/new.cpp b/clang/test/Analysis/new.cpp index 3384cfeb61417..2c3eb2825a6bb 100644 --- a/clang/test/Analysis/new.cpp +++ b/clang/test/Analysis/new.cpp @@ -115,11 +115,6 @@ void testUseAfter(int *p) { delete c; } -//-------------------------------------------------------------------- -// Check for intersection with other checkers from MallocChecker.cpp -// bounded with unix.Malloc -//-------------------------------------------------------------------- - // new/delete oparators are subjects of cplusplus.NewDelete. void testNewDeleteNoWarn() { int i; @@ -135,11 +130,11 @@ void testNewDeleteNoWarn() { int *p3 = new int; // no-warning } -// unix.Malloc does not know about operators new/delete. void testDeleteMallocked() { int *x = (int *)malloc(sizeof(int)); - delete x; // FIXME: Should detect pointer escape and keep silent after 'delete' is modeled properly. -} // expected-warning{{Potential leak of memory pointed to by 'x'}} + // unix.MismatchedDeallocator would catch this, but we're not testing it here. + delete x; +} void testDeleteOpAfterFree() { int *p = (int *)malloc(sizeof(int)); From a924dac44f31ffa19508165fc61a9f10cd1d4836 Mon Sep 17 00:00:00 2001 From: stevewan Date: Tue, 26 May 2020 15:39:21 -0400 Subject: [PATCH 159/770] [NFC] Fix formatting for the 'aix-ld.c' test case. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: Based on comments received in D80415 pertinent to test case format, the following fixes are provided to other tests in 'aix-ld.c' for the sake of consistency and readability, - Align flags in RUN directives vertically. - Align patterns in CHECK directives vertically. - Remove the ‘-o %t.o’ as it’s unnecessary for tests with ‘-###’. - Fix typos in comments. 
Reviewers: ZarkoCA, hubert.reinterpretcast, daltenty Reviewed By: hubert.reinterpretcast Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D80532 --- clang/test/Driver/aix-ld.c | 249 ++++++++++++++++++------------------- 1 file changed, 124 insertions(+), 125 deletions(-) diff --git a/clang/test/Driver/aix-ld.c b/clang/test/Driver/aix-ld.c index 218fbd2bb3802..59e35248af30c 100644 --- a/clang/test/Driver/aix-ld.c +++ b/clang/test/Driver/aix-ld.c @@ -2,177 +2,177 @@ // sysroot to make these tests independent of the host system. // Check powerpc-ibm-aix7.1.0.0, 32-bit. -// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -target powerpc-ibm-aix7.1.0.0 \ -// RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: %clang -no-canonical-prefixes %s -### 2>&1 \ +// RUN: -target powerpc-ibm-aix7.1.0.0 \ +// RUN: --sysroot %S/Inputs/aix_ppc_tree \ // RUN: | FileCheck --check-prefix=CHECK-LD32 %s // CHECK-LD32-NOT: warning: -// CHECK-LD32: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" -// CHECK-LD32: "-isysroot" "[[SYSROOT:[^"]+]]" -// CHECK-LD32: "{{.*}}ld{{(.exe)?}}" +// CHECK-LD32: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" +// CHECK-LD32: "-isysroot" "[[SYSROOT:[^"]+]]" +// CHECK-LD32: "{{.*}}ld{{(.exe)?}}" // CHECK-LD32-NOT: "-bnso" -// CHECK-LD32: "-b32" -// CHECK-LD32: "-bpT:0x10000000" "-bpD:0x20000000" -// CHECK-LD32: "[[SYSROOT]]/usr/lib{{/|\\\\}}crt0.o" -// CHECK-LD32: "-L[[SYSROOT]]/usr/lib" -// CHECK-LD32: "-lc" +// CHECK-LD32: "-b32" +// CHECK-LD32: "-bpT:0x10000000" "-bpD:0x20000000" +// CHECK-LD32: "[[SYSROOT]]/usr/lib{{/|\\\\}}crt0.o" +// CHECK-LD32: "-L[[SYSROOT]]/usr/lib" +// CHECK-LD32: "-lc" // Check powerpc64-ibm-aix7.1.0.0, 64-bit. 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -target powerpc64-ibm-aix7.1.0.0 \ -// RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: %clang -no-canonical-prefixes %s -### 2>&1 \ +// RUN: -target powerpc64-ibm-aix7.1.0.0 \ +// RUN: --sysroot %S/Inputs/aix_ppc_tree \ // RUN: | FileCheck --check-prefix=CHECK-LD64 %s // CHECK-LD64-NOT: warning: -// CHECK-LD64: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc64-ibm-aix7.1.0.0" -// CHECK-LD64: "-isysroot" "[[SYSROOT:[^"]+]]" -// CHECK-LD64: "{{.*}}ld{{(.exe)?}}" +// CHECK-LD64: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc64-ibm-aix7.1.0.0" +// CHECK-LD64: "-isysroot" "[[SYSROOT:[^"]+]]" +// CHECK-LD64: "{{.*}}ld{{(.exe)?}}" // CHECK-LD64-NOT: "-bnso" -// CHECK-LD64: "-b64" -// CHECK-LD64: "-bpT:0x100000000" "-bpD:0x110000000" -// CHECK-LD64: "[[SYSROOT]]/usr/lib{{/|\\\\}}crt0_64.o" -// CHECK-LD64: "-L[[SYSROOT]]/usr/lib" -// CHECK-LD64: "-lc" +// CHECK-LD64: "-b64" +// CHECK-LD64: "-bpT:0x100000000" "-bpD:0x110000000" +// CHECK-LD64: "[[SYSROOT]]/usr/lib{{/|\\\\}}crt0_64.o" +// CHECK-LD64: "-L[[SYSROOT]]/usr/lib" +// CHECK-LD64: "-lc" // Check powerpc-ibm-aix7.1.0.0, 32-bit. Enable POSIX thread support. 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -pthread \ -// RUN: -target powerpc-ibm-aix7.1.0.0 \ -// RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: %clang -no-canonical-prefixes %s -### 2>&1 \ +// RUN: -pthread \ +// RUN: -target powerpc-ibm-aix7.1.0.0 \ +// RUN: --sysroot %S/Inputs/aix_ppc_tree \ // RUN: | FileCheck --check-prefix=CHECK-LD32-PTHREAD %s // CHECK-LD32-PTHREAD-NOT: warning: -// CHECK-LD32-PTHREAD: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" -// CHECK-LD32-PTHREAD: "-isysroot" "[[SYSROOT:[^"]+]]" -// CHECK-LD32-PTHREAD: "{{.*}}ld{{(.exe)?}}" +// CHECK-LD32-PTHREAD: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" +// CHECK-LD32-PTHREAD: "-isysroot" "[[SYSROOT:[^"]+]]" +// CHECK-LD32-PTHREAD: "{{.*}}ld{{(.exe)?}}" // CHECK-LD32-PTHREAD-NOT: "-bnso" -// CHECK-LD32-PTHREAD: "-b32" -// CHECK-LD32-PTHREAD: "-bpT:0x10000000" "-bpD:0x20000000" -// CHECK-LD32-PTHREAD: "[[SYSROOT]]/usr/lib{{/|\\\\}}crt0.o" -// CHECK-LD32-PTHREAD: "-L[[SYSROOT]]/usr/lib" -// CHECK-LD32-PTHREAD: "-lpthreads" -// CHECK-LD32-PTHREAD: "-lc" +// CHECK-LD32-PTHREAD: "-b32" +// CHECK-LD32-PTHREAD: "-bpT:0x10000000" "-bpD:0x20000000" +// CHECK-LD32-PTHREAD: "[[SYSROOT]]/usr/lib{{/|\\\\}}crt0.o" +// CHECK-LD32-PTHREAD: "-L[[SYSROOT]]/usr/lib" +// CHECK-LD32-PTHREAD: "-lpthreads" +// CHECK-LD32-PTHREAD: "-lc" -// Check powerpc-ibm-aix7.1.0.0, 64-bit. POSIX thread alias. -// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -pthreads \ -// RUN: -target powerpc64-ibm-aix7.1.0.0 \ -// RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// Check powerpc64-ibm-aix7.1.0.0, 64-bit. POSIX thread alias. 
+// RUN: %clang -no-canonical-prefixes %s -### 2>&1 \ +// RUN: -pthreads \ +// RUN: -target powerpc64-ibm-aix7.1.0.0 \ +// RUN: --sysroot %S/Inputs/aix_ppc_tree \ // RUN: | FileCheck --check-prefix=CHECK-LD64-PTHREAD %s // CHECK-LD64-PTHREAD-NOT: warning: -// CHECK-LD64-PTHREAD: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc64-ibm-aix7.1.0.0" -// CHECK-LD64-PTHREAD: "-isysroot" "[[SYSROOT:[^"]+]]" -// CHECK-LD64-PTHREAD: "{{.*}}ld{{(.exe)?}}" +// CHECK-LD64-PTHREAD: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc64-ibm-aix7.1.0.0" +// CHECK-LD64-PTHREAD: "-isysroot" "[[SYSROOT:[^"]+]]" +// CHECK-LD64-PTHREAD: "{{.*}}ld{{(.exe)?}}" // CHECK-LD64-PTHREAD-NOT: "-bnso" -// CHECK-LD64-PTHREAD: "-b64" -// CHECK-LD64-PTHREAD: "-bpT:0x100000000" "-bpD:0x110000000" -// CHECK-LD64-PTHREAD: "[[SYSROOT]]/usr/lib{{/|\\\\}}crt0_64.o" -// CHECK-LD64-PTHREAD: "-L[[SYSROOT]]/usr/lib" -// CHECK-LD64-PTHREAD: "-lpthreads" -// CHECK-LD64-PTHREAD: "-lc" +// CHECK-LD64-PTHREAD: "-b64" +// CHECK-LD64-PTHREAD: "-bpT:0x100000000" "-bpD:0x110000000" +// CHECK-LD64-PTHREAD: "[[SYSROOT]]/usr/lib{{/|\\\\}}crt0_64.o" +// CHECK-LD64-PTHREAD: "-L[[SYSROOT]]/usr/lib" +// CHECK-LD64-PTHREAD: "-lpthreads" +// CHECK-LD64-PTHREAD: "-lc" // Check powerpc-ibm-aix7.1.0.0, 32-bit. Enable profiling. 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -p \ -// RUN: -target powerpc-ibm-aix7.1.0.0 \ -// RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: %clang -no-canonical-prefixes %s -### 2>&1 \ +// RUN: -p \ +// RUN: -target powerpc-ibm-aix7.1.0.0 \ +// RUN: --sysroot %S/Inputs/aix_ppc_tree \ // RUN: | FileCheck --check-prefix=CHECK-LD32-PROF %s // CHECK-LD32-PROF-NOT: warning: -// CHECK-LD32-PROF: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" -// CHECK-LD32-PROF: "-isysroot" "[[SYSROOT:[^"]+]]" -// CHECK-LD32-PROF: "{{.*}}ld{{(.exe)?}}" +// CHECK-LD32-PROF: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" +// CHECK-LD32-PROF: "-isysroot" "[[SYSROOT:[^"]+]]" +// CHECK-LD32-PROF: "{{.*}}ld{{(.exe)?}}" // CHECK-LD32-PROF-NOT: "-bnso" -// CHECK-LD32-PROF: "-b32" -// CHECK-LD32-PROF: "-bpT:0x10000000" "-bpD:0x20000000" -// CHECK-LD32-PROF: "[[SYSROOT]]/usr/lib{{/|\\\\}}mcrt0.o" -// CHECK-LD32-PROF: "-L[[SYSROOT]]/usr/lib" -// CHECK-LD32-PROF: "-lc" +// CHECK-LD32-PROF: "-b32" +// CHECK-LD32-PROF: "-bpT:0x10000000" "-bpD:0x20000000" +// CHECK-LD32-PROF: "[[SYSROOT]]/usr/lib{{/|\\\\}}mcrt0.o" +// CHECK-LD32-PROF: "-L[[SYSROOT]]/usr/lib" +// CHECK-LD32-PROF: "-lc" // Check powerpc64-ibm-aix7.1.0.0, 64-bit. Enable g-profiling. 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -pg \ -// RUN: -target powerpc64-ibm-aix7.1.0.0 \ -// RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: %clang -no-canonical-prefixes %s -### 2>&1 \ +// RUN: -pg \ +// RUN: -target powerpc64-ibm-aix7.1.0.0 \ +// RUN: --sysroot %S/Inputs/aix_ppc_tree \ // RUN: | FileCheck --check-prefix=CHECK-LD64-GPROF %s // CHECK-LD64-GPROF-NOT: warning: -// CHECK-LD64-GPROF: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc64-ibm-aix7.1.0.0" -// CHECK-LD64-GPROF: "-isysroot" "[[SYSROOT:[^"]+]]" -// CHECK-LD64-GPROF: "{{.*}}ld{{(.exe)?}}" +// CHECK-LD64-GPROF: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc64-ibm-aix7.1.0.0" +// CHECK-LD64-GPROF: "-isysroot" "[[SYSROOT:[^"]+]]" +// CHECK-LD64-GPROF: "{{.*}}ld{{(.exe)?}}" // CHECK-LD64-GPROF-NOT: "-bnso" -// CHECK-LD64-GPROF: "-b64" -// CHECK-LD64-GPROF: "-bpT:0x100000000" "-bpD:0x110000000" -// CHECK-LD64-GPROF: "[[SYSROOT]]/usr/lib{{/|\\\\}}gcrt0_64.o" -// CHECK-LD64-GPROF: "-L[[SYSROOT]]/usr/lib" -// CHECK-LD64-GPROF: "-lc" +// CHECK-LD64-GPROF: "-b64" +// CHECK-LD64-GPROF: "-bpT:0x100000000" "-bpD:0x110000000" +// CHECK-LD64-GPROF: "[[SYSROOT]]/usr/lib{{/|\\\\}}gcrt0_64.o" +// CHECK-LD64-GPROF: "-L[[SYSROOT]]/usr/lib" +// CHECK-LD64-GPROF: "-lc" // Check powerpc-ibm-aix7.1.0.0, 32-bit. Static linking. 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -static \ -// RUN: -target powerpc-ibm-aix7.1.0.0 \ -// RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: %clang -no-canonical-prefixes %s -### 2>&1 \ +// RUN: -static \ +// RUN: -target powerpc-ibm-aix7.1.0.0 \ +// RUN: --sysroot %S/Inputs/aix_ppc_tree \ // RUN: | FileCheck --check-prefix=CHECK-LD32-STATIC %s // CHECK-LD32-STATIC-NOT: warning: -// CHECK-LD32-STATIC: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" -// CHECK-LD32-STATIC: "-isysroot" "[[SYSROOT:[^"]+]]" -// CHECK-LD32-STATIC: "{{.*}}ld{{(.exe)?}}" -// CHECK-LD32-STATIC: "-bnso" -// CHECK-LD32-STATIC: "-b32" -// CHECK-LD32-STATIC: "-bpT:0x10000000" "-bpD:0x20000000" -// CHECK-LD32-STATIC: "[[SYSROOT]]/usr/lib{{/|\\\\}}crt0.o" -// CHECK-LD32-STATIC: "-L[[SYSROOT]]/usr/lib" -// CHECK-LD32-STATIC: "-lc" +// CHECK-LD32-STATIC: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" +// CHECK-LD32-STATIC: "-isysroot" "[[SYSROOT:[^"]+]]" +// CHECK-LD32-STATIC: "{{.*}}ld{{(.exe)?}}" +// CHECK-LD32-STATIC: "-bnso" +// CHECK-LD32-STATIC: "-b32" +// CHECK-LD32-STATIC: "-bpT:0x10000000" "-bpD:0x20000000" +// CHECK-LD32-STATIC: "[[SYSROOT]]/usr/lib{{/|\\\\}}crt0.o" +// CHECK-LD32-STATIC: "-L[[SYSROOT]]/usr/lib" +// CHECK-LD32-STATIC: "-lc" // Check powerpc-ibm-aix7.1.0.0, 32-bit. Library search path. 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -L%S/Inputs/aix_ppc_tree/powerpc-ibm-aix7.1.0.0 \ -// RUN: -target powerpc-ibm-aix7.1.0.0 \ -// RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: %clang -no-canonical-prefixes %s -### 2>&1 \ +// RUN: -L%S/Inputs/aix_ppc_tree/powerpc-ibm-aix7.1.0.0 \ +// RUN: -target powerpc-ibm-aix7.1.0.0 \ +// RUN: --sysroot %S/Inputs/aix_ppc_tree \ // RUN: | FileCheck --check-prefix=CHECK-LD32-LIBP %s // CHECK-LD32-LIBP-NOT: warning: -// CHECK-LD32-LIBP: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" -// CHECK-LD32-LIBP: "-isysroot" "[[SYSROOT:[^"]+]]" -// CHECK-LD32-LIBP: "{{.*}}ld{{(.exe)?}}" +// CHECK-LD32-LIBP: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" +// CHECK-LD32-LIBP: "-isysroot" "[[SYSROOT:[^"]+]]" +// CHECK-LD32-LIBP: "{{.*}}ld{{(.exe)?}}" // CHECK-LD32-LIBP-NOT: "-bnso" -// CHECK-LD32-LIBP: "-b32" -// CHECK-LD32-LIBP: "-bpT:0x10000000" "-bpD:0x20000000" -// CHECK-LD32-LIBP: "[[SYSROOT]]/usr/lib{{/|\\\\}}crt0.o" -// CHECK-LD32-LIBP: "-L[[SYSROOT]]/powerpc-ibm-aix7.1.0.0" -// CHECK-LD32-LIBP: "-L[[SYSROOT]]/usr/lib" -// CHECK-LD32-LIBP: "-lc" +// CHECK-LD32-LIBP: "-b32" +// CHECK-LD32-LIBP: "-bpT:0x10000000" "-bpD:0x20000000" +// CHECK-LD32-LIBP: "[[SYSROOT]]/usr/lib{{/|\\\\}}crt0.o" +// CHECK-LD32-LIBP: "-L[[SYSROOT]]/powerpc-ibm-aix7.1.0.0" +// CHECK-LD32-LIBP: "-L[[SYSROOT]]/usr/lib" +// CHECK-LD32-LIBP: "-lc" // Check powerpc-ibm-aix7.1.0.0, 32-bit. nostdlib. 
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -nostdlib \ -// RUN: -pthread \ -// RUN: -target powerpc-ibm-aix7.1.0.0 \ -// RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: %clang -no-canonical-prefixes %s -### 2>&1 \ +// RUN: -nostdlib \ +// RUN: -pthread \ +// RUN: -target powerpc-ibm-aix7.1.0.0 \ +// RUN: --sysroot %S/Inputs/aix_ppc_tree \ // RUN: | FileCheck --check-prefix=CHECK-LD32-NO-STD-LIB %s // CHECK-LD32-NO-STD-LIB-NOT: warning: -// CHECK-LD32-NO-STD-LIB: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" -// CHECK-LD32-NO-STD-LIB: "-isysroot" "[[SYSROOT:[^"]+]]" -// CHECK-LD32-NO-STD-LIB: "{{.*}}ld{{(.exe)?}}" +// CHECK-LD32-NO-STD-LIB: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" +// CHECK-LD32-NO-STD-LIB: "-isysroot" "[[SYSROOT:[^"]+]]" +// CHECK-LD32-NO-STD-LIB: "{{.*}}ld{{(.exe)?}}" // CHECK-LD32-NO-STD-LIB-NOT: "-bnso" -// CHECK-LD32-NO-STD-LIB: "-b32" -// CHECK-LD32-NO-STD-LIB: "-bpT:0x10000000" "-bpD:0x20000000" +// CHECK-LD32-NO-STD-LIB: "-b32" +// CHECK-LD32-NO-STD-LIB: "-bpT:0x10000000" "-bpD:0x20000000" // CHECK-LD32-NO-STD-LIB-NOT: "[[SYSROOT]]/usr/lib{{/|\\\\}}crt0.o" -// CHECK-LD32-NO-STD-LIB: "-L[[SYSROOT]]/usr/lib" +// CHECK-LD32-NO-STD-LIB: "-L[[SYSROOT]]/usr/lib" // CHECK-LD32-NO-STD-LIB-NOT: "-lpthreads" // CHECK-LD32-NO-STD-LIB-NOT: "-lc" -// Check powerpc-ibm-aix7.1.0.0, 64-bit. nodefaultlibs. -// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -nodefaultlibs \ -// RUN: -pthread \ -// RUN: -target powerpc64-ibm-aix7.1.0.0 \ -// RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// Check powerpc64-ibm-aix7.1.0.0, 64-bit. nodefaultlibs. 
+// RUN: %clang -no-canonical-prefixes %s -### 2>&1 \ +// RUN: -nodefaultlibs \ +// RUN: -pthread \ +// RUN: -target powerpc64-ibm-aix7.1.0.0 \ +// RUN: --sysroot %S/Inputs/aix_ppc_tree \ // RUN: | FileCheck --check-prefix=CHECK-LD64-NO-DEFAULT-LIBS %s // CHECK-LD64-NO-DEFAULT-LIBS-NOT: warning: -// CHECK-LD64-NO-DEFAULT-LIBS: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc64-ibm-aix7.1.0.0" -// CHECK-LD64-NO-DEFAULT-LIBS: "-isysroot" "[[SYSROOT:[^"]+]]" -// CHECK-LD64-NO-DEFAULT-LIBS: "{{.*}}ld{{(.exe)?}}" +// CHECK-LD64-NO-DEFAULT-LIBS: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc64-ibm-aix7.1.0.0" +// CHECK-LD64-NO-DEFAULT-LIBS: "-isysroot" "[[SYSROOT:[^"]+]]" +// CHECK-LD64-NO-DEFAULT-LIBS: "{{.*}}ld{{(.exe)?}}" // CHECK-LD64-NO-DEFAULT-LIBS-NOT: "-bnso" -// CHECK-LD64-NO-DEFAULT-LIBS: "-b64" -// CHECK-LD64-NO-DEFAULT-LIBS: "-bpT:0x100000000" "-bpD:0x110000000" -// CHECK-LD64-NO-DEFAULT-LIBS: "[[SYSROOT]]/usr/lib{{/|\\\\}}crt0_64.o" -// CHECK-LD64-NO-DEFAULT-LIBS: "-L[[SYSROOT]]/usr/lib" +// CHECK-LD64-NO-DEFAULT-LIBS: "-b64" +// CHECK-LD64-NO-DEFAULT-LIBS: "-bpT:0x100000000" "-bpD:0x110000000" +// CHECK-LD64-NO-DEFAULT-LIBS: "[[SYSROOT]]/usr/lib{{/|\\\\}}crt0_64.o" +// CHECK-LD64-NO-DEFAULT-LIBS: "-L[[SYSROOT]]/usr/lib" // CHECK-LD64-NO-DEFAULT-LIBS-NOT: "-lpthreads" // CHECK-LD64-NO-DEFAULT-LIBS-NOT: "-lc" @@ -181,8 +181,7 @@ // RUN: -Wl,-bnocdtors \ // RUN: -target powerpc-ibm-aix7.1.0.0 \ // RUN: --sysroot %S/Inputs/aix_ppc_tree \ -// RUN: | FileCheck --check-prefix=CHECK-LD32-CXX-ARG-ORDER %s - +// RUN: | FileCheck --check-prefix=CHECK-LD32-CXX-ARG-ORDER %s // CHECK-LD32-CXX-ARG-ORDER: {{.*}}clang{{.*}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" // CHECK-LD32-CXX-ARG-ORDER: "-isysroot" "[[SYSROOT:[^"]+]]" // CHECK-LD32-CXX-ARG-ORDER: "{{.*}}ld{{(.exe)?}}" From 9a0b0855a96ad91e082c6fb066e0ebabe72eb6b3 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 26 May 2020 12:36:03 -0700 Subject: [PATCH 160/770] Modify verifier checks to support musttail 
+ preallocated Summary: preallocated and musttail can work together, but we don't want to call @llvm.call.preallocated.setup() to modify the stack in musttail calls. So we shouldn't have the "preallocated" operand bundle when a preallocated call is musttail. Also disallow use of the preallocated intrinsics on calls without preallocated arguments. Codegen not yet implemented. Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D80581 --- llvm/docs/LangRef.rst | 17 +++++++++++------ llvm/lib/IR/Verifier.cpp | 16 ++++++++++------ llvm/test/Verifier/preallocated-invalid.ll | 21 +++++++++++++++++---- llvm/test/Verifier/preallocated-valid.ll | 11 +++++++++++ 4 files changed, 49 insertions(+), 16 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 0891392b1e61e..61a0085c6f881 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -1065,17 +1065,22 @@ Currently, only the following parameter attributes are defined: form and the known alignment of the pointer specified to the call site. If the alignment is not specified, then the code generator makes a target-specific assumption. + +.. _attr_preallocated: + ``preallocated(<ty>)`` This indicates that the pointer parameter should really be passed by value to the function, and that the pointer parameter's pointee has already been initialized before the call instruction. This attribute is only valid on LLVM pointer arguments. The argument must be the value returned by the appropriate - :ref:`llvm.call.preallocated.arg<int_call_preallocated_arg>`, although is - ignored during codegen. + :ref:`llvm.call.preallocated.arg<int_call_preallocated_arg>` on non + ``musttail`` calls, or the corresponding caller parameter in ``musttail`` + calls, although it is ignored during codegen. - Any function call with a ``preallocated`` attribute in any parameter - must have a ``"preallocated"`` operand bundle. + A non ``musttail`` function call with a ``preallocated`` attribute in + any parameter must have a ``"preallocated"`` operand bundle.
A ``musttail`` + function call cannot have a ``"preallocated"`` operand bundle. The preallocated attribute requires a type argument, which must be the same as the pointee type of the argument. @@ -10634,8 +10639,8 @@ This instruction requires several arguments: #. The call will not cause unbounded stack growth if it is part of a recursive cycle in the call graph. - #. Arguments with the :ref:`inalloca <attr_inalloca>` attribute are - forwarded in place. + #. Arguments with the :ref:`inalloca <attr_inalloca>` or + :ref:`preallocated <attr_preallocated>` attribute are forwarded in place. #. If the musttail call appears in a function with the ``"thunk"`` attribute and the caller and callee both have varargs, than any unprototyped arguments in register or memory are forwarded to the callee. Similarly, diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 4d64af3e8de4c..5ca6762d1c7fd 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -2988,9 +2988,13 @@ void Verifier::visitCallBase(CallBase &Call) { if (Call.paramHasAttr(i, Attribute::Preallocated)) { Value *ArgVal = Call.getArgOperand(i); - Assert(Call.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0, - "preallocated operand requires a preallocated bundle", ArgVal, - Call); + bool hasOB = + Call.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0; + bool isMustTail = Call.isMustTailCall(); + Assert(hasOB != isMustTail, + "preallocated operand either requires a preallocated bundle or " + "the call to be musttail (but not both)", + ArgVal, Call); } } @@ -3150,9 +3154,6 @@ static AttrBuilder getParameterABIAttributes(int I, AttributeList Attrs) { void Verifier::verifyMustTailCall(CallInst &CI) { Assert(!CI.isInlineAsm(), "cannot use musttail call with inline asm", &CI); - // FIXME: support musttail + preallocated - Assert(!CI.countOperandBundlesOfType(LLVMContext::OB_preallocated), - "musttail and preallocated not yet supported", &CI); // - The caller and callee prototypes must match.
Pointer types of // parameters or return types may differ in pointee type, but not @@ -4533,6 +4534,9 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { ++NumPreallocatedArgs; } } + Assert(NumPreallocatedArgs != 0, + "cannot use preallocated intrinsics on a call without " + "preallocated arguments"); Assert(NumArgs->equalsInt(NumPreallocatedArgs), "llvm.call.preallocated.setup arg size must be equal to number " "of preallocated arguments " diff --git a/llvm/test/Verifier/preallocated-invalid.ll b/llvm/test/Verifier/preallocated-invalid.ll index faa4c7a9fbfe6..7fdab33167e51 100644 --- a/llvm/test/Verifier/preallocated-invalid.ll +++ b/llvm/test/Verifier/preallocated-invalid.ll @@ -89,7 +89,7 @@ define void @preallocated_attribute_type_mismatch() { ret void } -; CHECK: preallocated operand requires a preallocated bundle +; CHECK: preallocated operand either requires a preallocated bundle or the call to be musttail define void @preallocated_require_bundle() { %cs = call token @llvm.call.preallocated.setup(i32 1) %x = call i8* @llvm.call.preallocated.arg(token %cs, i32 0) preallocated(i32) @@ -117,9 +117,22 @@ define void @preallocated_arg_token() { ret void } -; CHECK: musttail and preallocated not yet supported -define void @musttail() { +; CHECK: cannot use preallocated intrinsics on a call without preallocated arguments +define void @preallocated_no_preallocated_args() { %cs = call token @llvm.call.preallocated.setup(i32 0) - musttail call void @foo0() ["preallocated"(token %cs)] + call void @foo0() ["preallocated"(token %cs)] + ret void +} + +; CHECK: preallocated operand either requires a preallocated bundle or the call to be musttail +define void @musttail_and_bundle(i32* preallocated(i32) %a) { + %cs = call token @llvm.call.preallocated.setup(i32 0) + musttail call void @musttail_and_bundle(i32* preallocated(i32) %a) ["preallocated"(token %cs)] + ret void +} + +; CHECK: cannot guarantee tail call due to mismatched ABI impacting function 
attributes +define void @musttail_attr_no_match(i32* preallocated(i32) %a) { + musttail call void @musttail_and_bundle(i32* %a) ret void } diff --git a/llvm/test/Verifier/preallocated-valid.ll b/llvm/test/Verifier/preallocated-valid.ll index 07f748ca8678b..483493c0c7470 100644 --- a/llvm/test/Verifier/preallocated-valid.ll +++ b/llvm/test/Verifier/preallocated-valid.ll @@ -4,6 +4,7 @@ declare token @llvm.call.preallocated.setup(i32) declare i8* @llvm.call.preallocated.arg(token, i32) declare void @foo1(i32* preallocated(i32)) +declare i64 @foo1_i64(i32* preallocated(i32)) declare void @foo2(i32* preallocated(i32), i32*, i32* preallocated(i32)) define void @preallocated() { @@ -38,3 +39,13 @@ define void @preallocated_num_args() { call void @foo2(i32* preallocated(i32) %x1, i32* %a, i32* preallocated(i32) %y1) ["preallocated"(token %cs)] ret void } + +define void @preallocate_musttail(i32* preallocated(i32) %a) { + musttail call void @foo1(i32* preallocated(i32) %a) + ret void +} + +define i64 @preallocate_musttail_i64(i32* preallocated(i32) %a) { + %r = musttail call i64 @foo1_i64(i32* preallocated(i32) %a) + ret i64 %r +} From 842a8cc10c4146cee6cedd94fbf556c94b8ec365 Mon Sep 17 00:00:00 2001 From: Alexander Shaposhnikov Date: Tue, 26 May 2020 16:49:56 -0700 Subject: [PATCH 161/770] [llvm-objcopy][MachO] Add support for removing Swift symbols cctools strip has the option "-T" which removes Swift symbols. This diff implements this option in llvm-strip for MachO. 
Test plan: make check-all Differential revision: https://reviews.llvm.org/D80099 --- llvm/docs/CommandGuide/llvm-strip.rst | 4 + .../MachO/remove-swift-symbols.test | 221 ++++++++++++++++++ llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp | 3 +- llvm/tools/llvm-objcopy/CopyConfig.cpp | 1 + llvm/tools/llvm-objcopy/CopyConfig.h | 1 + llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp | 4 +- .../tools/llvm-objcopy/MachO/MachOObjcopy.cpp | 6 +- llvm/tools/llvm-objcopy/MachO/MachOReader.cpp | 23 ++ llvm/tools/llvm-objcopy/MachO/MachOReader.h | 1 + llvm/tools/llvm-objcopy/MachO/Object.h | 7 + llvm/tools/llvm-objcopy/StripOpts.td | 3 + 11 files changed, 271 insertions(+), 3 deletions(-) create mode 100644 llvm/test/tools/llvm-objcopy/MachO/remove-swift-symbols.test diff --git a/llvm/docs/CommandGuide/llvm-strip.rst b/llvm/docs/CommandGuide/llvm-strip.rst index 455dc07e9c5cb..a40537bd51c17 100644 --- a/llvm/docs/CommandGuide/llvm-strip.rst +++ b/llvm/docs/CommandGuide/llvm-strip.rst @@ -181,6 +181,10 @@ them. segments. Note that many tools will not be able to use an object without section headers. +.. option:: -T + + Remove Swift symbols. + EXIT STATUS ----------- diff --git a/llvm/test/tools/llvm-objcopy/MachO/remove-swift-symbols.test b/llvm/test/tools/llvm-objcopy/MachO/remove-swift-symbols.test new file mode 100644 index 0000000000000..a47a2dfb9f37b --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/MachO/remove-swift-symbols.test @@ -0,0 +1,221 @@ +## Verify that -T removes Swift symbols. 
+# RUN: yaml2obj -D FLAGS=0x00200085 -D SEGMENT_NAME=__DATA \ +# RUN: -D SECTION_NAME=__objc_imageinfo -D SECTION_SIZE=8 \ +# RUN: -D SECTION_CONTENT=0000000040070105 %s -o %t1 +# RUN: llvm-strip -x -T %t1 +# RUN: llvm-readobj -symbols %t1 | FileCheck --check-prefix=NO-SWIFT-SYMBOLS %s + +# RUN: yaml2obj -D FLAGS=0x00200085 -D SEGMENT_NAME=__DATA_CONST \ +# RUN: -D SECTION_NAME=__objc_imageinfo -D SECTION_SIZE=8 \ +# RUN: -D SECTION_CONTENT=0000000040070105 %s -o %t2 +# RUN: llvm-strip -x -T %t2 +# RUN: llvm-readobj -symbols %t2 | FileCheck --check-prefix=NO-SWIFT-SYMBOLS %s + +# RUN: yaml2obj -D FLAGS=0x00200085 -D SEGMENT_NAME=__DATA_DIRTY \ +# RUN: -D SECTION_NAME=__objc_imageinfo -D SECTION_SIZE=8 \ +# RUN: -D SECTION_CONTENT=0000000040070105 %s -o %t3 +# RUN: llvm-strip -x -T %t3 +# RUN: llvm-readobj -symbols %t3 | FileCheck --check-prefix=NO-SWIFT-SYMBOLS %s + +# NO-SWIFT-SYMBOLS: Symbols [ +# NO-SWIFT-SYMBOLS-NEXT: Symbol { +# NO-SWIFT-SYMBOLS-NEXT: Name: _main (1) +# NO-SWIFT-SYMBOLS-NEXT: Extern +# NO-SWIFT-SYMBOLS-NEXT: Type: Section (0xE) +# NO-SWIFT-SYMBOLS-NEXT: Section: __text (0x1) +# NO-SWIFT-SYMBOLS-NEXT: RefType: UndefinedNonLazy (0x0) +# NO-SWIFT-SYMBOLS-NEXT: Flags [ (0x0) +# NO-SWIFT-SYMBOLS-NEXT: ] +# NO-SWIFT-SYMBOLS-NEXT: Value: 0x100000B70 +# NO-SWIFT-SYMBOLS-NEXT: } +# NO-SWIFT-SYMBOLS-NEXT: ] + +## Verify that -T does not remove (public) Swift symbols when the binary +## does not contain __objc_imageinfo in one of the expected segments. 
+ +# RUN: yaml2obj -D FLAGS=0x00200085 -D SEGMENT_NAME=__DATA \ +# RUN: -D SECTION_NAME=__not_objc_imageinfo -D SECTION_SIZE=8 \ +# RUN: -D SECTION_CONTENT=0000000040070105 %s -o %t4 +# RUN: llvm-strip -x -T %t4 +# RUN: llvm-readobj -symbols %t4 | FileCheck --check-prefix=SWIFT-SYMBOLS %s + +# RUN: yaml2obj -D FLAGS=0x00200085 -D SEGMENT_NAME=__NOT_DATA \ +# RUN: -D SECTION_NAME=__objc_imageinfo -D SECTION_SIZE=8 \ +# RUN: -D SECTION_CONTENT=0000000040070105 %s -o %t5 +# RUN: llvm-strip -x -T %t5 +# RUN: llvm-readobj -symbols %t5 | FileCheck --check-prefix=SWIFT-SYMBOLS %s + +## Verify that -T does not remove (public) Swift symbols when swift_version is zero. + +# RUN: yaml2obj -D FLAGS=0x00200085 -D SEGMENT_NAME=__DATA \ +# RUN: -D SECTION_NAME=__objc_imageinfo -D SECTION_SIZE=8 \ +# RUN: -D SECTION_CONTENT=0000000000000000 %s -o %t6 +# RUN: llvm-strip -x -T %t6 +# RUN: llvm-readobj -symbols %t6 | FileCheck --check-prefix=SWIFT-SYMBOLS %s + +## Verify that -T does not remove (public) Swift symbols when the binary +## contains invalid (too small) __objc_imageinfo. + +# RUN: yaml2obj -D FLAGS=0x00200085 -D SEGMENT_NAME=__DATA \ +# RUN: -D SECTION_NAME=__objc_imageinfo -D SECTION_SIZE=4 \ +# RUN: -D SECTION_CONTENT=00000000 %s -o %t7 +# RUN: llvm-strip -x -T %t7 +# RUN: llvm-readobj -symbols %t7 | FileCheck --check-prefix=SWIFT-SYMBOLS %s + +## Verify that -T does not remove (public) Swift symbols +## when the flag MH_DYLDLINK is not set. 
+ +# RUN: yaml2obj -D FLAGS=0x00200000 -D SEGMENT_NAME=__DATA \ +# RUN: -D SECTION_NAME=__objc_imageinfo -D SECTION_SIZE=8 \ +# RUN: -D SECTION_CONTENT=0000000040070105 %s -o %t8 +# RUN: llvm-strip -x -T %t8 +# RUN: llvm-readobj -symbols %t8 | FileCheck --check-prefix=SWIFT-SYMBOLS %s + +# SWIFT-SYMBOLS: Symbols [ +# SWIFT-SYMBOLS-NEXT: Symbol { +# SWIFT-SYMBOLS-NEXT: Name: _$S1a13PublicSymbol1Sivp (26) +# SWIFT-SYMBOLS-NEXT: Extern +# SWIFT-SYMBOLS-NEXT: Type: Section (0xE) +# SWIFT-SYMBOLS-NEXT: Section: __text (0x1) +# SWIFT-SYMBOLS-NEXT: RefType: UndefinedNonLazy (0x0) +# SWIFT-SYMBOLS-NEXT: Flags [ (0x0) +# SWIFT-SYMBOLS-NEXT: ] +# SWIFT-SYMBOLS-NEXT: Value: 0x100001160 +# SWIFT-SYMBOLS-NEXT: } +# SWIFT-SYMBOLS-NEXT: Symbol { +# SWIFT-SYMBOLS-NEXT: Name: _$s1a13PublicSymbol2Sivp (1) +# SWIFT-SYMBOLS-NEXT: Extern +# SWIFT-SYMBOLS-NEXT: Type: Section (0xE) +# SWIFT-SYMBOLS-NEXT: Section: __text (0x1) +# SWIFT-SYMBOLS-NEXT: RefType: UndefinedNonLazy (0x0) +# SWIFT-SYMBOLS-NEXT: Flags [ (0x0) +# SWIFT-SYMBOLS-NEXT: ] +# SWIFT-SYMBOLS-NEXT: Value: 0x100001168 +# SWIFT-SYMBOLS-NEXT: } +# SWIFT-SYMBOLS-NEXT: Symbol { +# SWIFT-SYMBOLS-NEXT: Name: _main (51) +# SWIFT-SYMBOLS-NEXT: Extern +# SWIFT-SYMBOLS-NEXT: Type: Section (0xE) +# SWIFT-SYMBOLS-NEXT: Section: __text (0x1) +# SWIFT-SYMBOLS-NEXT: RefType: UndefinedNonLazy (0x0) +# SWIFT-SYMBOLS-NEXT: Flags [ (0x0) +# SWIFT-SYMBOLS-NEXT: ] +# SWIFT-SYMBOLS-NEXT: Value: 0x100000B70 +# SWIFT-SYMBOLS-NEXT: } +# SWIFT-SYMBOLS-NEXT: ] + +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x80000003 + filetype: 0x00000002 + ncmds: 4 + sizeofcmds: 400 + flags: [[FLAGS]] + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __TEXT + vmaddr: 4294967296 + vmsize: 4096 + fileoff: 0 + filesize: 4096 + maxprot: 5 + initprot: 5 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x0000000100000B70 + size: 845 + offset: 0x00000B70 + align: 4 + 
reloff: 0x00000000 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: [[SEGMENT_NAME]] + vmaddr: 4294971392 + vmsize: 4096 + fileoff: 4096 + filesize: 4096 + maxprot: 3 + initprot: 3 + nsects: 1 + flags: 0 + Sections: + - sectname: [[SECTION_NAME]] + segname: [[SEGMENT_NAME]] + addr: 0x0000000100001090 + size: [[SECTION_SIZE]] + offset: 0x00001090 + align: 2 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: "[[SECTION_CONTENT]]" + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 4294975488 + vmsize: 4096 + fileoff: 8192 + filesize: 188 + maxprot: 1 + initprot: 1 + nsects: 0 + flags: 0 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 8192 + nsyms: 5 + stroff: 8272 + strsize: 108 +LinkEditData: + NameList: + - n_strx: 50 + n_type: 0x1E + n_sect: 1 + n_desc: 0 + n_value: 4294971760 + - n_strx: 1 + n_type: 0x1E + n_sect: 1 + n_desc: 0 + n_value: 4294971768 + - n_strx: 74 + n_type: 0x0F + n_sect: 1 + n_desc: 0 + n_value: 4294971744 + - n_strx: 25 + n_type: 0x0F + n_sect: 1 + n_desc: 0 + n_value: 4294971752 + - n_strx: 99 + n_type: 0x0F + n_sect: 1 + n_desc: 0 + n_value: 4294970224 + StringTable: + - '' + - '_$s1a12LocalSymbol2Sivp' + - '_$s1a13PublicSymbol2Sivp' + - '_$S1a12LocalSymbol1Sivp' + - '_$S1a13PublicSymbol1Sivp' + - _main + - '' + - '' + - '' +... 
diff --git a/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp b/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp index 2e363f26eaccc..43ec2b1fa82f2 100644 --- a/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp +++ b/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp @@ -251,7 +251,8 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) { !Config.SymbolsToKeepGlobal.empty() || !Config.SectionsToRename.empty() || !Config.SetSectionAlignment.empty() || Config.ExtractDWO || Config.LocalizeHidden || Config.PreserveDates || Config.StripDWO || - Config.StripNonAlloc || Config.StripSections || Config.Weaken || + Config.StripNonAlloc || Config.StripSections || + Config.StripSwiftSymbols || Config.Weaken || Config.DecompressDebugSections || Config.DiscardMode == DiscardType::Locals || !Config.SymbolsToAdd.empty() || Config.EntryExpr) { diff --git a/llvm/tools/llvm-objcopy/CopyConfig.cpp b/llvm/tools/llvm-objcopy/CopyConfig.cpp index ff12e4bd89f3e..1e151f01e01e2 100644 --- a/llvm/tools/llvm-objcopy/CopyConfig.cpp +++ b/llvm/tools/llvm-objcopy/CopyConfig.cpp @@ -912,6 +912,7 @@ parseStripOptions(ArrayRef ArgsArr, if (auto Arg = InputArgs.getLastArg(STRIP_strip_all, STRIP_no_strip_all)) Config.StripAll = Arg->getOption().getID() == STRIP_strip_all; Config.StripAllGNU = InputArgs.hasArg(STRIP_strip_all_gnu); + Config.StripSwiftSymbols = InputArgs.hasArg(STRIP_strip_swift_symbols); Config.OnlyKeepDebug = InputArgs.hasArg(STRIP_only_keep_debug); Config.KeepFileSymbols = InputArgs.hasArg(STRIP_keep_file_symbols); diff --git a/llvm/tools/llvm-objcopy/CopyConfig.h b/llvm/tools/llvm-objcopy/CopyConfig.h index be1dca46b9682..acf783c7f2789 100644 --- a/llvm/tools/llvm-objcopy/CopyConfig.h +++ b/llvm/tools/llvm-objcopy/CopyConfig.h @@ -219,6 +219,7 @@ struct CopyConfig { bool StripDebug = false; bool StripNonAlloc = false; bool StripSections = false; + bool StripSwiftSymbols = false; bool StripUnneeded = false; bool Weaken = false; bool DecompressDebugSections = false; diff --git 
a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp b/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp index f7332b7f66fe0..8e14c887170d9 100644 --- a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp +++ b/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp @@ -604,7 +604,9 @@ static Error replaceAndRemoveSections(const CopyConfig &Config, Object &Obj) { // system. The only priority is that keeps/copies overrule removes. static Error handleArgs(const CopyConfig &Config, Object &Obj, const Reader &Reader, ElfType OutputElfType) { - + if (Config.StripSwiftSymbols) + return createStringError(llvm::errc::invalid_argument, + "option not supported by llvm-objcopy for ELF"); if (!Config.SplitDWO.empty()) if (Error E = splitDWOToFile(Config, Reader, Config.SplitDWO, OutputElfType)) diff --git a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp index be44fdbe45f95..ae8889af8c42f 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp @@ -65,13 +65,17 @@ static void updateAndRemoveSymbols(const CopyConfig &Config, Object &Obj) { Sym.Name = std::string(I->getValue()); } - auto RemovePred = [Config](const std::unique_ptr &N) { + auto RemovePred = [Config, &Obj](const std::unique_ptr &N) { if (N->Referenced) return false; if (Config.StripAll) return true; if (Config.DiscardMode == DiscardType::All && !(N->n_type & MachO::N_EXT)) return true; + // This behavior is consistent with cctools' strip. 
+ if (Config.StripSwiftSymbols && (Obj.Header.Flags & MachO::MH_DYLDLINK) && + Obj.SwiftVersion && *Obj.SwiftVersion && N->isSwiftSymbol()) + return true; return false; }; diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp index cf32f00f36153..39a8893c1eb1b 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp @@ -283,6 +283,28 @@ void MachOReader::readIndirectSymbolTable(Object &O) const { } } +void MachOReader::readSwiftVersion(Object &O) const { + struct ObjCImageInfo { + uint32_t Version; + uint32_t Flags; + } ImageInfo; + + for (const LoadCommand &LC : O.LoadCommands) + for (const std::unique_ptr
&Sec : LC.Sections) + if (Sec->Sectname == "__objc_imageinfo" && + (Sec->Segname == "__DATA" || Sec->Segname == "__DATA_CONST" || + Sec->Segname == "__DATA_DIRTY") && + Sec->Content.size() >= sizeof(ObjCImageInfo)) { + memcpy(&ImageInfo, Sec->Content.data(), sizeof(ObjCImageInfo)); + if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) { + sys::swapByteOrder(ImageInfo.Version); + sys::swapByteOrder(ImageInfo.Flags); + } + O.SwiftVersion = (ImageInfo.Flags >> 8) & 0xff; + return; + } +} + std::unique_ptr MachOReader::create() const { auto Obj = std::make_unique(); readHeader(*Obj); @@ -297,6 +319,7 @@ std::unique_ptr MachOReader::create() const { readDataInCodeData(*Obj); readFunctionStartsData(*Obj); readIndirectSymbolTable(*Obj); + readSwiftVersion(*Obj); return Obj; } diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.h b/llvm/tools/llvm-objcopy/MachO/MachOReader.h index 00c8f0d55f61f..a369907147d6f 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOReader.h +++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.h @@ -39,6 +39,7 @@ class MachOReader : public Reader { void readDataInCodeData(Object &O) const; void readFunctionStartsData(Object &O) const; void readIndirectSymbolTable(Object &O) const; + void readSwiftVersion(Object &O) const; public: explicit MachOReader(const object::MachOObjectFile &Obj) : MachOObj(Obj) {} diff --git a/llvm/tools/llvm-objcopy/MachO/Object.h b/llvm/tools/llvm-objcopy/MachO/Object.h index b0123732f80ae..b9ecd1e7818f2 100644 --- a/llvm/tools/llvm-objcopy/MachO/Object.h +++ b/llvm/tools/llvm-objcopy/MachO/Object.h @@ -115,6 +115,11 @@ struct SymbolEntry { return (n_type & MachO::N_TYPE) == MachO::N_UNDF; } + bool isSwiftSymbol() const { + return StringRef(Name).startswith("_$s") || + StringRef(Name).startswith("_$S"); + } + Optional section() const { return n_sect == MachO::NO_SECT ? 
None : Optional(n_sect); } @@ -298,6 +303,8 @@ struct Object { LinkData DataInCode; LinkData FunctionStarts; + Optional SwiftVersion; + /// The index of LC_SYMTAB load command if present. Optional SymTabCommandIndex; /// The index of LC_DYLD_INFO or LC_DYLD_INFO_ONLY load command if present. diff --git a/llvm/tools/llvm-objcopy/StripOpts.td b/llvm/tools/llvm-objcopy/StripOpts.td index cd02cffae6732..001da23528d78 100644 --- a/llvm/tools/llvm-objcopy/StripOpts.td +++ b/llvm/tools/llvm-objcopy/StripOpts.td @@ -15,3 +15,6 @@ def d : Flag<["-"], "d">, def S : Flag<["-"], "S">, Alias, HelpText<"Alias for --strip-debug">; + +def strip_swift_symbols : Flag<["-"], "T">, + HelpText<"Remove Swift symbols">; From cf42b704391c44e84485dd2547ae006196998266 Mon Sep 17 00:00:00 2001 From: Sean Silva Date: Thu, 21 May 2020 11:04:43 -0700 Subject: [PATCH 162/770] [mlir][shape] Add `shape.get_extent`. Summary: This op extracts an extent from a shape. This also is the first op which constant folds to shape.const_size, which revealed that shape.const_size needs a folder (ConstantLike ops seem to always need folders for the constant folding infra to work). 
Differential Revision: https://reviews.llvm.org/D80394 --- .../include/mlir/Dialect/Shape/IR/ShapeOps.td | 32 ++++++++++++++++++ mlir/lib/Dialect/Shape/IR/Shape.cpp | 33 +++++++++++++++++++ mlir/test/Dialect/Shape/canonicalize.mlir | 30 +++++++++++++++++ 3 files changed, 95 insertions(+) diff --git a/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td b/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td index 7d62cebff8e66..0278d7bbeb065 100644 --- a/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td +++ b/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td @@ -131,6 +131,7 @@ def Shape_ConstSizeOp : Shape_Op<"const_size", let results = (outs Shape_SizeType:$result); let assemblyFormat = "attr-dict $value"; + let hasFolder = 1; } def Shape_FromExtentsOp : Shape_Op<"from_extents", [ @@ -190,6 +191,37 @@ def Shape_ToExtentTensorOp : Shape_Op<"to_extent_tensor", []> { let hasFolder = 1; } +def Shape_GetExtentOp : Shape_Op<"get_extent", + [NoSideEffect, DeclareOpInterfaceMethods]> { + let summary = "Gets the specified extent from a shape"; + let description = [{ + Gets the extent indexed by `dim` from `shape`. + + If the shape is an error, it returns an error size. + }]; + let arguments = (ins + Shape_ShapeType:$shape, + Confined:$dim + ); + let results = (outs Shape_SizeType:$extent); + let assemblyFormat = "$shape `,` $dim attr-dict"; + + let builders = [ + // Builder that allows passing a simple integer instead of an IntegerAttr. 
+ OpBuilder< + [{ + OpBuilder &builder, OperationState &result, + Value shape, int64_t dim + }], + [{ + build(builder, result, shape, builder.getI64IntegerAttr(dim)); + }] + > + ]; + + let hasFolder = 1; +} + def Shape_JoinOp : Shape_Op<"join", []> { let summary = "Returns the least general shape.size of its operands"; let description = [{ diff --git a/mlir/lib/Dialect/Shape/IR/Shape.cpp b/mlir/lib/Dialect/Shape/IR/Shape.cpp index 5c6a0c2204c3c..095c41720fbae 100644 --- a/mlir/lib/Dialect/Shape/IR/Shape.cpp +++ b/mlir/lib/Dialect/Shape/IR/Shape.cpp @@ -245,6 +245,8 @@ ConstSizeOp::inferReturnTypes(MLIRContext *context, Optional location, return success(); } +OpFoldResult ConstSizeOp::fold(ArrayRef) { return valueAttr(); } + //===----------------------------------------------------------------------===// // FromExtentsOp //===----------------------------------------------------------------------===// @@ -267,6 +269,37 @@ OpFoldResult FromExtentsOp::fold(ArrayRef operands) { return builder.getI64TensorAttr(extents); } +//===----------------------------------------------------------------------===// +// GetExtentOp +//===----------------------------------------------------------------------===// + +LogicalResult +GetExtentOp::inferReturnTypes(MLIRContext *context, Optional location, + ValueRange operands, DictionaryAttr attributes, + RegionRange regions, + SmallVectorImpl &inferredReturnTypes) { + inferredReturnTypes.push_back(SizeType::get(context)); + return success(); +} + +OpFoldResult GetExtentOp::fold(ArrayRef operands) { + auto elements = operands[0].dyn_cast_or_null(); + if (!elements) + return nullptr; + uint64_t dimToGet = dim().getLimitedValue(); + // TODO: Constant fold this to some kind of constant error. + if (dimToGet >= (uint64_t)elements.getNumElements()) + return nullptr; + // This is a little inconvenient because getValue returns an IntegerAttr + // that is not of IndexType, but the result here needs to be of + // IndexType. 
+ // TODO: Make ConstShapeOp hold a tensor of index instead of i64. + Builder builder(getContext()); + return builder.getIntegerAttr( + builder.getIndexType(), + elements.getValue({dimToGet}).getInt()); +} + //===----------------------------------------------------------------------===// // ShapeOfOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Shape/canonicalize.mlir b/mlir/test/Dialect/Shape/canonicalize.mlir index 2e35fc748d86b..018f5b212b4e4 100644 --- a/mlir/test/Dialect/Shape/canonicalize.mlir +++ b/mlir/test/Dialect/Shape/canonicalize.mlir @@ -106,3 +106,33 @@ func @no_fold(%arg0: index) -> !shape.shape { %ret = shape.from_extents %e0, %arg0 return %ret : !shape.shape } + +// ----- +// Canonicalization of shape.get_extent + +// Basic folding. +// CHECK-LABEL: func @basic +func @basic() -> !shape.size { + // CHECK: shape.const_size 2 + %0 = shape.const_shape [0, 1, 2] + %1 = shape.get_extent %0, 2 + return %1 : !shape.size +} + +// Should not fold. +// CHECK-LABEL: func @out_of_bounds +func @out_of_bounds() -> !shape.size { + // CHECK: shape.const_shape + // CHECK: shape.get_extent + %0 = shape.const_shape [0, 1, 2] + %1 = shape.get_extent %0, 3 + return %1 : !shape.size +} + +// Should not fold. +// CHECK-LABEL: func @not_const +func @not_const(%arg0: !shape.shape) -> !shape.size { + // CHECK: shape.get_extent + %0 = shape.get_extent %arg0, 3 + return %0 : !shape.size +} From e724db03752a0cd06a86153fea0d95e377f999c0 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 26 May 2020 17:00:48 -0700 Subject: [PATCH 163/770] [lldb/Test] Modify TestSymbolTable.py for reproducers Work around global module caching during reproducer replay. See inline comment for the details. 
--- .../API/lang/objc/foundation/TestSymbolTable.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/lldb/test/API/lang/objc/foundation/TestSymbolTable.py b/lldb/test/API/lang/objc/foundation/TestSymbolTable.py index df4860f148260..02c551b83876f 100644 --- a/lldb/test/API/lang/objc/foundation/TestSymbolTable.py +++ b/lldb/test/API/lang/objc/foundation/TestSymbolTable.py @@ -39,7 +39,18 @@ def test_with_python_api(self): self.assertTrue(process, PROCESS_IS_VALID) # Create the filespec by which to locate our a.out module. - filespec = lldb.SBFileSpec(exe, False) + # + # - Use the absolute path to get the module for the current variant. + # - Use the relative path for reproducers. The modules are never + # orphaned because the SB objects are leaked intentionally. This + # causes LLDB to reuse the same module for every variant, because the + # UUID is the same for all the inferiors. FindModule below only + # compares paths and is oblivious to the fact that the UUIDs are the + # same. + if configuration.is_reproducer(): + filespec = lldb.SBFileSpec('a.out', False) + else: + filespec = lldb.SBFileSpec(exe, False) module = target.FindModule(filespec) self.assertTrue(module, VALID_MODULE) From 1079978b3c506abca2b4dd9a5b131c024330206b Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Mon, 11 May 2020 16:24:42 -0700 Subject: [PATCH 164/770] [lldb][Core] Remove dead codepath in Mangled Summary: Objective-C names are stored in m_demangled, not in m_mangled. The method in the condition will never return true. 
Differential Revision: https://reviews.llvm.org/D79823 --- lldb/source/Core/Mangled.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/lldb/source/Core/Mangled.cpp b/lldb/source/Core/Mangled.cpp index 56914ae117ddb..143ec8770bf47 100644 --- a/lldb/source/Core/Mangled.cpp +++ b/lldb/source/Core/Mangled.cpp @@ -413,8 +413,6 @@ lldb::LanguageType Mangled::GuessLanguage() const { const char *mangled_name = mangled.GetCString(); if (CPlusPlusLanguage::IsCPPMangledName(mangled_name)) return lldb::eLanguageTypeC_plus_plus; - else if (ObjCLanguage::IsPossibleObjCMethodName(mangled_name)) - return lldb::eLanguageTypeObjC; } else { // ObjC names aren't really mangled, so they won't necessarily be in the // mangled name slot. From b90eb0f23b5bf3db4a091748b3ea6de9a45645c9 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 26 May 2020 17:27:46 -0700 Subject: [PATCH 165/770] Autogen a couple of test files to make a future diff easier to read --- .../base-pointers-4.ll | 39 +++++++-- .../RewriteStatepointsForGC/basic.ll | 84 +++++++++++++------ .../deopt-lowering-attrs.ll | 15 +++- 3 files changed, 103 insertions(+), 35 deletions(-) diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/base-pointers-4.ll b/llvm/test/Transforms/RewriteStatepointsForGC/base-pointers-4.ll index b9f67c1a37400..7fe70b22eb100 100644 --- a/llvm/test/Transforms/RewriteStatepointsForGC/base-pointers-4.ll +++ b/llvm/test/Transforms/RewriteStatepointsForGC/base-pointers-4.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s ; RUN: opt < %s -passes=rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s @@ -10,17 +11,43 @@ declare i64 addrspace(1)* @generate_obj() declare void @consume_obj(i64 addrspace(1)*) define void @test(i32 %condition) gc "statepoint-example" { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label 
[[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, i64 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* ()* @generate_obj, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0) +; CHECK-NEXT: [[TMP0:%.*]] = call i64 addrspace(1)* @llvm.experimental.gc.result.p1i64(token [[STATEPOINT_TOKEN]]) +; CHECK-NEXT: switch i32 [[CONDITION:%.*]], label [[DEST_A:%.*]] [ +; CHECK-NEXT: i32 0, label [[DEST_B:%.*]] +; CHECK-NEXT: i32 1, label [[DEST_C:%.*]] +; CHECK-NEXT: ] +; CHECK: dest_a: +; CHECK-NEXT: br label [[MERGE:%.*]] +; CHECK: dest_b: +; CHECK-NEXT: br label [[MERGE]] +; CHECK: dest_c: +; CHECK-NEXT: br label [[MERGE]] +; CHECK: merge: +; CHECK-NEXT: [[OBJ_TO_CONSUME_BASE:%.*]] = phi i64 addrspace(1)* [ [[TMP0]], [[DEST_A]] ], [ null, [[DEST_B]] ], [ null, [[DEST_C]] ], !is_base_value !0 +; CHECK-NEXT: [[OBJ_TO_CONSUME:%.*]] = phi i64 addrspace(1)* [ [[TMP0]], [[DEST_A]] ], [ null, [[DEST_B]] ], [ null, [[DEST_C]] ] +; CHECK-NEXT: [[STATEPOINT_TOKEN1:%.*]] = call token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 2882400000, i32 0, void (i64 addrspace(1)*)* @consume_obj, i32 1, i32 0, i64 addrspace(1)* [[OBJ_TO_CONSUME]], i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* [[OBJ_TO_CONSUME_BASE]], i64 addrspace(1)* [[OBJ_TO_CONSUME]]) +; CHECK-NEXT: [[OBJ_TO_CONSUME_BASE_RELOCATED:%.*]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[STATEPOINT_TOKEN1]], i32 13, i32 13) +; CHECK-NEXT: [[OBJ_TO_CONSUME_BASE_RELOCATED_CASTED:%.*]] = bitcast i8 addrspace(1)* [[OBJ_TO_CONSUME_BASE_RELOCATED]] to i64 addrspace(1)* +; CHECK-NEXT: [[OBJ_TO_CONSUME_RELOCATED:%.*]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[STATEPOINT_TOKEN1]], i32 13, i32 14) +; CHECK-NEXT: [[OBJ_TO_CONSUME_RELOCATED_CASTED:%.*]] = bitcast i8 addrspace(1)* [[OBJ_TO_CONSUME_RELOCATED]] to i64 addrspace(1)* +; CHECK-NEXT: br label [[MERGE_SPLIT:%.*]] +; CHECK: merge.split: +; CHECK-NEXT: [[STATEPOINT_TOKEN2:%.*]] = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0) +; CHECK-NEXT: br label [[LOOP]] +; entry: br label %loop loop: ; preds = %merge.split, %entry -; CHECK: loop: -; CHECK: [[TOKEN_0:%[^ ]+]] = call token (i64, i32, i64 addrspace(1)* ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* ()* @generate_obj, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i3 -; CHECK-NEXT: [[RESULT_0:%[^ ]+]] = call i64 addrspace(1)* @llvm.experimental.gc.result %0 = call i64 addrspace(1)* @generate_obj() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ] switch i32 %condition, label %dest_a [ - i32 0, label %dest_b - i32 1, label %dest_c + i32 0, label %dest_b + i32 1, label %dest_c ] dest_a: ; preds = %loop @@ -33,8 +60,6 @@ dest_c: ; preds = %loop br label %merge merge: ; preds = %dest_c, %dest_b, %dest_a -; CHECK: merge: -; CHECK: %obj_to_consume = phi i64 addrspace(1)* [ [[RESULT_0]], %dest_a ], [ null, %dest_b ], [ null, %dest_c ] %obj_to_consume = phi i64 addrspace(1)* [ %0, %dest_a ], [ null, %dest_b ], [ null, %dest_c ] call void @consume_obj(i64 addrspace(1)* %obj_to_consume) [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ] br label %merge.split diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/basic.ll b/llvm/test/Transforms/RewriteStatepointsForGC/basic.ll index c1c160b14274b..8e052a61a4dc3 100644 --- a/llvm/test/Transforms/RewriteStatepointsForGC/basic.ll +++ b/llvm/test/Transforms/RewriteStatepointsForGC/basic.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -rewrite-statepoints-for-gc < %s | FileCheck %s ; RUN: opt -S -passes=rewrite-statepoints-for-gc < %s | FileCheck %s @@ -6,69 +7,104 @@ declare i32 @h() define i32 addrspace(1)* @f0(i32 addrspace(1)* %arg) gc "statepoint-example" { ; CHECK-LABEL: @f0( - entry: -; CHECK: [[TOKEN_0:%[^ ]+]] = call token {{[^@]*}} @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @g, i32 0, i32 0, i32 0, i32 1, i32 100, i32 addrspace(1)* %arg) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @g, i32 0, i32 0, i32 0, i32 1, i32 100, i32 addrspace(1)* [[ARG:%.*]]) +; CHECK-NEXT: [[ARG_RELOCATED:%.*]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[STATEPOINT_TOKEN]], i32 8, i32 8) +; CHECK-NEXT: [[ARG_RELOCATED_CASTED:%.*]] = bitcast i8 addrspace(1)* [[ARG_RELOCATED]] to i32 addrspace(1)* +; CHECK-NEXT: ret i32 addrspace(1)* [[ARG_RELOCATED_CASTED]] +; + entry: call void @g() [ "deopt"(i32 100) ] -; CHECK: %arg.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[TOKEN_0]], i32 8, i32 8) ret i32 addrspace(1)* %arg } define i32 addrspace(1)* @f1(i32 addrspace(1)* %arg) gc "statepoint-example" personality i32 8 { ; CHECK-LABEL: @f1( - entry: -; CHECK: [[TOKEN_1:%[^ ]+]] = invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @g, i32 0, i32 0, i32 0, i32 1, i32 100, i32 addrspace(1)* %arg) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = invoke token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @g, i32 0, i32 0, i32 0, i32 1, i32 100, i32 addrspace(1)* [[ARG:%.*]]) +; CHECK-NEXT: to label [[NORMAL_DEST:%.*]] unwind label [[UNWIND_DEST:%.*]] +; CHECK: normal_dest: +; CHECK-NEXT: [[ARG_RELOCATED1:%.*]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[STATEPOINT_TOKEN]], i32 8, i32 8) +; CHECK-NEXT: [[ARG_RELOCATED1_CASTED:%.*]] = bitcast i8 addrspace(1)* [[ARG_RELOCATED1]] to i32 addrspace(1)* +; CHECK-NEXT: ret i32 addrspace(1)* [[ARG_RELOCATED1_CASTED]] +; CHECK: unwind_dest: +; CHECK-NEXT: [[LPAD:%.*]] = landingpad token +; CHECK-NEXT: cleanup +; CHECK-NEXT: [[ARG_RELOCATED:%.*]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[LPAD]], i32 8, i32 8) +; CHECK-NEXT: [[ARG_RELOCATED_CASTED:%.*]] = bitcast i8 addrspace(1)* [[ARG_RELOCATED]] to i32 addrspace(1)* +; CHECK-NEXT: resume token undef +; + entry: invoke void @g() [ "deopt"(i32 100) ] to label %normal_dest unwind label %unwind_dest - normal_dest: -; CHECK: %arg.relocated1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[TOKEN_1]], i32 8, i32 8) + normal_dest: ret i32 addrspace(1)* %arg - unwind_dest: + unwind_dest: %lpad = landingpad token cleanup resume token undef } define i32 addrspace(1)* @f2(i32 addrspace(1)* %arg) gc "statepoint-example" { ; CHECK-LABEL: @f2( - entry: -; CHECK: [[TOKEN_2:%[^ ]+]] = call token (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 2882400000, i32 0, i32 ()* @h, i32 0, i32 0, i32 0, i32 1, i32 100, i32 addrspace(1)* %arg) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, i32 ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_i32f(i64 2882400000, i32 0, i32 ()* @h, i32 0, i32 0, i32 0, i32 1, i32 100, i32 addrspace(1)* [[ARG:%.*]]) +; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.experimental.gc.result.i32(token [[STATEPOINT_TOKEN]]) +; CHECK-NEXT: [[ARG_RELOCATED:%.*]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[STATEPOINT_TOKEN]], i32 8, i32 8) +; CHECK-NEXT: [[ARG_RELOCATED_CASTED:%.*]] = bitcast i8 addrspace(1)* [[ARG_RELOCATED]] to i32 addrspace(1)* +; CHECK-NEXT: store i32 [[VAL1]], i32 addrspace(1)* [[ARG_RELOCATED_CASTED]], align 4 +; CHECK-NEXT: ret i32 addrspace(1)* [[ARG_RELOCATED_CASTED]] +; + entry: %val = call i32 @h() [ "deopt"(i32 100) ] -; CHECK: [[RESULT_F2:%[^ ]+]] = call i32 @llvm.experimental.gc.result.i32(token [[TOKEN_2]]) -; CHECK: %arg.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[TOKEN_2]], i32 8, i32 8) -; CHECK: %arg.relocated.casted = bitcast i8 addrspace(1)* %arg.relocated to i32 addrspace(1)* store i32 %val, i32 addrspace(1)* %arg -; CHECK: store i32 [[RESULT_F2]], i32 addrspace(1)* %arg.relocated.casted ret i32 addrspace(1)* %arg } define i32 addrspace(1)* @f3(i32 addrspace(1)* %arg) gc "statepoint-example" personality i32 8 { ; CHECK-LABEL: @f3( - entry: -; CHECK: [[TOKEN_3:%[^ ]+]] = invoke token (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 2882400000, i32 0, i32 ()* @h, i32 0, i32 0, i32 0, i32 1, i32 100, i32 addrspace(1)* %arg) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = invoke token (i64, i32, i32 ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_i32f(i64 2882400000, i32 0, i32 ()* @h, i32 0, i32 0, i32 0, i32 1, i32 100, i32 addrspace(1)* [[ARG:%.*]]) +; CHECK-NEXT: to label [[NORMAL_DEST:%.*]] unwind label [[UNWIND_DEST:%.*]] +; CHECK: normal_dest: +; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.experimental.gc.result.i32(token [[STATEPOINT_TOKEN]]) +; CHECK-NEXT: [[ARG_RELOCATED2:%.*]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[STATEPOINT_TOKEN]], i32 8, i32 8) +; CHECK-NEXT: [[ARG_RELOCATED2_CASTED:%.*]] = bitcast i8 addrspace(1)* [[ARG_RELOCATED2]] to i32 addrspace(1)* +; CHECK-NEXT: store i32 [[VAL1]], i32 addrspace(1)* [[ARG_RELOCATED2_CASTED]], align 4 +; CHECK-NEXT: ret i32 addrspace(1)* [[ARG_RELOCATED2_CASTED]] +; CHECK: unwind_dest: +; CHECK-NEXT: [[LPAD:%.*]] = landingpad token +; CHECK-NEXT: cleanup +; CHECK-NEXT: [[ARG_RELOCATED:%.*]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[LPAD]], i32 8, i32 8) +; CHECK-NEXT: [[ARG_RELOCATED_CASTED:%.*]] = bitcast i8 addrspace(1)* [[ARG_RELOCATED]] to i32 addrspace(1)* +; CHECK-NEXT: resume token undef +; + entry: %val = invoke i32 @h() [ "deopt"(i32 100) ] to label %normal_dest unwind label %unwind_dest - normal_dest: -; CHECK: [[RESULT_F3:%[^ ]+]] = call i32 @llvm.experimental.gc.result.i32(token [[TOKEN_3]]) -; CHECK: [[ARG_RELOCATED:%[^ ]+]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[TOKEN_3]], i32 8, i32 8) -; CHECK: [[ARG_RELOCATED_CASTED:%[^ ]+]] = bitcast i8 addrspace(1)* [[ARG_RELOCATED]] to i32 addrspace(1)* + normal_dest: store i32 %val, i32 addrspace(1)* %arg -; CHECK: store i32 [[RESULT_F3]], i32 addrspace(1)* [[ARG_RELOCATED_CASTED]] ret i32 addrspace(1)* %arg - unwind_dest: + unwind_dest: %lpad = landingpad token cleanup resume token undef } define i32 addrspace(1)* @f4(i32 addrspace(1)* %arg) gc "statepoint-example" { ; CHECK-LABEL: @f4( - entry: -; CHECK: @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 
2882400000, i32 0, void ()* @g, i32 0, i32 1, i32 2, i32 400, i8 90, +; CHECK-NEXT: entry: +; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @g, i32 0, i32 1, i32 2, i32 400, i8 90, i32 0, i32 addrspace(1)* [[ARG:%.*]]) +; CHECK-NEXT: [[ARG_RELOCATED:%.*]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[STATEPOINT_TOKEN]], i32 9, i32 9) +; CHECK-NEXT: [[ARG_RELOCATED_CASTED:%.*]] = bitcast i8 addrspace(1)* [[ARG_RELOCATED]] to i32 addrspace(1)* +; CHECK-NEXT: ret i32 addrspace(1)* [[ARG_RELOCATED_CASTED]] +; + entry: call void @g() [ "gc-transition"(i32 400, i8 90) ] ret i32 addrspace(1)* %arg } diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/deopt-lowering-attrs.ll b/llvm/test/Transforms/RewriteStatepointsForGC/deopt-lowering-attrs.ll index d0a331905088f..65e38d9d37587 100644 --- a/llvm/test/Transforms/RewriteStatepointsForGC/deopt-lowering-attrs.ll +++ b/llvm/test/Transforms/RewriteStatepointsForGC/deopt-lowering-attrs.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -rewrite-statepoints-for-gc -S < %s | FileCheck %s ; RUN: opt -passes=rewrite-statepoints-for-gc -S < %s | FileCheck %s ; Check that the "deopt-lowering" function attribute gets transcoded into @@ -12,9 +13,12 @@ declare void @baz() "deopt-lowering"="live-through" define void @test1() gc "statepoint-example" { ; CHECK-LABEL: @test1( -; CHECK: @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 1, i32 57) -; CHECK: @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 1, i32 42) -; CHECK: @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @baz, i32 0, i32 0, i32 0, i32 1, i32 13) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, 
i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 1, i32 57) +; CHECK-NEXT: [[STATEPOINT_TOKEN1:%.*]] = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 1, i32 42) +; CHECK-NEXT: [[STATEPOINT_TOKEN2:%.*]] = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @baz, i32 0, i32 0, i32 0, i32 1, i32 13) +; CHECK-NEXT: ret void +; entry: call void @foo() [ "deopt"(i32 57) ] @@ -26,7 +30,10 @@ entry: ; add deopt-lowering attribute as part of callsite define void @test2() gc "statepoint-example" { ; CHECK-LABEL: @test2( -; CHECK: @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 2, i32 0, i32 1, i32 57) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 2, i32 0, i32 1, i32 57) #0 +; CHECK-NEXT: ret void +; entry: call void @foo() "deopt-lowering"="live-in" [ "deopt"(i32 57) ] From bed6624ac43bc223114d0b9380d593f2dfd749ff Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 26 May 2020 17:33:07 -0700 Subject: [PATCH 166/770] Split a test file so that most of it can be autogened --- .../scalar-base-vector-2.ll | 76 +++++++++++++++++++ .../scalar-base-vector.ll | 74 +----------------- 2 files changed, 77 insertions(+), 73 deletions(-) create mode 100644 llvm/test/Transforms/RewriteStatepointsForGC/scalar-base-vector-2.ll diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/scalar-base-vector-2.ll b/llvm/test/Transforms/RewriteStatepointsForGC/scalar-base-vector-2.ll new file mode 100644 index 0000000000000..1cfda09b2c1b0 --- /dev/null +++ b/llvm/test/Transforms/RewriteStatepointsForGC/scalar-base-vector-2.ll @@ -0,0 +1,76 @@ +; RUN: opt < %s -rewrite-statepoints-for-gc -S | FileCheck %s +; RUN: opt < %s -passes=rewrite-statepoints-for-gc -S | FileCheck %s + +; Assertions are almost autogenerated except for last testcase widget, which was +; updated (with -DAG instead of -NEXT) to fix buildbot failure reproducible only on two boxes. + +; Uses of extractelement that are of scalar type should not have the BDV +; incorrectly identified as a vector type. 
+define void @widget() gc "statepoint-example" { +; CHECK-LABEL: @widget( +; CHECK-NEXT: bb6: +; CHECK-NEXT: [[BASE_EE:%.*]] = extractelement <2 x i8 addrspace(1)*> zeroinitializer, i32 1, !is_base_value !0 +; CHECK-NEXT: [[TMP:%.*]] = extractelement <2 x i8 addrspace(1)*> undef, i32 1 +; CHECK-NEXT: br i1 undef, label [[BB7:%.*]], label [[BB9:%.*]] +; CHECK: bb7: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP]], i64 12 +; CHECK-NEXT: br label [[BB11:%.*]] +; CHECK: bb9: +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP]], i64 12 +; CHECK-NEXT: br i1 undef, label [[BB11]], label [[BB15:%.*]] +; CHECK: bb11: +; CHECK-NEXT: [[TMP12_BASE:%.*]] = phi i8 addrspace(1)* [ [[BASE_EE]], [[BB7]] ], [ [[BASE_EE]], [[BB9]] ], !is_base_value !0 +; CHECK-NEXT: [[TMP12:%.*]] = phi i8 addrspace(1)* [ [[TMP8]], [[BB7]] ], [ [[TMP10]], [[BB9]] ] +; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @snork, i32 0, i32 0, i32 0, i32 1, i32 undef, i8 addrspace(1)* [[TMP12_BASE]], i8 addrspace(1)* [[TMP12]]) +; CHECK-NEXT: [[TMP12_BASE_RELOCATED:%.*]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[STATEPOINT_TOKEN]], i32 8, i32 8) +; CHECK-NEXT: [[TMP12_RELOCATED:%.*]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[STATEPOINT_TOKEN]], i32 8, i32 9) +; CHECK-NEXT: br label [[BB15]] +; CHECK: bb15: +; CHECK-NEXT: [[TMP16_BASE:%.*]] = phi i8 addrspace(1)* [ [[BASE_EE]], [[BB9]] ], [ [[TMP12_BASE_RELOCATED]], [[BB11]] ], !is_base_value !0 +; CHECK-NEXT: [[TMP16:%.*]] = phi i8 addrspace(1)* [ [[TMP10]], [[BB9]] ], [ [[TMP12_RELOCATED]], [[BB11]] ] +; CHECK-NEXT: br i1 undef, label [[BB17:%.*]], label [[BB20:%.*]] +; CHECK: bb17: +; CHECK-NEXT: [[STATEPOINT_TOKEN1:%.*]] = call token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @snork, i32 0, i32 0, i32 0, i32 1, i32 undef, i8 addrspace(1)* [[TMP16_BASE]], i8 addrspace(1)* [[TMP16]]) +; CHECK-NEXT: [[TMP16_BASE_RELOCATED:%.*]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[STATEPOINT_TOKEN1]], i32 8, i32 8) +; CHECK-NEXT: [[TMP16_RELOCATED:%.*]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[STATEPOINT_TOKEN1]], i32 8, i32 9) +; CHECK-NEXT: br label [[BB20]] +; CHECK: bb20: +; CHECK-DAG: [[DOT05:%.*]] = phi i8 addrspace(1)* [ [[TMP16_BASE_RELOCATED]], [[BB17]] ], [ [[TMP16_BASE]], [[BB15]] ] +; CHECK-DAG: [[DOT0:%.*]] = phi i8 addrspace(1)* [ [[TMP16_RELOCATED]], [[BB17]] ], [ [[TMP16]], [[BB15]] ] +; CHECK-NEXT: [[STATEPOINT_TOKEN2:%.*]] = call token (i64, i32, void (i8 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i8f(i64 2882400000, i32 0, void (i8 addrspace(1)*)* @foo, i32 1, i32 0, i8 addrspace(1)* [[DOT0]], i32 0, i32 0, i8 addrspace(1)* [[DOT05]], i8 addrspace(1)* [[DOT0]]) +; CHECK-NEXT: [[TMP16_BASE_RELOCATED3:%.*]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[STATEPOINT_TOKEN2]], i32 8, i32 8) +; CHECK-NEXT: [[TMP16_RELOCATED4:%.*]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[STATEPOINT_TOKEN2]], i32 8, i32 9) +; CHECK-NEXT: ret void +; +bb6: ; preds = %bb3 + %tmp = extractelement <2 x i8 addrspace(1)*> undef, i32 1 + br i1 undef, label %bb7, label %bb9 + +bb7: ; preds = %bb6 + %tmp8 = getelementptr inbounds i8, i8 addrspace(1)* %tmp, i64 12 + br label %bb11 + +bb9: ; preds = %bb6, %bb6 + %tmp10 = getelementptr inbounds i8, i8 addrspace(1)* %tmp, i64 12 + br i1 undef, label %bb11, label %bb15 + +bb11: ; preds = %bb9, %bb7 + %tmp12 = phi i8 addrspace(1)* [ %tmp8, %bb7 ], [ %tmp10, %bb9 ] + call void @snork() [ "deopt"(i32 undef) ] + br label %bb15 + +bb15: ; preds = %bb11, %bb9, %bb9 + %tmp16 = phi i8 
addrspace(1)* [ %tmp10, %bb9 ], [ %tmp12, %bb11 ] + br i1 undef, label %bb17, label %bb20 + +bb17: ; preds = %bb15 + call void @snork() [ "deopt"(i32 undef) ] + br label %bb20 + +bb20: ; preds = %bb17, %bb15, %bb15 + call void @foo(i8 addrspace(1)* %tmp16) + ret void +} + +declare void @snork() +declare void @foo(i8 addrspace(1)*) diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/scalar-base-vector.ll b/llvm/test/Transforms/RewriteStatepointsForGC/scalar-base-vector.ll index e5e765be2b846..34af81cd7337e 100644 --- a/llvm/test/Transforms/RewriteStatepointsForGC/scalar-base-vector.ll +++ b/llvm/test/Transforms/RewriteStatepointsForGC/scalar-base-vector.ll @@ -1,9 +1,7 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -rewrite-statepoints-for-gc -S | FileCheck %s ; RUN: opt < %s -passes=rewrite-statepoints-for-gc -S | FileCheck %s -; Assertions are almost autogenerated except for last testcase widget, which was -; updated (with -DAG instead of -NEXT) to fix buildbot failure reproducible only on two boxes. - declare void @do_safepoint() declare i8 addrspace(1)* @def_ptr() @@ -194,75 +192,5 @@ latch: ; preds = %bb25, %bb7 br label %header } -; Uses of extractelement that are of scalar type should not have the BDV -; incorrectly identified as a vector type. 
-define void @widget() gc "statepoint-example" { -; CHECK-LABEL: @widget( -; CHECK-NEXT: bb6: -; CHECK-NEXT: [[BASE_EE:%.*]] = extractelement <2 x i8 addrspace(1)*> zeroinitializer, i32 1, !is_base_value !0 -; CHECK-NEXT: [[TMP:%.*]] = extractelement <2 x i8 addrspace(1)*> undef, i32 1 -; CHECK-NEXT: br i1 undef, label [[BB7:%.*]], label [[BB9:%.*]] -; CHECK: bb7: -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP]], i64 12 -; CHECK-NEXT: br label [[BB11:%.*]] -; CHECK: bb9: -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP]], i64 12 -; CHECK-NEXT: br i1 undef, label [[BB11]], label [[BB15:%.*]] -; CHECK: bb11: -; CHECK-NEXT: [[TMP12_BASE:%.*]] = phi i8 addrspace(1)* [ [[BASE_EE]], [[BB7]] ], [ [[BASE_EE]], [[BB9]] ], !is_base_value !0 -; CHECK-NEXT: [[TMP12:%.*]] = phi i8 addrspace(1)* [ [[TMP8]], [[BB7]] ], [ [[TMP10]], [[BB9]] ] -; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @snork, i32 0, i32 0, i32 0, i32 1, i32 undef, i8 addrspace(1)* [[TMP12_BASE]], i8 addrspace(1)* [[TMP12]]) -; CHECK-NEXT: [[TMP12_BASE_RELOCATED:%.*]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[STATEPOINT_TOKEN]], i32 8, i32 8) -; CHECK-NEXT: [[TMP12_RELOCATED:%.*]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[STATEPOINT_TOKEN]], i32 8, i32 9) -; CHECK-NEXT: br label [[BB15]] -; CHECK: bb15: -; CHECK-NEXT: [[TMP16_BASE:%.*]] = phi i8 addrspace(1)* [ [[BASE_EE]], [[BB9]] ], [ [[TMP12_BASE_RELOCATED]], [[BB11]] ], !is_base_value !0 -; CHECK-NEXT: [[TMP16:%.*]] = phi i8 addrspace(1)* [ [[TMP10]], [[BB9]] ], [ [[TMP12_RELOCATED]], [[BB11]] ] -; CHECK-NEXT: br i1 undef, label [[BB17:%.*]], label [[BB20:%.*]] -; CHECK: bb17: -; CHECK-NEXT: [[STATEPOINT_TOKEN1:%.*]] = call token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @snork, i32 0, i32 0, i32 0, i32 1, i32 undef, i8 addrspace(1)* [[TMP16_BASE]], i8 addrspace(1)* [[TMP16]]) -; CHECK-NEXT: [[TMP16_BASE_RELOCATED:%.*]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[STATEPOINT_TOKEN1]], i32 8, i32 8) -; CHECK-NEXT: [[TMP16_RELOCATED:%.*]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[STATEPOINT_TOKEN1]], i32 8, i32 9) -; CHECK-NEXT: br label [[BB20]] -; CHECK: bb20: -; CHECK-DAG: [[DOT05:%.*]] = phi i8 addrspace(1)* [ [[TMP16_BASE_RELOCATED]], [[BB17]] ], [ [[TMP16_BASE]], [[BB15]] ] -; CHECK-DAG: [[DOT0:%.*]] = phi i8 addrspace(1)* [ [[TMP16_RELOCATED]], [[BB17]] ], [ [[TMP16]], [[BB15]] ] -; CHECK-NEXT: [[STATEPOINT_TOKEN2:%.*]] = call token (i64, i32, void (i8 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i8f(i64 2882400000, i32 0, void (i8 addrspace(1)*)* @foo, i32 1, i32 0, i8 addrspace(1)* [[DOT0]], i32 0, i32 0, i8 addrspace(1)* [[DOT05]], i8 addrspace(1)* [[DOT0]]) -; CHECK-NEXT: [[TMP16_BASE_RELOCATED3:%.*]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[STATEPOINT_TOKEN2]], i32 8, i32 8) -; CHECK-NEXT: [[TMP16_RELOCATED4:%.*]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[STATEPOINT_TOKEN2]], i32 8, i32 9) -; CHECK-NEXT: ret void -; -bb6: ; preds = %bb3 - %tmp = extractelement <2 x i8 addrspace(1)*> undef, i32 1 - br i1 undef, label %bb7, label %bb9 - -bb7: ; preds = %bb6 - %tmp8 = getelementptr inbounds i8, i8 addrspace(1)* %tmp, i64 12 - br label %bb11 - -bb9: ; preds = %bb6, %bb6 - %tmp10 = getelementptr inbounds i8, i8 addrspace(1)* %tmp, i64 12 - br i1 undef, label %bb11, label %bb15 - -bb11: ; preds = %bb9, %bb7 - %tmp12 = phi i8 addrspace(1)* [ %tmp8, %bb7 ], [ %tmp10, %bb9 ] - call void @snork() [ "deopt"(i32 undef) ] - br label %bb15 - -bb15: ; preds = %bb11, %bb9, %bb9 - %tmp16 = phi i8 
addrspace(1)* [ %tmp10, %bb9 ], [ %tmp12, %bb11 ] - br i1 undef, label %bb17, label %bb20 - -bb17: ; preds = %bb15 - call void @snork() [ "deopt"(i32 undef) ] - br label %bb20 - -bb20: ; preds = %bb17, %bb15, %bb15 - call void @foo(i8 addrspace(1)* %tmp16) - ret void -} - -declare void @snork() -declare void @foo(i8 addrspace(1)*) declare void @spam() declare <2 x i8 addrspace(1)*> @baz() From 40c4ecabc238cfdd639bc1e927800337457e69e3 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 26 May 2020 17:22:53 -0700 Subject: [PATCH 167/770] [lldb/Docs] Add the application specific lldbinit to the man page This used to be part of the man page but got lost when we moved to generating it with Sphinx. --- lldb/docs/man/lldb.rst | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/lldb/docs/man/lldb.rst b/lldb/docs/man/lldb.rst index a3a0736680ad4..842a693f47518 100644 --- a/lldb/docs/man/lldb.rst +++ b/lldb/docs/man/lldb.rst @@ -303,10 +303,13 @@ CONFIGURATION FILES ------------------- :program:`lldb` reads things like settings, aliases and commands from the -.lldbinit file. It will first look for ~/.lldbinit and load that first. -Secondly, it will look for an .lldbinit file in the current working directory. -For security reasons, :program:`lldb` will print a warning and not source this -file by default. This behavior can be changed by changing the +.lldbinit file. First, it will read the application specific init file whose +name is ~/.lldbinit followed by a "-" and the name of the current program. This +would be ~/.lldbinit-lldb for the command line :program:`lldb` and +~/.lldbinit-Xcode for Xcode. Secondly, the global ~/.lldbinit will be read. +Finally, :program:`lldb` will look for an .lldbinit file in the current working +directory. For security reasons, :program:`lldb` will print a warning and not +source this file by default. This behavior can be changed by changing the target.load-cwd-lldbinit setting.
To always load the .lldbinit file in the current working directory, add the From 323d850427472ed060fc4c495b2010e6174b875b Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 26 May 2020 17:34:54 -0700 Subject: [PATCH 168/770] Add self as code owner for SCEV and IndVars This was discussed on llvm-dev thread "Transferring code ownership for SCEV and IndVars" a few months back. I just forgot to make the actual change. --- llvm/CODE_OWNERS.TXT | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/CODE_OWNERS.TXT b/llvm/CODE_OWNERS.TXT index 35f8ef81c1ea9..5cc5b87364c15 100644 --- a/llvm/CODE_OWNERS.TXT +++ b/llvm/CODE_OWNERS.TXT @@ -52,8 +52,8 @@ N: Pete Couperus E: petecoup@synopsys.com D: ARC backend (lib/Target/ARC/*) -N: Sanjoy Das -E: sanjoy@playingwithpointers.com +N: Philip Reames +E: listmail@philipreames.com D: IndVar Simplify, Scalar Evolution N: Marshall Clow From ae597a771ed4d7530e2ef232d02a253067e3312f Mon Sep 17 00:00:00 2001 From: Jessica Paquette Date: Tue, 26 May 2020 12:56:14 -0700 Subject: [PATCH 169/770] [AArch64][GlobalISel] Do not modify predicate when optimizing G_ICMP This fixes a bug in `tryOptArithImmedIntegerCompare`. It is unsafe to update the predicate on a MachineOperand when optimizing a G_ICMP, because it may be used in more than one place. For example, when we are optimizing G_SELECT, we allow compares which are used in more than one G_SELECT. If we modify the G_ICMP, then we'll break one of the G_SELECTs. Since the compare is being produced to either 1) Select a G_ICMP 2) Fold a G_ICMP into an instruction when profitable there's no reason to actually modify it. The change is local to the specific compare. Instead, pass a `CmpInst::Predicate` to `tryOptArithImmedIntegerCompare` which can be modified by reference. 
Differential Revision: https://reviews.llvm.org/D80585 --- .../AArch64/AArch64InstructionSelector.cpp | 33 ++++---- .../GlobalISel/select-arith-immed-compare.mir | 78 +++++++++++++++++++ 2 files changed, 97 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp index 57eaf140a6380..1b321260ed02c 100644 --- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -155,7 +155,9 @@ class AArch64InstructionSelector : public InstructionSelector { // Emit an integer compare between LHS and RHS, which checks for Predicate. // - // This may update Predicate when emitting the compare. + // This returns the produced compare instruction, and the predicate which + // was ultimately used in the compare. The predicate may differ from what + // is passed in \p Predicate due to optimization. std::pair emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, @@ -307,7 +309,7 @@ class AArch64InstructionSelector : public InstructionSelector { MachineIRBuilder &MIRBuilder) const; MachineInstr *tryOptArithImmedIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, - MachineOperand &Predicate, + CmpInst::Predicate &Predicate, MachineIRBuilder &MIB) const; MachineInstr *tryOptArithShiftedCompare(MachineOperand &LHS, MachineOperand &RHS, @@ -3685,13 +3687,16 @@ AArch64InstructionSelector::emitIntegerCompare( MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, MachineIRBuilder &MIRBuilder) const { assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!"); + assert(Predicate.isPredicate() && "Expected predicate?"); MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); + CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate(); + // Fold the compare if possible. 
MachineInstr *FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder); if (FoldCmp) - return {FoldCmp, (CmpInst::Predicate)Predicate.getPredicate()}; + return {FoldCmp, P}; // Can't fold into a CMN. Just emit a normal compare. unsigned CmpOpc = 0; @@ -3712,21 +3717,21 @@ AArch64InstructionSelector::emitIntegerCompare( // Try to match immediate forms. MachineInstr *ImmedCmp = - tryOptArithImmedIntegerCompare(LHS, RHS, Predicate, MIRBuilder); + tryOptArithImmedIntegerCompare(LHS, RHS, P, MIRBuilder); if (ImmedCmp) - return {ImmedCmp, (CmpInst::Predicate)Predicate.getPredicate()}; + return {ImmedCmp, P}; // If we don't have an immediate, we may have a shift which can be folded // into the compare. MachineInstr *ShiftedCmp = tryOptArithShiftedCompare(LHS, RHS, MIRBuilder); if (ShiftedCmp) - return {ShiftedCmp, (CmpInst::Predicate)Predicate.getPredicate()}; + return {ShiftedCmp, P}; auto CmpMI = MIRBuilder.buildInstr(CmpOpc, {ZReg}, {LHS.getReg(), RHS.getReg()}); // Make sure that we can constrain the compare that we emitted. constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI); - return {&*CmpMI, (CmpInst::Predicate)Predicate.getPredicate()}; + return {&*CmpMI, P}; } MachineInstr *AArch64InstructionSelector::emitVectorConcat( @@ -4042,7 +4047,7 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare( } MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare( - MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, + MachineOperand &LHS, MachineOperand &RHS, CmpInst::Predicate &P, MachineIRBuilder &MIB) const { // Attempt to select the immediate form of an integer compare. 
MachineRegisterInfo &MRI = *MIB.getMRI(); @@ -4051,7 +4056,6 @@ MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare( unsigned Size = Ty.getSizeInBits(); assert((Size == 32 || Size == 64) && "Expected 32 bit or 64 bit compare only?"); - auto P = (CmpInst::Predicate)Predicate.getPredicate(); // Check if this is a case we can already handle. InstructionSelector::ComplexRendererFns ImmFns; @@ -4066,6 +4070,7 @@ MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare( // We have a constant, but it doesn't fit. Try adjusting it by one and // updating the predicate if possible. uint64_t C = *MaybeImmed; + CmpInst::Predicate NewP; switch (P) { default: return nullptr; @@ -4080,7 +4085,7 @@ MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare( if ((Size == 64 && static_cast(C) == INT64_MIN) || (Size == 32 && static_cast(C) == INT32_MIN)) return nullptr; - P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT; + NewP = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT; C -= 1; break; case CmpInst::ICMP_ULT: @@ -4093,7 +4098,7 @@ MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare( // When c is not zero. if (C == 0) return nullptr; - P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT; + NewP = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT; C -= 1; break; case CmpInst::ICMP_SLE: @@ -4107,7 +4112,7 @@ MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare( if ((Size == 32 && static_cast(C) == INT32_MAX) || (Size == 64 && static_cast(C) == INT64_MAX)) return nullptr; - P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE; + NewP = (P == CmpInst::ICMP_SLE) ? 
CmpInst::ICMP_SLT : CmpInst::ICMP_SGE; C += 1; break; case CmpInst::ICMP_ULE: @@ -4121,7 +4126,7 @@ MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare( if ((Size == 32 && static_cast(C) == UINT32_MAX) || (Size == 64 && C == UINT64_MAX)) return nullptr; - P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE; + NewP = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE; C += 1; break; } @@ -4132,7 +4137,7 @@ MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare( ImmFns = select12BitValueWithLeftShift(C); if (!ImmFns) return nullptr; - Predicate.setPredicate(P); + P = NewP; } // At this point, we know we can select an immediate form. Go ahead and do diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-arith-immed-compare.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-arith-immed-compare.mir index 59fcbd09c4c12..37d7ec60f553a 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-arith-immed-compare.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-arith-immed-compare.mir @@ -627,4 +627,82 @@ body: | %3:gpr(s64) = G_AND %6, %5 $x0 = COPY %3(s64) RET_ReallyLR implicit $x0 + +... +--- +name: more_than_one_use_select +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1, $x2 + + ; Both of these selects use the same compare. + ; + ; They should both be optimized in the same way, so the SUBS produced for + ; each CSEL should be the same. 
+ + ; CHECK-LABEL: name: more_than_one_use_select + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK: %a:gpr64common = COPY $x0 + ; CHECK: %b:gpr64 = COPY $x1 + ; CHECK: %c:gpr64 = COPY $x2 + ; CHECK: $xzr = SUBSXri %a, 0, 0, implicit-def $nzcv + ; CHECK: %select1:gpr64 = CSELXr %a, %b, 11, implicit $nzcv + ; CHECK: $xzr = SUBSXri %a, 0, 0, implicit-def $nzcv + ; CHECK: %select2:gpr64 = CSELXr %b, %c, 11, implicit $nzcv + ; CHECK: %add:gpr64 = ADDXrr %select1, %select2 + ; CHECK: $x0 = COPY %add + ; CHECK: RET_ReallyLR implicit $x0 + %a:gpr(s64) = COPY $x0 + %b:gpr(s64) = COPY $x1 + %c:gpr(s64) = COPY $x2 + %cst:gpr(s64) = G_CONSTANT i64 -1 + %cmp:gpr(s32) = G_ICMP intpred(sle), %a(s64), %cst + %trunc_cmp:gpr(s1) = G_TRUNC %cmp(s32) + %select1:gpr(s64) = G_SELECT %trunc_cmp(s1), %a, %b + %select2:gpr(s64) = G_SELECT %trunc_cmp(s1), %b, %c + %add:gpr(s64) = G_ADD %select1, %select2 + $x0 = COPY %add(s64) + RET_ReallyLR implicit $x0 +... +--- +name: more_than_one_use_select_no_opt +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1, $x2 + + ; When we don't end up doing the optimization, we should not change the + ; predicate. + ; + ; In this case, the CSELXrs should both have predicate code 13. 
+ + ; CHECK-LABEL: name: more_than_one_use_select_no_opt + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK: %a:gpr64 = COPY $x0 + ; CHECK: %b:gpr64 = COPY $x1 + ; CHECK: %c:gpr64 = COPY $x2 + ; CHECK: %cst:gpr64 = MOVi64imm 922337203685477580 + ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %a, %cst, implicit-def $nzcv + ; CHECK: %select1:gpr64 = CSELXr %a, %b, 13, implicit $nzcv + ; CHECK: [[SUBSXrr1:%[0-9]+]]:gpr64 = SUBSXrr %a, %cst, implicit-def $nzcv + ; CHECK: %select2:gpr64 = CSELXr %b, %c, 13, implicit $nzcv + ; CHECK: %add:gpr64 = ADDXrr %select1, %select2 + ; CHECK: $x0 = COPY %add + ; CHECK: RET_ReallyLR implicit $x0 + %a:gpr(s64) = COPY $x0 + %b:gpr(s64) = COPY $x1 + %c:gpr(s64) = COPY $x2 + %cst:gpr(s64) = G_CONSTANT i64 922337203685477580 + %cmp:gpr(s32) = G_ICMP intpred(sle), %a(s64), %cst + %trunc_cmp:gpr(s1) = G_TRUNC %cmp(s32) + %select1:gpr(s64) = G_SELECT %trunc_cmp(s1), %a, %b + %select2:gpr(s64) = G_SELECT %trunc_cmp(s1), %b, %c + %add:gpr(s64) = G_ADD %select1, %select2 + $x0 = COPY %add(s64) + RET_ReallyLR implicit $x0 ... From f20ace6f333fa56af1879f7480a0e7979201c374 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 26 May 2020 16:12:08 -0700 Subject: [PATCH 170/770] [NFC, StackSafety] Better names for internal stuff Remove const from some parameters as upcoming changes in ScalarEvolution calls will need non const pointers. 
--- llvm/lib/Analysis/StackSafetyAnalysis.cpp | 36 ++++++++++------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/llvm/lib/Analysis/StackSafetyAnalysis.cpp index 31f30d4b5d56f..4985647c29d41 100644 --- a/llvm/lib/Analysis/StackSafetyAnalysis.cpp +++ b/llvm/lib/Analysis/StackSafetyAnalysis.cpp @@ -201,15 +201,14 @@ class StackSafetyLocalAnalysis { const ConstantRange UnknownRange; - ConstantRange offsetFromAlloca(Value *Addr, const Value *AllocaPtr); - ConstantRange getAccessRange(Value *Addr, const Value *AllocaPtr, + ConstantRange offsetFrom(Value *Addr, Value *Base); + ConstantRange getAccessRange(Value *Addr, Value *Base, ConstantRange SizeRange); - ConstantRange getAccessRange(Value *Addr, const Value *AllocaPtr, - TypeSize Size); + ConstantRange getAccessRange(Value *Addr, Value *Base, TypeSize Size); ConstantRange getMemIntrinsicAccessRange(const MemIntrinsic *MI, const Use &U, - const Value *AllocaPtr); + Value *Base); - bool analyzeAllUses(const Value *Ptr, UseInfo &AS); + bool analyzeAllUses(Value *Ptr, UseInfo &AS); ConstantRange getRange(uint64_t Lower, uint64_t Upper) const { return ConstantRange(APInt(PointerSize, Lower), APInt(PointerSize, Upper)); @@ -225,13 +224,11 @@ class StackSafetyLocalAnalysis { FunctionInfo run(); }; -ConstantRange -StackSafetyLocalAnalysis::offsetFromAlloca(Value *Addr, - const Value *AllocaPtr) { +ConstantRange StackSafetyLocalAnalysis::offsetFrom(Value *Addr, Value *Base) { if (!SE.isSCEVable(Addr->getType())) return UnknownRange; - AllocaOffsetRewriter Rewriter(SE, AllocaPtr); + AllocaOffsetRewriter Rewriter(SE, Base); const SCEV *Expr = Rewriter.visit(SE.getSCEV(Addr)); ConstantRange Offset = SE.getUnsignedRange(Expr).zextOrTrunc(PointerSize); assert(!Offset.isEmptySet()); @@ -239,7 +236,7 @@ StackSafetyLocalAnalysis::offsetFromAlloca(Value *Addr, } ConstantRange -StackSafetyLocalAnalysis::getAccessRange(Value *Addr, const Value *AllocaPtr, 
+StackSafetyLocalAnalysis::getAccessRange(Value *Addr, Value *Base, ConstantRange SizeRange) { // Zero-size loads and stores do not access memory. if (SizeRange.isEmptySet()) @@ -248,7 +245,7 @@ StackSafetyLocalAnalysis::getAccessRange(Value *Addr, const Value *AllocaPtr, if (!SE.isSCEVable(Addr->getType())) return UnknownRange; - AllocaOffsetRewriter Rewriter(SE, AllocaPtr); + AllocaOffsetRewriter Rewriter(SE, Base); const SCEV *Expr = Rewriter.visit(SE.getSCEV(Addr)); ConstantRange AccessStartRange = @@ -258,17 +255,16 @@ StackSafetyLocalAnalysis::getAccessRange(Value *Addr, const Value *AllocaPtr, return AccessRange; } -ConstantRange StackSafetyLocalAnalysis::getAccessRange(Value *Addr, - const Value *AllocaPtr, +ConstantRange StackSafetyLocalAnalysis::getAccessRange(Value *Addr, Value *Base, TypeSize Size) { ConstantRange SizeRange = Size.isScalable() ? ConstantRange::getFull(PointerSize) : getRange(0, Size.getFixedSize()); - return getAccessRange(Addr, AllocaPtr, SizeRange); + return getAccessRange(Addr, Base, SizeRange); } ConstantRange StackSafetyLocalAnalysis::getMemIntrinsicAccessRange( - const MemIntrinsic *MI, const Use &U, const Value *AllocaPtr) { + const MemIntrinsic *MI, const Use &U, Value *Base) { if (auto MTI = dyn_cast(MI)) { if (MTI->getRawSource() != U && MTI->getRawDest() != U) return getRange(0, 1); @@ -281,13 +277,13 @@ ConstantRange StackSafetyLocalAnalysis::getMemIntrinsicAccessRange( if (!Len) return UnknownRange; ConstantRange AccessRange = - getAccessRange(U, AllocaPtr, getRange(0, Len->getZExtValue())); + getAccessRange(U, Base, getRange(0, Len->getZExtValue())); return AccessRange; } /// The function analyzes all local uses of Ptr (alloca or argument) and /// calculates local access range and all function calls where it was used. 
-bool StackSafetyLocalAnalysis::analyzeAllUses(const Value *Ptr, UseInfo &US) { +bool StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr, UseInfo &US) { SmallPtrSet Visited; SmallVector WorkList; WorkList.push_back(Ptr); @@ -354,7 +350,7 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(const Value *Ptr, UseInfo &US) { auto B = CB.arg_begin(), E = CB.arg_end(); for (auto A = B; A != E; ++A) { if (A->get() == V) { - ConstantRange Offset = offsetFromAlloca(UI, Ptr); + ConstantRange Offset = offsetFrom(UI, Ptr); US.Calls.emplace_back(Callee, A - B, Offset); } } @@ -387,7 +383,7 @@ FunctionInfo StackSafetyLocalAnalysis::run() { } } - for (const Argument &A : make_range(F.arg_begin(), F.arg_end())) { + for (Argument &A : make_range(F.arg_begin(), F.arg_end())) { Info.Params.emplace_back(PointerSize); UseInfo &PS = Info.Params.back(); analyzeAllUses(&A, PS); From 5afef79ff465e1711a9412f6814d66ff80f50dcf Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 26 May 2020 16:48:08 -0700 Subject: [PATCH 171/770] [NFC, StackSafety] Remove duplicate code --- llvm/lib/Analysis/StackSafetyAnalysis.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/llvm/lib/Analysis/StackSafetyAnalysis.cpp index 4985647c29d41..223f99804bda4 100644 --- a/llvm/lib/Analysis/StackSafetyAnalysis.cpp +++ b/llvm/lib/Analysis/StackSafetyAnalysis.cpp @@ -242,14 +242,7 @@ StackSafetyLocalAnalysis::getAccessRange(Value *Addr, Value *Base, if (SizeRange.isEmptySet()) return ConstantRange::getEmpty(PointerSize); - if (!SE.isSCEVable(Addr->getType())) - return UnknownRange; - - AllocaOffsetRewriter Rewriter(SE, Base); - const SCEV *Expr = Rewriter.visit(SE.getSCEV(Addr)); - - ConstantRange AccessStartRange = - SE.getUnsignedRange(Expr).zextOrTrunc(PointerSize); + ConstantRange AccessStartRange = offsetFrom(Addr, Base); ConstantRange AccessRange = AccessStartRange.add(SizeRange); assert(!AccessRange.isEmptySet()); return AccessRange; From 
4320d4aa1c1c7d8bd75537703f7a11140552b0fa Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 26 May 2020 16:53:02 -0700 Subject: [PATCH 172/770] [NFC, StackSafety] Add some missing includes --- llvm/lib/Analysis/StackSafetyAnalysis.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/llvm/lib/Analysis/StackSafetyAnalysis.cpp index 223f99804bda4..4b2fc300b1188 100644 --- a/llvm/lib/Analysis/StackSafetyAnalysis.cpp +++ b/llvm/lib/Analysis/StackSafetyAnalysis.cpp @@ -9,13 +9,19 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/StackSafetyAnalysis.h" +#include "llvm/ADT/APInt.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/IR/ConstantRange.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" +#include #include using namespace llvm; From b5ae70046b0211ff75be8459f7282fe07ad918d8 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 26 May 2020 17:04:09 -0700 Subject: [PATCH 173/770] [StackSafety] Simplify SCEVRewriteVisitor Probably NFC. 
--- llvm/lib/Analysis/StackSafetyAnalysis.cpp | 13 ++----------- llvm/test/Analysis/StackSafetyAnalysis/local.ll | 1 + 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/llvm/lib/Analysis/StackSafetyAnalysis.cpp index 4b2fc300b1188..10b9f14bc75a7 100644 --- a/llvm/lib/Analysis/StackSafetyAnalysis.cpp +++ b/llvm/lib/Analysis/StackSafetyAnalysis.cpp @@ -42,16 +42,6 @@ class AllocaOffsetRewriter : public SCEVRewriteVisitor { AllocaOffsetRewriter(ScalarEvolution &SE, const Value *AllocaPtr) : SCEVRewriteVisitor(SE), AllocaPtr(AllocaPtr) {} - const SCEV *visit(const SCEV *Expr) { - // Only re-write the expression if the alloca is used in an addition - // expression (it can be used in other types of expressions if it's cast to - // an int and passed as an argument.) - if (!isa(Expr) && !isa(Expr) && - !isa(Expr)) - return Expr; - return SCEVRewriteVisitor::visit(Expr); - } - const SCEV *visitUnknown(const SCEVUnknown *Expr) { // FIXME: look through one or several levels of definitions? // This can be inttoptr(AllocaPtr) and SCEV would not unwrap @@ -237,7 +227,8 @@ ConstantRange StackSafetyLocalAnalysis::offsetFrom(Value *Addr, Value *Base) { AllocaOffsetRewriter Rewriter(SE, Base); const SCEV *Expr = Rewriter.visit(SE.getSCEV(Addr)); ConstantRange Offset = SE.getUnsignedRange(Expr).zextOrTrunc(PointerSize); - assert(!Offset.isEmptySet()); + if (Offset.isEmptySet()) + return UnknownRange; return Offset; } diff --git a/llvm/test/Analysis/StackSafetyAnalysis/local.ll b/llvm/test/Analysis/StackSafetyAnalysis/local.ll index 0ba1694e99eb4..b7c9eb1d29539 100644 --- a/llvm/test/Analysis/StackSafetyAnalysis/local.ll +++ b/llvm/test/Analysis/StackSafetyAnalysis/local.ll @@ -177,6 +177,7 @@ define void @NonConstantOffset(i1 zeroext %z) { ; CHECK-NEXT: args uses: ; CHECK-NEXT: z[]: full-set{{$}} ; CHECK-NEXT: allocas uses: +; FIXME: SCEV can't look through selects. 
; CHECK-NEXT: x[4]: [0,4){{$}} ; CHECK-NOT: ]: entry: From ef3e83122665adcb2f7a7f380c9deb3dac68cb80 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 23 May 2020 18:10:34 -0400 Subject: [PATCH 174/770] GlobalISel: Basic legalization for G_PTRMASK --- .../llvm/CodeGen/GlobalISel/LegalizerInfo.h | 3 + .../CodeGen/GlobalISel/LegalityPredicates.cpp | 6 + .../CodeGen/GlobalISel/LegalizerHelper.cpp | 16 ++ .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 19 +- .../AMDGPU/GlobalISel/legalize-ptrmask.mir | 221 ++++++++++++++++++ 5 files changed, 254 insertions(+), 11 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptrmask.mir diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index 624fa70f1aa69..f913f5f41b8e8 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -252,6 +252,9 @@ LegalityPredicate sizeNotPow2(unsigned TypeIdx); /// is not a power of 2. LegalityPredicate scalarOrEltSizeNotPow2(unsigned TypeIdx); +/// True if the total bitwidth of the specified type index is \p Size bits. +LegalityPredicate sizeIs(unsigned TypeIdx, unsigned Size); + /// True iff the specified type indices are both the same bit size. 
LegalityPredicate sameSize(unsigned TypeIdx0, unsigned TypeIdx1); /// True iff the specified MMO index has a size that is not a power of 2 diff --git a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp index 601d50e9806fd..b6fb061a8334b 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp @@ -126,6 +126,12 @@ LegalityPredicate LegalityPredicates::sizeNotPow2(unsigned TypeIdx) { }; } +LegalityPredicate LegalityPredicates::sizeIs(unsigned TypeIdx, unsigned Size) { + return [=](const LegalityQuery &Query) { + return Query.Types[TypeIdx].getSizeInBits() == Size; + }; +} + LegalityPredicate LegalityPredicates::sameSize(unsigned TypeIdx0, unsigned TypeIdx1) { return [=](const LegalityQuery &Query) { diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 704f1c4f96285..189c645ad9f4f 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1210,6 +1210,14 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_PTRMASK: { + if (TypeIdx != 1) + return UnableToLegalize; + Observer.changingInstr(MI); + narrowScalarSrc(MI, NarrowTy, 2); + Observer.changedInstr(MI); + return Legalized; + } } } @@ -2143,6 +2151,14 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC); Observer.changedInstr(MI); return Legalized; + case TargetOpcode::G_PTRMASK: { + if (TypeIdx != 1) + return UnableToLegalize; + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); + Observer.changedInstr(MI); + return Legalized; + } } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 
c24996b93fa06..74e03e1d99199 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -66,12 +66,6 @@ static LegalityPredicate isMultiple32(unsigned TypeIdx, }; } -static LegalityPredicate sizeIs(unsigned TypeIdx, unsigned Size) { - return [=](const LegalityQuery &Query) { - return Query.Types[TypeIdx].getSizeInBits() == Size; - }; -} - static LegalityPredicate isSmallOddVector(unsigned TypeIdx) { return [=](const LegalityQuery &Query) { const LLT Ty = Query.Types[TypeIdx]; @@ -560,14 +554,17 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .scalarize(0); } + // FIXME: Clamp offset operand. getActionDefinitionsBuilder(G_PTR_ADD) - .scalarize(0) - .alwaysLegal(); + .legalIf(isPointer(0)) + .scalarize(0); - // TODO: Clamp mask to pointer sizes getActionDefinitionsBuilder(G_PTRMASK) - .scalarize(0) - .alwaysLegal(); + .legalIf(typeInSet(1, {S64, S32})) + .minScalar(1, S32) + .maxScalarIf(sizeIs(0, 32), 1, S32) + .maxScalarIf(sizeIs(0, 64), 1, S64) + .scalarize(0); auto &CmpBuilder = getActionDefinitionsBuilder(G_ICMP) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptrmask.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptrmask.mir new file mode 100644 index 0000000000000..fe819107ce6e6 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptrmask.mir @@ -0,0 +1,221 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s + +--- +name: ptrmask_p1_s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; CHECK-LABEL: name: ptrmask_p1_s16 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; CHECK: [[PTRMASK:%[0-9]+]]:_(p1) = 
G_PTRMASK [[COPY]], [[AND]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[PTRMASK]](p1) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s16) = G_TRUNC %1 + %3:_(p1) = G_PTRMASK %0, %2 + $vgpr0_vgpr1 = COPY %3 +... + +--- +name: ptrmask_p1_s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; CHECK-LABEL: name: ptrmask_p1_s32 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[PTRMASK:%[0-9]+]]:_(p1) = G_PTRMASK [[COPY]], [[COPY1]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[PTRMASK]](p1) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(p1) = G_PTRMASK %0, %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: ptrmask_p1_s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; CHECK-LABEL: name: ptrmask_p1_s64 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; CHECK: [[PTRMASK:%[0-9]+]]:_(p1) = G_PTRMASK [[COPY]], [[COPY1]](s64) + ; CHECK: $vgpr0_vgpr1 = COPY [[PTRMASK]](p1) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(p1) = G_PTRMASK %0, %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: ptrmask_p1_s96 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + + ; CHECK-LABEL: name: ptrmask_p1_s96 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY1]](s96) + ; CHECK: [[PTRMASK:%[0-9]+]]:_(p1) = G_PTRMASK [[COPY]], [[TRUNC]](s64) + ; CHECK: $vgpr0_vgpr1 = COPY [[PTRMASK]](p1) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s96) = COPY $vgpr2_vgpr3_vgpr4 + %2:_(p1) = G_PTRMASK %0, %1 + $vgpr0_vgpr1 = COPY %2 +... 
+ +--- +name: ptrmask_p0_s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; CHECK-LABEL: name: ptrmask_p0_s16 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[COPY]], [[AND]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[PTRMASK]](p0) + %0:_(p0) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s16) = G_TRUNC %1 + %3:_(p0) = G_PTRMASK %0, %2 + $vgpr0_vgpr1 = COPY %3 +... + +--- +name: ptrmask_p0_s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; CHECK-LABEL: name: ptrmask_p0_s32 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[COPY]], [[COPY1]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[PTRMASK]](p0) + %0:_(p0) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(p0) = G_PTRMASK %0, %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: ptrmask_p0_s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; CHECK-LABEL: name: ptrmask_p0_s64 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[COPY]], [[COPY1]](s64) + ; CHECK: $vgpr0_vgpr1 = COPY [[PTRMASK]](p0) + %0:_(p0) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(p0) = G_PTRMASK %0, %1 + $vgpr0_vgpr1 = COPY %2 +... 
+ +--- +name: ptrmask_p0_s96 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + + ; CHECK-LABEL: name: ptrmask_p0_s96 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY1]](s96) + ; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[COPY]], [[TRUNC]](s64) + ; CHECK: $vgpr0_vgpr1 = COPY [[PTRMASK]](p0) + %0:_(p0) = COPY $vgpr0_vgpr1 + %1:_(s96) = COPY $vgpr2_vgpr3_vgpr4 + %2:_(p0) = G_PTRMASK %0, %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: ptrmask_p3_s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: ptrmask_p3_s16 + ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; CHECK: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[AND]](s32) + ; CHECK: $vgpr0 = COPY [[PTRMASK]](p3) + %0:_(p3) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %1 + %3:_(p3) = G_PTRMASK %0, %2 + $vgpr0 = COPY %3 +... + +--- +name: ptrmask_p3_s32 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: ptrmask_p3_s32 + ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[COPY1]](s32) + ; CHECK: $vgpr0 = COPY [[PTRMASK]](p3) + %0:_(p3) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(p3) = G_PTRMASK %0, %1 + $vgpr0 = COPY %2 +... 
+ +--- +name: ptrmask_p3_s64 +body: | + bb.0: + liveins: $vgpr0, $vgpr1_vgpr2 + + ; CHECK-LABEL: name: ptrmask_p3_s64 + ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr1_vgpr2 + ; CHECK: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[COPY1]](s64) + ; CHECK: $vgpr0 = COPY [[PTRMASK]](p3) + %0:_(p3) = COPY $vgpr0 + %1:_(s64) = COPY $vgpr1_vgpr2 + %2:_(p3) = G_PTRMASK %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: ptrmask_p3_s96 +body: | + bb.0: + liveins: $vgpr0, $vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: ptrmask_p3_s96 + ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr1_vgpr2_vgpr3 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s96) + ; CHECK: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s32) + ; CHECK: $vgpr0 = COPY [[PTRMASK]](p3) + %0:_(p3) = COPY $vgpr0 + %1:_(s96) = COPY $vgpr1_vgpr2_vgpr3 + %2:_(p3) = G_PTRMASK %0, %1 + $vgpr0 = COPY %2 +... From 8e3307f5519fa58827c7b030274f122b1ed36617 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 24 May 2020 10:49:22 -0400 Subject: [PATCH 175/770] GlobalISel: Add a clarification to G_STORE documentation Mirror the note on G_LOAD. We probably do need to add an explicit G_TRUNCSTORE opcode for the vector case, although I do not have a use for it. --- llvm/docs/GlobalISel/GenericOpcode.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst index 6372192c0088f..2350b9cf37645 100644 --- a/llvm/docs/GlobalISel/GenericOpcode.rst +++ b/llvm/docs/GlobalISel/GenericOpcode.rst @@ -567,7 +567,11 @@ Same as G_INDEXED_LOAD except that the load performed is zero-extending, as with G_STORE ^^^^^^^ -Generic store. Expects a MachineMemOperand in addition to explicit operands. +Generic store. Expects a MachineMemOperand in addition to explicit +operands. 
If the stored value size is greater than the memory size, +the high bits are implicitly truncated. If this is a vector store, the +high elements are discarded (i.e. this does not function as a per-lane +vector, truncating store). G_INDEXED_STORE ^^^^^^^^^^^^^^^ From 97a133f15724aa7ddf5d9b62dc9c0657a4efd115 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Tue, 26 May 2020 18:50:19 -0700 Subject: [PATCH 176/770] Temporarily Revert "[Clang][AArch64] Capturing proper pointer alignment for Neon vld1 intrinsicts" as it's causing crashes on code generation and https://bugs.llvm.org/show_bug.cgi?id=46084 This reverts commit 98cad555e29187a03e2bc3db5780762981913902. --- clang/lib/CodeGen/CGBuiltin.cpp | 12 ++--- clang/test/CodeGen/aarch64-neon-intrinsics.c | 52 ++++++++++---------- 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index bef0ad27145f3..b5129249c016d 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -10329,9 +10329,9 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vld1_v: case NEON::BI__builtin_neon_vld1q_v: { - auto Alignment = CGM.getNaturalPointeeTypeAlignment( - E->getArg(0)->IgnoreParenCasts()->getType()); Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); + auto Alignment = CharUnits::fromQuantity( + BuiltinID == NEON::BI__builtin_neon_vld1_v ?
8 : 16); return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment); } case NEON::BI__builtin_neon_vst1_v: @@ -10344,8 +10344,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ty = llvm::PointerType::getUnqual(VTy->getElementType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - auto Alignment = CGM.getNaturalPointeeTypeAlignment( - E->getArg(0)->IgnoreParenCasts()->getType()); + auto Alignment = CharUnits::fromQuantity( + BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16); Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment); return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane"); @@ -10355,8 +10355,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *V = UndefValue::get(Ty); Ty = llvm::PointerType::getUnqual(VTy->getElementType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - auto Alignment = CGM.getNaturalPointeeTypeAlignment( - E->getArg(0)->IgnoreParenCasts()->getType()); + auto Alignment = CharUnits::fromQuantity( + BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 
8 : 16); Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment); llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); diff --git a/clang/test/CodeGen/aarch64-neon-intrinsics.c b/clang/test/CodeGen/aarch64-neon-intrinsics.c index 1fb245f3d3429..7744b4f4a159d 100644 --- a/clang/test/CodeGen/aarch64-neon-intrinsics.c +++ b/clang/test/CodeGen/aarch64-neon-intrinsics.c @@ -8956,7 +8956,7 @@ float64_t test_vrsqrted_f64(float64_t a) { // CHECK-LABEL: @test_vld1q_u8( // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>* -// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]], align 1 +// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]] // CHECK: ret <16 x i8> [[TMP1]] uint8x16_t test_vld1q_u8(uint8_t const *a) { return vld1q_u8(a); @@ -8965,7 +8965,7 @@ uint8x16_t test_vld1q_u8(uint8_t const *a) { // CHECK-LABEL: @test_vld1q_u16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* -// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2 +// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]] // CHECK: ret <8 x i16> [[TMP2]] uint16x8_t test_vld1q_u16(uint16_t const *a) { return vld1q_u16(a); @@ -8974,7 +8974,7 @@ uint16x8_t test_vld1q_u16(uint16_t const *a) { // CHECK-LABEL: @test_vld1q_u32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* -// CHECK: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +// CHECK: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]] // CHECK: ret <4 x i32> [[TMP2]] uint32x4_t test_vld1q_u32(uint32_t const *a) { return vld1q_u32(a); @@ -8983,7 +8983,7 @@ uint32x4_t test_vld1q_u32(uint32_t const *a) { // CHECK-LABEL: @test_vld1q_u64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>* -// CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8 +// CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]] 
// CHECK: ret <2 x i64> [[TMP2]] uint64x2_t test_vld1q_u64(uint64_t const *a) { return vld1q_u64(a); @@ -8991,7 +8991,7 @@ uint64x2_t test_vld1q_u64(uint64_t const *a) { // CHECK-LABEL: @test_vld1q_s8( // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>* -// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]], align 1 +// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]] // CHECK: ret <16 x i8> [[TMP1]] int8x16_t test_vld1q_s8(int8_t const *a) { return vld1q_s8(a); @@ -9000,7 +9000,7 @@ int8x16_t test_vld1q_s8(int8_t const *a) { // CHECK-LABEL: @test_vld1q_s16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* -// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2 +// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]] // CHECK: ret <8 x i16> [[TMP2]] int16x8_t test_vld1q_s16(int16_t const *a) { return vld1q_s16(a); @@ -9009,7 +9009,7 @@ int16x8_t test_vld1q_s16(int16_t const *a) { // CHECK-LABEL: @test_vld1q_s32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* -// CHECK: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +// CHECK: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]] // CHECK: ret <4 x i32> [[TMP2]] int32x4_t test_vld1q_s32(int32_t const *a) { return vld1q_s32(a); @@ -9018,7 +9018,7 @@ int32x4_t test_vld1q_s32(int32_t const *a) { // CHECK-LABEL: @test_vld1q_s64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>* -// CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8 +// CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]] // CHECK: ret <2 x i64> [[TMP2]] int64x2_t test_vld1q_s64(int64_t const *a) { return vld1q_s64(a); @@ -9027,7 +9027,7 @@ int64x2_t test_vld1q_s64(int64_t const *a) { // CHECK-LABEL: @test_vld1q_f16( // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] 
to <8 x half>* -// CHECK: [[TMP2:%.*]] = load <8 x half>, <8 x half>* [[TMP1]], align 2 +// CHECK: [[TMP2:%.*]] = load <8 x half>, <8 x half>* [[TMP1]] // CHECK: ret <8 x half> [[TMP2]] float16x8_t test_vld1q_f16(float16_t const *a) { return vld1q_f16(a); @@ -9036,7 +9036,7 @@ float16x8_t test_vld1q_f16(float16_t const *a) { // CHECK-LABEL: @test_vld1q_f32( // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x float>* -// CHECK: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +// CHECK: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]] // CHECK: ret <4 x float> [[TMP2]] float32x4_t test_vld1q_f32(float32_t const *a) { return vld1q_f32(a); @@ -9045,7 +9045,7 @@ float32x4_t test_vld1q_f32(float32_t const *a) { // CHECK-LABEL: @test_vld1q_f64( // CHECK: [[TMP0:%.*]] = bitcast double* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x double>* -// CHECK: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8 +// CHECK: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]] // CHECK: ret <2 x double> [[TMP2]] float64x2_t test_vld1q_f64(float64_t const *a) { return vld1q_f64(a); @@ -9053,7 +9053,7 @@ float64x2_t test_vld1q_f64(float64_t const *a) { // CHECK-LABEL: @test_vld1q_p8( // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>* -// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]], align 1 +// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]] // CHECK: ret <16 x i8> [[TMP1]] poly8x16_t test_vld1q_p8(poly8_t const *a) { return vld1q_p8(a); @@ -9062,7 +9062,7 @@ poly8x16_t test_vld1q_p8(poly8_t const *a) { // CHECK-LABEL: @test_vld1q_p16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* -// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2 +// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]] // CHECK: ret <8 x i16> [[TMP2]] poly16x8_t test_vld1q_p16(poly16_t const *a) { 
return vld1q_p16(a); @@ -9070,7 +9070,7 @@ poly16x8_t test_vld1q_p16(poly16_t const *a) { // CHECK-LABEL: @test_vld1_u8( // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>* -// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1 +// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]] // CHECK: ret <8 x i8> [[TMP1]] uint8x8_t test_vld1_u8(uint8_t const *a) { return vld1_u8(a); @@ -9079,7 +9079,7 @@ uint8x8_t test_vld1_u8(uint8_t const *a) { // CHECK-LABEL: @test_vld1_u16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* -// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 2 +// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]] // CHECK: ret <4 x i16> [[TMP2]] uint16x4_t test_vld1_u16(uint16_t const *a) { return vld1_u16(a); @@ -9088,7 +9088,7 @@ uint16x4_t test_vld1_u16(uint16_t const *a) { // CHECK-LABEL: @test_vld1_u32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* -// CHECK: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4 +// CHECK: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]] // CHECK: ret <2 x i32> [[TMP2]] uint32x2_t test_vld1_u32(uint32_t const *a) { return vld1_u32(a); @@ -9097,7 +9097,7 @@ uint32x2_t test_vld1_u32(uint32_t const *a) { // CHECK-LABEL: @test_vld1_u64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>* -// CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]], align 8 +// CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]] // CHECK: ret <1 x i64> [[TMP2]] uint64x1_t test_vld1_u64(uint64_t const *a) { return vld1_u64(a); @@ -9105,7 +9105,7 @@ uint64x1_t test_vld1_u64(uint64_t const *a) { // CHECK-LABEL: @test_vld1_s8( // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>* -// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1 +// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]] // 
CHECK: ret <8 x i8> [[TMP1]] int8x8_t test_vld1_s8(int8_t const *a) { return vld1_s8(a); @@ -9114,7 +9114,7 @@ int8x8_t test_vld1_s8(int8_t const *a) { // CHECK-LABEL: @test_vld1_s16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* -// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 2 +// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]] // CHECK: ret <4 x i16> [[TMP2]] int16x4_t test_vld1_s16(int16_t const *a) { return vld1_s16(a); @@ -9123,7 +9123,7 @@ int16x4_t test_vld1_s16(int16_t const *a) { // CHECK-LABEL: @test_vld1_s32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* -// CHECK: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4 +// CHECK: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]] // CHECK: ret <2 x i32> [[TMP2]] int32x2_t test_vld1_s32(int32_t const *a) { return vld1_s32(a); @@ -9132,7 +9132,7 @@ int32x2_t test_vld1_s32(int32_t const *a) { // CHECK-LABEL: @test_vld1_s64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>* -// CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]], align 8 +// CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]] // CHECK: ret <1 x i64> [[TMP2]] int64x1_t test_vld1_s64(int64_t const *a) { return vld1_s64(a); @@ -9141,7 +9141,7 @@ int64x1_t test_vld1_s64(int64_t const *a) { // CHECK-LABEL: @test_vld1_f16( // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x half>* -// CHECK: [[TMP2:%.*]] = load <4 x half>, <4 x half>* [[TMP1]], align 2 +// CHECK: [[TMP2:%.*]] = load <4 x half>, <4 x half>* [[TMP1]] // CHECK: ret <4 x half> [[TMP2]] float16x4_t test_vld1_f16(float16_t const *a) { return vld1_f16(a); @@ -9150,7 +9150,7 @@ float16x4_t test_vld1_f16(float16_t const *a) { // CHECK-LABEL: @test_vld1_f32( // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* // 
CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x float>* -// CHECK: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4 +// CHECK: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]] // CHECK: ret <2 x float> [[TMP2]] float32x2_t test_vld1_f32(float32_t const *a) { return vld1_f32(a); @@ -9159,7 +9159,7 @@ float32x2_t test_vld1_f32(float32_t const *a) { // CHECK-LABEL: @test_vld1_f64( // CHECK: [[TMP0:%.*]] = bitcast double* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x double>* -// CHECK: [[TMP2:%.*]] = load <1 x double>, <1 x double>* [[TMP1]], align 8 +// CHECK: [[TMP2:%.*]] = load <1 x double>, <1 x double>* [[TMP1]] // CHECK: ret <1 x double> [[TMP2]] float64x1_t test_vld1_f64(float64_t const *a) { return vld1_f64(a); @@ -9167,7 +9167,7 @@ float64x1_t test_vld1_f64(float64_t const *a) { // CHECK-LABEL: @test_vld1_p8( // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>* -// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1 +// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]] // CHECK: ret <8 x i8> [[TMP1]] poly8x8_t test_vld1_p8(poly8_t const *a) { return vld1_p8(a); @@ -9176,7 +9176,7 @@ poly8x8_t test_vld1_p8(poly8_t const *a) { // CHECK-LABEL: @test_vld1_p16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* -// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 2 +// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]] // CHECK: ret <4 x i16> [[TMP2]] poly16x4_t test_vld1_p16(poly16_t const *a) { return vld1_p16(a); From 23a2f4521467a708fb1f9ae1f9536f302a1dc7e3 Mon Sep 17 00:00:00 2001 From: Kang Zhang Date: Wed, 27 May 2020 02:35:45 +0000 Subject: [PATCH 177/770] [NFC][PowerPC] Modify the test case two-address-crash.mir --- .../CodeGen/PowerPC/two-address-crash.mir | 54 ++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/PowerPC/two-address-crash.mir 
b/llvm/test/CodeGen/PowerPC/two-address-crash.mir index 6e98d3d8d398b..caf036358af9f 100644 --- a/llvm/test/CodeGen/PowerPC/two-address-crash.mir +++ b/llvm/test/CodeGen/PowerPC/two-address-crash.mir @@ -1,5 +1,7 @@ # RUN: not --crash llc -mtriple=ppc32-- %s -run-pass=phi-node-elimination \ # RUN: -verify-machineinstrs -o /dev/null 2>&1 | FileCheck %s +# RUN: llc -mtriple=ppc32-- %s -start-before=phi-node-elimination \ +# RUN: -verify-machineinstrs -o /dev/null 2>&1 --- | define void @VerifyTwoAddressCrash(i16 %div.0.i.i.i.i, i32 %L_num.0.i.i.i.i, i32 %tmp1.i.i206.i.i, i16* %P) { @@ -16,6 +18,56 @@ ... --- name: VerifyTwoAddressCrash +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: gprc, preferred-register: '' } + - { id: 1, class: gprc, preferred-register: '' } + - { id: 2, class: gprc, preferred-register: '' } + - { id: 3, class: gprc_and_gprc_nor0, preferred-register: '' } + - { id: 4, class: gprc, preferred-register: '' } + - { id: 5, class: crrc, preferred-register: '' } + - { id: 6, class: crbitrc, preferred-register: '' } + - { id: 7, class: gprc_and_gprc_nor0, preferred-register: '' } + - { id: 8, class: gprc_and_gprc_nor0, preferred-register: '' } + - { id: 9, class: gprc, preferred-register: '' } + - { id: 10, class: gprc, preferred-register: '' } + - { id: 11, class: gprc, preferred-register: '' } +liveins: + - { reg: '$r3', virtual-reg: '%0' } + - { reg: '$r4', virtual-reg: '%1' } + - { reg: '$r5', virtual-reg: '%2' } + - { reg: '$r6', virtual-reg: '%3' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + 
hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +constants: [] +machineFunctionInfo: {} body: | bb.0 (%ir-block.0): liveins: $r3, $r4, $r5, $r6 @@ -40,6 +92,6 @@ body: | # CHECK-LABEL: Bad machine code: Two-address instruction operands must be identical # CHECK-NEXT: - function: VerifyTwoAddressCrash # CHECK-NEXT: - basic block: %bb.0 -# CHECK-NEXT: - instruction: %10:gprc = RLWIMI killed %9:gprc(tied-def 0), killed %3:gprc, 1, 0, 30 +# CHECK-NEXT: - instruction: %10:gprc = RLWIMI killed %9:gprc(tied-def 0), killed %0:gprc, 1, 0, 30 # CHECK-NEXT: - operand 1: killed %9:gprc(tied-def 0) # CHECK-NEXT: LLVM ERROR: Found 1 machine code errors. From a7141480fb04eadf8d7d60c03494bcc885979a8e Mon Sep 17 00:00:00 2001 From: Jinsong Ji Date: Wed, 27 May 2020 02:37:04 +0000 Subject: [PATCH 178/770] [compiler-rt][NFC]Fix Wdeprecated warnings for fsanitize-coverage A few testcases are still using deprecated options. warning: argument '-fsanitize-coverage=[func|bb|edge]' is deprecated, use '-fsanitize-coverage=[func|bb|edge],[trace-pc-guard|trace-pc]' instead [-Wdeprecated] Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D79741 --- .../test/asan/TestCases/Windows/coverage-basic.cpp | 2 +- .../test/asan/TestCases/coverage-disabled.cpp | 2 +- compiler-rt/test/msan/coverage-levels.cpp | 8 ++++---- .../test/ubsan/TestCases/Misc/coverage-levels.cpp | 12 ++++++------ 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/compiler-rt/test/asan/TestCases/Windows/coverage-basic.cpp b/compiler-rt/test/asan/TestCases/Windows/coverage-basic.cpp index 1469e1c30ae34..163247e09bf7d 100644 --- a/compiler-rt/test/asan/TestCases/Windows/coverage-basic.cpp +++ b/compiler-rt/test/asan/TestCases/Windows/coverage-basic.cpp @@ -1,6 +1,6 @@ // RUN: rm -rf %t-dir // RUN: mkdir %t-dir && cd %t-dir -// RUN: %clangxx_asan -fsanitize-coverage=func %s -o test.exe +// RUN: %clangxx_asan 
-fsanitize-coverage=func,trace-pc-guard %s -o test.exe // RUN: %env_asan_opts=coverage=1 %run ./test.exe // // RUN: %sancov print *.sancov | FileCheck %s diff --git a/compiler-rt/test/asan/TestCases/coverage-disabled.cpp b/compiler-rt/test/asan/TestCases/coverage-disabled.cpp index 46a822dff08c1..2a283b4652121 100644 --- a/compiler-rt/test/asan/TestCases/coverage-disabled.cpp +++ b/compiler-rt/test/asan/TestCases/coverage-disabled.cpp @@ -3,7 +3,7 @@ // RUN: rm -rf %t-dir // RUN: mkdir -p %t-dir // -// RUN: %clangxx_asan -fsanitize-coverage=func %s -o %t +// RUN: %clangxx_asan -fsanitize-coverage=func,trace-pc-guard %s -o %t // // RUN: %env_asan_opts=coverage_direct=0:coverage_dir='"%t-dir"':verbosity=1 %run %t // RUN: not %sancov print %t-dir/*.sancov 2>&1 diff --git a/compiler-rt/test/msan/coverage-levels.cpp b/compiler-rt/test/msan/coverage-levels.cpp index 5ca3b717d04fb..1b7778e9d7aa8 100644 --- a/compiler-rt/test/msan/coverage-levels.cpp +++ b/compiler-rt/test/msan/coverage-levels.cpp @@ -1,13 +1,13 @@ // Test various levels of coverage // -// RUN: %clangxx_msan -DINIT_VAR=1 -O1 -fsanitize-coverage=func %s -o %t +// RUN: %clangxx_msan -DINIT_VAR=1 -O1 -fsanitize-coverage=func,trace-pc-guard %s -o %t // RUN: mkdir -p %t-dir // RUN: MSAN_OPTIONS=coverage=1:verbosity=1:coverage_dir=%t-dir %run %t 2>&1 | FileCheck %s --check-prefix=CHECK1 --check-prefix=CHECK_NOWARN -// RUN: %clangxx_msan -O1 -fsanitize-coverage=func %s -o %t +// RUN: %clangxx_msan -O1 -fsanitize-coverage=func,trace-pc-guard %s -o %t // RUN: MSAN_OPTIONS=coverage=1:verbosity=1:coverage_dir=%t-dir not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK1 --check-prefix=CHECK_WARN -// RUN: %clangxx_msan -O1 -fsanitize-coverage=bb %s -o %t +// RUN: %clangxx_msan -O1 -fsanitize-coverage=bb,trace-pc-guard %s -o %t // RUN: MSAN_OPTIONS=coverage=1:verbosity=1:coverage_dir=%t-dir not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK2 --check-prefix=CHECK_WARN -// RUN: %clangxx_msan -O1 -fsanitize-coverage=edge 
%s -o %t +// RUN: %clangxx_msan -O1 -fsanitize-coverage=edge,trace-pc-guard %s -o %t // RUN: MSAN_OPTIONS=coverage=1:verbosity=1:coverage_dir=%t-dir not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK3 --check-prefix=CHECK_WARN volatile int sink; diff --git a/compiler-rt/test/ubsan/TestCases/Misc/coverage-levels.cpp b/compiler-rt/test/ubsan/TestCases/Misc/coverage-levels.cpp index 364f985c50514..4a94350ec1620 100644 --- a/compiler-rt/test/ubsan/TestCases/Misc/coverage-levels.cpp +++ b/compiler-rt/test/ubsan/TestCases/Misc/coverage-levels.cpp @@ -4,20 +4,20 @@ // REQUIRES: shell // // RUN: rm -rf %t-dir && mkdir %t-dir -// RUN: %clangxx -fsanitize=shift -DGOOD_SHIFT=1 -O1 -fsanitize-coverage=func %s -o %t +// RUN: %clangxx -fsanitize=shift -DGOOD_SHIFT=1 -O1 -fsanitize-coverage=func,trace-pc-guard %s -o %t // RUN: %env_ubsan_opts=coverage=1:verbosity=1:coverage_dir='"%t-dir"' %run %t 2>&1 | FileCheck %s --check-prefix=CHECK1 --check-prefix=CHECK_NOWARN -// RUN: %clangxx -fsanitize=undefined -DGOOD_SHIFT=1 -O1 -fsanitize-coverage=func %s -o %t +// RUN: %clangxx -fsanitize=undefined -DGOOD_SHIFT=1 -O1 -fsanitize-coverage=func,trace-pc-guard %s -o %t // RUN: %env_ubsan_opts=coverage=1:verbosity=1:coverage_dir='"%t-dir"' %run %t 2>&1 | FileCheck %s --check-prefix=CHECK1 --check-prefix=CHECK_NOWARN // Also works without any sanitizer. 
-// RUN: %clangxx -DGOOD_SHIFT=1 -O1 -fsanitize-coverage=func %s -o %t +// RUN: %clangxx -DGOOD_SHIFT=1 -O1 -fsanitize-coverage=func,trace-pc-guard %s -o %t // RUN: %env_ubsan_opts=coverage=1:verbosity=1:coverage_dir='"%t-dir"' %run %t 2>&1 | FileCheck %s --check-prefix=CHECK1 --check-prefix=CHECK_NOWARN -// RUN: %clangxx -fsanitize=shift -O1 -fsanitize-coverage=func %s -o %t +// RUN: %clangxx -fsanitize=shift -O1 -fsanitize-coverage=func,trace-pc-guard %s -o %t // RUN: %env_ubsan_opts=coverage=1:verbosity=1:coverage_dir='"%t-dir"' %run %t 2>&1 | FileCheck %s --check-prefix=CHECK1 --check-prefix=CHECK_WARN -// RUN: %clangxx -fsanitize=shift -O1 -fsanitize-coverage=bb %s -o %t +// RUN: %clangxx -fsanitize=shift -O1 -fsanitize-coverage=bb,trace-pc-guard %s -o %t // RUN: %env_ubsan_opts=coverage=1:verbosity=1:coverage_dir='"%t-dir"' %run %t 2>&1 | FileCheck %s --check-prefix=CHECK2 --check-prefix=CHECK_WARN -// RUN: %clangxx -fsanitize=shift -O1 -fsanitize-coverage=edge %s -o %t +// RUN: %clangxx -fsanitize=shift -O1 -fsanitize-coverage=edge,trace-pc-guard %s -o %t // RUN: %env_ubsan_opts=coverage=1:verbosity=1:coverage_dir='"%t-dir"' %run %t 2>&1 | FileCheck %s --check-prefix=CHECK3 --check-prefix=CHECK_WARN // Coverage is not yet implemented in TSan. From 5759e4731635e1f28fef2c4619491a1b4a2bc305 Mon Sep 17 00:00:00 2001 From: MaheshRavishankar Date: Tue, 26 May 2020 16:35:20 -0700 Subject: [PATCH 179/770] [mlir][Linalg] Avoid using scf.parallel for non-parallel loops in Linalg ops. Modifying the loop nest builder for generating scf.parallel loops to not generate scf.parallel loops for non-parallel iterator types in Linalg operations. The existing implementation incorrectly generated scf.parallel for all tiled loops. It is rectified by refactoring logic used while lowering to loops that accounted for this. 
Differential Revision: https://reviews.llvm.org/D80188 --- .../include/mlir/Dialect/Linalg/Utils/Utils.h | 31 +++++ mlir/lib/Dialect/Linalg/Transforms/Loops.cpp | 81 ++----------- mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp | 10 +- mlir/lib/Dialect/Linalg/Utils/Utils.cpp | 89 +++++++++++++++ mlir/test/Dialect/Linalg/parallel_loops.mlir | 38 +++++- .../Dialect/Linalg/tile_parallel_reduce.mlir | 108 ++++++++++++++++++ .../Dialect/Linalg/transform-patterns.mlir | 25 +++- .../lib/Transforms/TestLinalgTransforms.cpp | 8 ++ 8 files changed, 312 insertions(+), 78 deletions(-) create mode 100644 mlir/test/Dialect/Linalg/tile_parallel_reduce.mlir diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h index 9caec484659e8..c8a5d83438f56 100644 --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -9,14 +9,21 @@ #ifndef MLIR_DIALECT_LINALG_UTILS_H_ #define MLIR_DIALECT_LINALG_UTILS_H_ +#include "mlir/Dialect/Affine/EDSC/Intrinsics.h" +#include "mlir/Dialect/Linalg/EDSC/Builders.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" #include "mlir/Dialect/SCF/SCF.h" +#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "llvm/ADT/SetVector.h" +using mlir::edsc::intrinsics::AffineIndexedValue; +using mlir::edsc::intrinsics::StdIndexedValue; + namespace mlir { class AffineExpr; +class AffineForOp; class AffineMap; class OperationFolder; class PatternRewriter; @@ -49,6 +56,15 @@ struct RegionMatcher { static Optional matchAsScalarBinaryOp(GenericOp op); }; +/// Checks if an iterator_type attribute is parallel. +bool isParallelIteratorType(Attribute attr); + +/// Checks if an iterator_type attribute is a reduction. +bool isReductionIteratorType(Attribute attr); + +/// Checks if an iterator_type attribute is a window.
+bool isWindowIteratorType(Attribute attr); + /// Checks whether the specific `producer` is the last write to exactly the /// whole `consumedView`. This checks structural dominance, that the dependence /// is a RAW without any interleaved write to any piece of `consumedView`. @@ -141,6 +157,21 @@ void applyPermutationToVector(SmallVector &inVec, inVec = auxVec; } +/// Utility class used to generate nested loops with ranges described by +/// `loopRanges` and loop type described by the `iteratorTypes`. `allIvs` is +/// populated with induction variables for all generated loops on return, with +/// `fun` used to generate the body of the innermost loop. +template +struct GenerateLoopNest { + using IndexedValueTy = + typename std::conditional::value, + AffineIndexedValue, StdIndexedValue>::type; + static void doit(MutableArrayRef allIvs, + ArrayRef loopRanges, + ArrayRef iteratorTypes, + std::function fun); +}; + } // namespace linalg } // namespace mlir diff --git a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp index 74da63dafee37..910078875f57b 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp @@ -487,80 +487,9 @@ class LinalgScopedEmitter { } }; -namespace { -/// Helper struct to generate the loop nest for the op. This factored out here -/// to be able to partially specialize this for different LoopTy. -template -class GenerateLoopNest { -public: - using IndexedValueTy = - typename std::conditional::value, - AffineIndexedValue, StdIndexedValue>::type; - static void doit(ConcreteOpTy linalgOp, ArrayRef loopRanges, - MutableArrayRef allIvs) { - GenericLoopNestRangeBuilder(allIvs, loopRanges)([&] { - SmallVector allIvValues(allIvs.begin(), allIvs.end()); - LinalgScopedEmitter::emitScalarImplementation(allIvValues, - linalgOp); - }); - } -}; - -/// Generates loop nest using scf.parallel. scf.parallel is only used for the -/// outer parallel loops. 
All other loops are generated using scf.for -/// operation. -template -class GenerateLoopNest { -public: - using IndexedValueTy = StdIndexedValue; - - static void doit(ConcreteOpTy linalgOp, ArrayRef loopRanges, - MutableArrayRef allIvs) { - // Only generate scf.parallel for outer consecutive "parallel" - // iterator_types. - // TODO(ravishankarm): Generate scf.parallel for all "parallel" iterator - // types, not just the outer most ones. Also handle "reduction" iterator - // types. - auto nOuterPar = linalgOp.iterator_types() - .getValue() - .take_while([](Attribute attr) { - return attr.cast().getValue() == - getParallelIteratorTypeName(); - }) - .size(); - // If there are no outer parallel loops, then number of loop ops is same as - // the number of loops, and they are all scf.for ops. - if (nOuterPar) { - GenericLoopNestRangeBuilder( - allIvs.take_front(nOuterPar), loopRanges.take_front(nOuterPar))([&] { - GenericLoopNestRangeBuilder( - allIvs.drop_front(nOuterPar), - loopRanges.drop_front(nOuterPar))([&] { - SmallVector allIvValues(allIvs.begin(), allIvs.end()); - LinalgScopedEmitter:: - emitScalarImplementation(allIvValues, linalgOp); - }); - }); - } else { - // If there are no parallel loops then fallback to generating all scf.for - // operations. 
- GenericLoopNestRangeBuilder(allIvs, loopRanges)([&] { - SmallVector allIvValues(allIvs.begin(), allIvs.end()); - LinalgScopedEmitter::emitScalarImplementation(allIvValues, - linalgOp); - }); - } - } -}; -} // namespace - template Optional linalgOpToLoopsImpl(Operation *op, OpBuilder &builder) { - using Impl = GenerateLoopNest; - using IndexedValueTy = - typename GenerateLoopNest::IndexedValueTy; + using IndexedValueTy = typename GenerateLoopNest::IndexedValueTy; ScopedContext scope(builder, op->getLoc()); @@ -591,7 +520,13 @@ Optional linalgOpToLoopsImpl(Operation *op, OpBuilder &builder) { emitLoopRanges(scope.getBuilderRef(), scope.getLocation(), invertedMap, getViewSizes(builder, linalgOp)); assert(loopRanges.size() == allIvs.size()); - Impl::doit(linalgOp, loopRanges, allIvs); + GenerateLoopNest::doit( + allIvs, loopRanges, linalgOp.iterator_types().getValue(), [&] { + SmallVector allIvValues(allIvs.begin(), allIvs.end()); + LinalgScopedEmitter::emitScalarImplementation(allIvValues, + linalgOp); + }); // Number of loop ops might be different from the number of ivs since some // loops like affine.parallel and scf.parallel have multiple ivs. llvm::SetVector loopSet; diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp index 2d875d4e95e4d..5b4fec4bbf20e 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -376,7 +376,11 @@ Optional static tileLinalgOpImpl( // 3. Create the tiled loops. 
LinalgOp res = op; SmallVector ivs(loopRanges.size()); - GenericLoopNestRangeBuilder(ivs, loopRanges)([&] { + SmallVector iteratorTypes = + llvm::to_vector<4>(op.iterator_types().cast().getValue()); + if (!options.interchangeVector.empty()) + applyPermutationToVector(iteratorTypes, options.interchangeVector); + GenerateLoopNest::doit(ivs, loopRanges, iteratorTypes, [&] { auto &b = ScopedContext::getBuilderRef(); auto loc = ScopedContext::getLocation(); SmallVector ivValues(ivs.begin(), ivs.end()); @@ -384,8 +388,8 @@ Optional static tileLinalgOpImpl( // If we have to apply a permutation to the tiled loop nest, we have to // reorder the induction variables This permutation is the right one // assuming that loopRanges have previously been permuted by - // (i,j,k)->(k,i,j) So this permutation should be the inversePermutation of - // that one: (d0,d1,d2)->(d2,d0,d1) + // (i,j,k)->(k,i,j) So this permutation should be the inversePermutation + // of that one: (d0,d1,d2)->(d2,d0,d1) if (!options.interchangeVector.empty()) ivValues = applyMapToValues(b, loc, invPermutationMap, ivValues); diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp index 4f86b934172b0..cd8b17650bb11 100644 --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -14,6 +14,7 @@ #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" #include "mlir/Dialect/Linalg/IR/LinalgTypes.h" +#include "mlir/Dialect/SCF/EDSC/Builders.h" #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/AffineExpr.h" @@ -101,3 +102,91 @@ mlir::linalg::getAssumedNonViewOperands(LinalgOp linalgOp) { } return res; } + +bool mlir::linalg::isParallelIteratorType(Attribute attr) { + if (auto strAttr = attr.dyn_cast()) { + return strAttr.getValue() == getParallelIteratorTypeName(); + } + return false; +} + +bool mlir::linalg::isReductionIteratorType(Attribute attr) { + if 
(auto strAttr = attr.dyn_cast()) { + return strAttr.getValue() == getReductionIteratorTypeName(); + } + return false; +} + +bool mlir::linalg::isWindowIteratorType(Attribute attr) { + if (auto strAttr = attr.dyn_cast()) { + return strAttr.getValue() == getWindowIteratorTypeName(); + } + return false; +} + +/// Explicit instantiation of loop nest generator for different loop types. +template struct mlir::linalg::GenerateLoopNest; +template struct mlir::linalg::GenerateLoopNest; +template struct mlir::linalg::GenerateLoopNest; + +/// Specialization of loop nest generator for scf.parallel loops to handle +/// iterator types that are not parallel. These are generated as sequential +/// loops. +template <> +void mlir::linalg::GenerateLoopNest::doit( + MutableArrayRef allIvs, ArrayRef loopRanges, + ArrayRef iteratorTypes, std::function fun) { + edsc::GenericLoopNestRangeBuilder(allIvs, loopRanges)(fun); +} + +template <> +void mlir::linalg::GenerateLoopNest::doit( + MutableArrayRef allIvs, ArrayRef loopRanges, + ArrayRef iteratorTypes, std::function fun) { + edsc::GenericLoopNestRangeBuilder(allIvs, loopRanges)(fun); +} + +template <> +void mlir::linalg::GenerateLoopNest::doit( + MutableArrayRef allIvs, ArrayRef loopRanges, + ArrayRef iteratorTypes, std::function fun) { + // Check if there is nothing to do here. This is also the recursion + // termination. + if (loopRanges.empty()) + return; + size_t nOuterPar = iteratorTypes.take_front(loopRanges.size()) + .take_while(isParallelIteratorType) + .size(); + if (nOuterPar == 0 && loopRanges.size() == 1) + // Generate the sequential for loop for the remaining non-parallel loop. + return GenerateLoopNest::doit(allIvs, loopRanges, iteratorTypes, + fun); + if (nOuterPar == 0) { + // The immediate outer loop is not parallel. Generate a scf.for op for this + // loop, but there might be subsequent loops that are parallel. Use + // recursion to find those. 
+ auto nestedFn = [&]() { + GenerateLoopNest::doit(allIvs.drop_front(), + loopRanges.drop_front(), + iteratorTypes.drop_front(), fun); + }; + return GenerateLoopNest::doit(allIvs[0], loopRanges[0], + iteratorTypes[0], nestedFn); + } + if (nOuterPar == loopRanges.size()) { + // All loops are parallel, so generate the scf.parallel op. + return edsc::GenericLoopNestRangeBuilder(allIvs, + loopRanges)(fun); + } + // Generate scf.parallel for the outer parallel loops. The next inner loop is + // sequential, but there might be more parallel loops after that. So recurse + // into the same method. + auto nestedFn = [&]() { + GenerateLoopNest::doit( + allIvs.drop_front(nOuterPar), loopRanges.drop_front(nOuterPar), + iteratorTypes.drop_front(nOuterPar), fun); + }; + return GenerateLoopNest::doit( + allIvs.take_front(nOuterPar), loopRanges.take_front(nOuterPar), + iteratorTypes.take_front(nOuterPar), nestedFn); +} diff --git a/mlir/test/Dialect/Linalg/parallel_loops.mlir b/mlir/test/Dialect/Linalg/parallel_loops.mlir index abe9cccc8b75b..2174ddc3c269d 100644 --- a/mlir/test/Dialect/Linalg/parallel_loops.mlir +++ b/mlir/test/Dialect/Linalg/parallel_loops.mlir @@ -57,6 +57,42 @@ func @lower_outer_parallel(%A: memref, %B: memref) { // CHECK-DAG: %[[D3:.*]] = dim %{{.*}}, 3 // CHECK: scf.parallel (%[[IV0:.*]], %[[IV1:.*]]) = (%[[C0]], %[[C0]]) to (%[[D0]], %[[D1]]) step (%[[C1]], %[[C1]]) // CHECK: scf.for %[[IV2:.*]] = %[[C0]] to %[[D2]] step %[[C1]] -// CHECK: scf.for %[[IV3:.*]] = %[[C0]] to %[[D3]] step %[[C1]] +// CHECK: scf.parallel (%[[IV3:.*]]) = (%[[C0]]) to (%[[D3]]) step (%[[C1]]) // CHECK: load %{{.*}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] // CHECK: store %{{.*}}, %{{.*}}[%[[IV0]], %[[IV1]], %[[IV3]]] + +// ----- + +#accesses = [ + affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)>, + affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d4, d5)> +] +#trait = { + args_in = 1, + args_out = 1, + iterator_types = ["parallel", "parallel", "reduction", "parallel", 
"parallel", "reduction"], + indexing_maps = #accesses +} + +func @lower_mixed_parallel(%A: memref, %B: memref) { + linalg.generic #trait %A, %B { + ^bb0(%a: f32, %b: f32): + linalg.yield %a: f32 + } : memref, memref + return +} +// CHECK-LABEL: @lower_mixed_parallel +// CHECK-DAG: %[[C0:.*]] = constant 0 +// CHECK-DAG: %[[C1:.*]] = constant 1 +// CHECK-DAG: %[[D0:.*]] = dim %{{.*}}, 0 +// CHECK-DAG: %[[D1:.*]] = dim %{{.*}}, 1 +// CHECK-DAG: %[[D2:.*]] = dim %{{.*}}, 2 +// CHECK-DAG: %[[D3:.*]] = dim %{{.*}}, 3 +// CHECK-DAG: %[[D4:.*]] = dim %{{.*}}, 4 +// CHECK-DAG: %[[D5:.*]] = dim %{{.*}}, 5 +// CHECK: scf.parallel (%[[IV0:.*]], %[[IV1:.*]]) = (%[[C0]], %[[C0]]) to (%[[D0]], %[[D1]]) step (%[[C1]], %[[C1]]) +// CHECK: scf.for %[[IV2:.*]] = %[[C0]] to %[[D2]] step %[[C1]] +// CHECK: scf.parallel (%[[IV3:.*]], %[[IV4:.*]]) = (%[[C0]], %[[C0]]) to (%[[D3]], %[[D4]]) step (%[[C1]], %[[C1]]) +// CHECK: scf.for %[[IV5:.*]] = %[[C0]] to %[[D5]] step %[[C1]] +// CHECK: load %{{.*}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]], %[[IV4]], %[[IV5]]] +// CHECK: store %{{.*}}, %{{.*}}[%[[IV0]], %[[IV2]], %[[IV4]], %[[IV5]]] diff --git a/mlir/test/Dialect/Linalg/tile_parallel_reduce.mlir b/mlir/test/Dialect/Linalg/tile_parallel_reduce.mlir new file mode 100644 index 0000000000000..bfa14570aef13 --- /dev/null +++ b/mlir/test/Dialect/Linalg/tile_parallel_reduce.mlir @@ -0,0 +1,108 @@ +// RUN: mlir-opt %s -linalg-tile-to-parallel-loops="linalg-tile-sizes=2,4,8" -split-input-file | FileCheck %s +// RUN: mlir-opt %s -linalg-tile-to-parallel-loops="linalg-tile-sizes=2" -split-input-file | FileCheck %s -check-prefix=TILE1 +// RUN: mlir-opt %s -linalg-tile-to-parallel-loops="linalg-tile-sizes=2,4" -split-input-file | FileCheck %s -check-prefix=TILE2 + +func @gemm(%arg0 : memref, + %arg1 : memref, + %arg2 : memref) +{ + linalg.matmul(%arg0, %arg1, %arg2) + : memref, memref, memref + return +} +// CHECK-LABEL: func @gemm +// CHECK-DAG: %[[C2:.*]] = constant 2 : index +// CHECK-DAG: 
%[[C4:.*]] = constant 4 : index +// CHECK-DAG: %[[C8:.*]] = constant 8 : index +// CHECK: scf.parallel (%[[ARG3:.*]], %[[ARG4:.*]]) = +// CHECK-SAME: step (%[[C2]], %[[C4]]) +// CHECK: scf.for %[[ARG5:.*]] = +// CHECK-SAME: step %[[C8]] +// CHECK: %[[SV1:.*]] = subview %{{.*}}[%[[ARG3]], %[[ARG5]]] +// CHECK: %[[SV2:.*]] = subview %{{.*}}[%[[ARG5]], %[[ARG4]]] +// CHECK: %[[SV3:.*]] = subview %{{.*}}[%[[ARG3]], %[[ARG4]]] +// CHECK: linalg.matmul(%[[SV1]], %[[SV2]], %[[SV3]]) + +// TILE1-LABEL: func @gemm +// TILE1-DAG: %[[C2:.*]] = constant 2 : index +// TILE1: scf.parallel (%[[ARG3:.*]]) = +// TILE1-SAME: step (%[[C2]]) +// TILE1: %[[SV1:.*]] = subview %{{.*}}[%[[ARG3]], 0] +// TILE1: %[[SV3:.*]] = subview %{{.*}}[%[[ARG3]], 0] +// TILE1-NOT: subview +// TILE1: linalg.matmul(%[[SV1]], %{{.*}}, %[[SV3]]) + +// TILE2-LABEL: func @gemm +// TILE2-DAG: %[[C2:.*]] = constant 2 : index +// TILE2-DAG: %[[C4:.*]] = constant 4 : index +// TILE2: scf.parallel (%[[ARG3:.*]], %[[ARG4:.*]]) = +// TILE2-SAME: step (%[[C2]], %[[C4]]) +// TILE2: %[[SV1:.*]] = subview %{{.*}}[%[[ARG3]], 0] +// TILE2: %[[SV2:.*]] = subview %{{.*}}[0, %[[ARG4]]] +// TILE2: %[[SV3:.*]] = subview %{{.*}}[%[[ARG3]], %[[ARG4]]] +// TILE2: linalg.matmul(%[[SV1]], %[[SV2]], %[[SV3]]) + +// ----- + +#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +#map1 = affine_map<(d0, d1, d2) -> (d0, d2)> +#map2 = affine_map<(d0, d1, d2) -> (d1)> +#accesses = [#map0, #map1, #map2] +#trait = { + args_in = 2 : i64, + args_out = 1 : i64, + iterator_types = ["reduction", "parallel", "reduction"], + indexing_maps = #accesses +} + +func @reduction(%arg0 : memref, + %arg1 : memref, + %arg2 : memref) +{ + linalg.generic #trait %arg0, %arg1, %arg2 { + ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): + %0 = addf %arg3, %arg4 : f32 + %1 = addf %0, %arg5 : f32 + linalg.yield %1 : f32 + } : memref, memref, memref + return +} + +// CHECK-LABEL: func @reduction +// CHECK-DAG: %[[C2:.*]] = constant 2 : index +// CHECK-DAG: %[[C4:.*]] = 
constant 4 : index +// CHECK-DAG: %[[C8:.*]] = constant 8 : index +// CHECK: scf.for %[[ARG3:.*]] = +// CHECK-SAME: step %[[C2]] +// CHECK: scf.parallel (%[[ARG4:.*]]) = +// CHECK-SAME: step (%[[C4]]) +// CHECK: scf.for %[[ARG5:.*]] = +// CHECK-SAME: step %[[C8]] +// CHECK: %[[SV1:.*]] = subview %{{.*}}[%[[ARG3]], %[[ARG4]], %[[ARG5]]] +// CHECK: %[[SV2:.*]] = subview %{{.*}}[%[[ARG3]], %[[ARG5]]] +// CHECK: %[[SV3:.*]] = subview %{{.*}}[%[[ARG4]]] +// CHECK: linalg.generic +// CHECK-SAME: %[[SV1]], %[[SV2]], %[[SV3]] + +// TILE1-LABEL: func @reduction +// TILE1-DAG: %[[C2:.*]] = constant 2 : index +// TILE1: scf.for %[[ARG3:.*]] = +// TILE1-SAME: step %[[C2]] +// TILE1: %[[SV1:.*]] = subview %{{.*}}[%[[ARG3]], 0, 0] +// TILE1: %[[SV2:.*]] = subview %{{.*}}[%[[ARG3]], 0] +// TILE1-NOT: subview +// TILE1: linalg.generic +// TILE1-SAME: %[[SV1]], %[[SV2]], %{{.*}} + +// TILE2-LABEL: func @reduction +// TILE2-DAG: %[[C2:.*]] = constant 2 : index +// TILE2-DAG: %[[C4:.*]] = constant 4 : index +// TILE2: scf.for %[[ARG3:.*]] = +// TILE2-SAME: step %[[C2]] +// TILE2: scf.parallel (%[[ARG4:.*]]) = +// TILE2-SAME: step (%[[C4]]) +// TILE2: %[[SV1:.*]] = subview %{{.*}}[%[[ARG3]], %[[ARG4]], 0] +// TILE2: %[[SV2:.*]] = subview %{{.*}}[%[[ARG3]], 0] +// TILE2: %[[SV3:.*]] = subview %{{.*}}[%[[ARG4]]] +// TILE2: linalg.generic +// TILE2-SAME: %[[SV1]], %[[SV2]], %[[SV3]] diff --git a/mlir/test/Dialect/Linalg/transform-patterns.mlir b/mlir/test/Dialect/Linalg/transform-patterns.mlir index ce868d156f6d8..4c46c74fe4909 100644 --- a/mlir/test/Dialect/Linalg/transform-patterns.mlir +++ b/mlir/test/Dialect/Linalg/transform-patterns.mlir @@ -44,7 +44,8 @@ func @matvec(%A: memref, // CHECK-DAG: %[[c0:.*]] = constant 0 : index // CHECK-DAG: %[[c5:.*]] = constant 5 : index // CHECK-DAG: %[[c6:.*]] = constant 6 : index -// CHECK: scf.parallel {{.*}} step (%[[c5]], %[[c6]]) +// CHECK: scf.parallel {{.*}} step (%[[c5]]) +// CHECK: scf.for {{.*}} step %[[c6]] // CHECK: 
linalg.matvec({{.*}}, {{.*}}, {{.*}}) : memref, memref, memref func @matmul(%A: memref, @@ -364,3 +365,25 @@ func @aligned_promote_fill(%arg0: memref) { // CHECK: linalg.fill(%[[v0]], {{%.*}}) : memref, f32 // CHECK: linalg.copy(%[[s0]], %[[l0]]) : memref, memref // CHECK: linalg.fill(%[[v0]], %[[cf]]) : memref, f32 + +func @tile_permute_parallel_loop(%arg0: memref, + %arg1: memref, + %arg2: memref) { + linalg.matmul(%arg0, %arg1, %arg2) {__internal_linalg_transform__ = "par__with_perm__"} + : memref, memref, memref + return +} +// CHECK-LABEL: func @tile_permute_parallel_loop +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: memref +// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: memref +// CHECK-DAG: %[[C16:.*]] = constant 16 : index +// CHECK-DAG: %[[C8:.*]] = constant 8 : index +// CHECK-DAG: %[[C4:.*]] = constant 4 : index +// CHECK-DAG: %[[C0:.*]] = constant 0 : index +// CHECK-DAG: %[[D0:.*]] = dim %[[ARG0]], 0 +// CHECK-DAG: %[[D1:.*]] = dim %[[ARG0]], 1 +// CHECK-DAG: %[[D2:.*]] = dim %[[ARG1]], 1 +// CHECK: scf.parallel (%{{.*}}) = (%[[C0]]) to (%[[D2]]) step (%[[C8]]) +// CHECK: scf.for %{{.*}} = %[[C0]] to %[[D1]] step %[[C4]] +// CHECK: scf.parallel (%{{.*}}) = (%[[C0]]) to (%[[D0]]) step (%[[C16]]) diff --git a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp index e38153058419c..7547e2953ef21 100644 --- a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp +++ b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp @@ -101,6 +101,14 @@ static void applyPatterns(FuncOp funcOp) { ctx, LinalgTilingOptions().setTileSizes({5, 6}).setInterchange({1, 0}), LinalgMarker({"__with_perm__"}, "L1__with_perm__")); + patterns.insert>( + ctx, + LinalgTilingOptions() + .setTileSizes({16, 8, 4}) + .setInterchange({1, 2, 0}) + .setLoopType(LinalgTilingLoopType::ParallelLoops), + LinalgMarker({"par__with_perm__"}, "after_par__with_perm__")); + 
//===--------------------------------------------------------------------===// // Linalg to loops patterns. //===--------------------------------------------------------------------===// From 0ed2d4c7cba8fb15e51d0f6f4e9011027c17085c Mon Sep 17 00:00:00 2001 From: MaheshRavishankar Date: Tue, 26 May 2020 16:35:59 -0700 Subject: [PATCH 180/770] [mlir][linalg] Allow promotion to use callbacks for alloc/dealloc/copies. Add options to LinalgPromotion to use callbacks for implementating the allocation, deallocation of buffers used for the promoted subviews, and to copy data into and from the original subviews to the allocated buffers. Also some misc. cleanup of the code. Differential Revision: https://reviews.llvm.org/D80365 --- .../Dialect/Linalg/Transforms/Transforms.h | 73 +++- .../include/mlir/Dialect/Linalg/Utils/Utils.h | 22 -- .../Dialect/Linalg/Transforms/Promotion.cpp | 331 +++++++++++------- .../Dialect/Linalg/Transforms/Transforms.cpp | 19 +- .../Dialect/Linalg/promotion_options.mlir | 33 ++ .../lib/Transforms/TestLinalgTransforms.cpp | 70 ++++ 6 files changed, 369 insertions(+), 179 deletions(-) create mode 100644 mlir/test/Dialect/Linalg/promotion_options.mlir diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index 6d34a0943e5e3..2da631956572f 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -57,18 +57,27 @@ Optional tileLinalgOp(OpBuilder &b, LinalgOp op, /// (i.e. `[1,1,2]` is an invalid permutation). LinalgOp interchange(LinalgOp op, ArrayRef interchangeVector); -/// Promotes the `subViews` into a new buffer allocated at the insertion point -/// `b`. Promotion occurs in 3 steps: -/// 1. Create a new buffer for a full tile (i.e. not clipped at the boundary). -/// 2. Take a full view on the buffer and `linalg.fill` it with zeros (use -/// float zero for now). -/// 3. 
Take a partial slice of the full view in step 2. and copy into it. -/// Infers statically sized buffers from subViews unless `dynamicBuffers` is -/// true. -/// -/// Returns a list of PromotionInfo which hold the promoted buffer and the -/// full and partial views indexing into the buffer. -// TODO: revisit dynamicBuffers option. +/// Callback function type used to perform the allocation for the promoted +/// `subView`. In `boundingSubViewsize` a best attempt is made to find the +/// smallest constant value for the size of the buffer needed for each +/// dimension. If that is not possible, contains the dynamic size of the +/// subview. The call back should return the buffer to use. +using AllocBufferCallbackFn = std::function( + OpBuilder &b, SubViewOp subView, ArrayRef boundingSubViewSize, + OperationFolder *folder)>; + +/// Callback function type used to deallocate the buffers used to hold the +/// promoted subview. +using DeallocBufferCallbackFn = + std::function; + +/// Callback function type used to insert copy from original subview to subview +/// of the promoted region for the read operands/subview of promoted region to +/// original subview for the results. The copy has to happen from `src` to +/// `dst`. +using CopyCallbackFn = + std::function; + struct LinalgPromotionOptions { /// Indices of subViews to promote. If `None`, try to promote all operands. Optional> operandsToPromote = None; @@ -111,10 +120,44 @@ struct LinalgPromotionOptions { alignment = align; return *this; } + /// Callback function to do the allocation of the promoted buffer. If None, + /// then the default allocation scheme of allocating a memref buffer + /// followed by a view operation is used. 
+ Optional allocationFn = None; + Optional deallocationFn = None; + LinalgPromotionOptions & + setAllocationDeallocationFns(AllocBufferCallbackFn const &allocFn, + DeallocBufferCallbackFn const &deallocFn) { + allocationFn = allocFn; + deallocationFn = deallocFn; + return *this; + } + + /// Callback function to do the copy of data to and from the promoted + /// subview. If None then a linalg.copy is used. + Optional copyInFn = None; + Optional copyOutFn = None; + LinalgPromotionOptions &setCopyInOutFns(CopyCallbackFn const ©In, + CopyCallbackFn const ©Out) { + copyInFn = copyIn; + copyOutFn = copyOut; + return *this; + } }; -LinalgOp promoteSubViews(OpBuilder &b, LinalgOp op, - LinalgPromotionOptions options, - OperationFolder *folder = nullptr); + +/// Promotes the `subViews` into a new buffer allocated at the insertion point +/// `b`. Promotion occurs in 3 steps: +/// 1. Create a new buffer for a full tile (i.e. not clipped at the boundary). +/// 2. Take a full view on the buffer. +/// 3. Take a partial slice of the full view in step 2. and copy into it. +/// Infers statically sized buffers from subViews unless `dynamicBuffers` is +/// true. +/// +/// Returns the modified linalg op (the modification happens in place) as well +/// as all the copy ops created. +Optional promoteSubViews(OpBuilder &b, LinalgOp op, + LinalgPromotionOptions options, + OperationFolder *folder = nullptr); /// Emit a suitable vector form for a Linalg op with fully static shape. 
void vectorizeLinalgOp(OpBuilder &builder, Operation *op); diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h index c8a5d83438f56..235dedd604017 100644 --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -117,28 +117,6 @@ SmallVector applyMapToValues(OpBuilder &b, Location loc, AffineMap map, ArrayRef values, OperationFolder *folder = nullptr); -struct PromotionInfo { - Value buffer; - Value fullLocalView; - Value partialLocalView; -}; - -/// Promotes the `subViews` into a new buffer allocated at the insertion point -/// `b`. For now, promotion occurs in 3 steps: -/// 1. Create a new buffer for a full tile (i.e. not clipped at the boundary). -/// 2. Take a full view on the buffer and `linalg.fill` it with zeros (use -/// float zero for now). -/// 3. Take a partial slice of the full view in step 2. and copy into it. -/// Infers statically sized buffers from subViews unless `dynamicBuffers` is -/// true. -/// -/// Returns a list of PromotionInfo which hold the promoted buffer and the -/// full and partial views indexing into the buffer. -SmallVector -promoteSubViews(OpBuilder &b, Location loc, ArrayRef subViews, - bool dynamicBuffers = false, int64_t alignment = 0, - OperationFolder *folder = nullptr); - /// Returns all the operands of `linalgOp` that are not views. /// Asserts that these operands are value types to allow transformations like /// tiling to just use the values when cloning `linalgOp`. 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp index 44de2a1021c27..de8514f0fa41a 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp @@ -25,8 +25,7 @@ #include "mlir/IR/AffineMap.h" #include "mlir/Support/LLVM.h" #include "mlir/Transforms/FoldUtils.h" - -#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/MapVector.h" #include "llvm/Support/CommandLine.h" using namespace mlir; @@ -35,7 +34,7 @@ using namespace mlir::edsc::intrinsics; using namespace mlir::linalg; using namespace mlir::scf; -using llvm::SetVector; +using llvm::MapVector; using folded_affine_min = FoldedValueBuilder; using folded_linalg_range = FoldedValueBuilder; @@ -45,6 +44,87 @@ using folded_std_view = FoldedValueBuilder; #define DEBUG_TYPE "linalg-promotion" +/// If `size` comes from an AffineMinOp and one of the values of AffineMinOp +/// is a constant then return a new value set to the smallest such constant. +/// Otherwise return size. +static Value extractSmallestConstantBoundingSize(OpBuilder &b, Location loc, + Value size) { + Optional boundingConst = {}; + if (auto affineMinOp = size.getDefiningOp()) { + for (auto e : affineMinOp.getAffineMap().getResults()) + if (auto cst = e.dyn_cast()) + boundingConst = boundingConst + ? std::min(boundingConst.getValue(), cst.getValue()) + : cst.getValue(); + } else if (auto constIndexOp = size.getDefiningOp()) { + if (constIndexOp.getType().isa()) + boundingConst = constIndexOp.value().cast().getInt(); + } + return boundingConst && *boundingConst >= 0 + ? b.create(loc, *boundingConst) + : size; +} + +/// Alloc a new buffer of `size`. If `dynamicBuffers` is true allocate exactly +/// the size needed, otherwise try to allocate a static bounding box. 
+static Value allocBuffer(Type elementType, Value size, bool dynamicBuffers, + OperationFolder *folder, + Optional alignment = None) { + auto *ctx = size.getContext(); + auto width = llvm::divideCeil(elementType.getIntOrFloatBitWidth(), 8); + IntegerAttr alignment_attr; + if (alignment.hasValue()) + alignment_attr = + IntegerAttr::get(IntegerType::get(64, ctx), alignment.getValue()); + if (!dynamicBuffers) + if (auto cst = size.getDefiningOp()) + return std_alloc( + MemRefType::get(width * cst.getValue(), IntegerType::get(8, ctx)), + ValueRange{}, alignment_attr); + Value mul = + folded_std_muli(folder, folded_std_constant_index(folder, width), size); + return std_alloc(MemRefType::get(-1, IntegerType::get(8, ctx)), mul, + alignment_attr); +} + +/// Default allocation callback function. This allocates a promoted buffer when +/// no call back to do so is provided. The default is to allocate a +/// memref<..xi8> and return a view to get a memref type of shape +/// boundingSubViewSize. +static Optional +allocBufferCallBack(OpBuilder &builder, SubViewOp subView, + ArrayRef boundingSubViewSize, bool dynamicBuffers, + Optional alignment, OperationFolder *folder) { + ShapedType viewType = subView.getType(); + int64_t rank = viewType.getRank(); + (void)rank; + assert(rank > 0 && boundingSubViewSize.size() == static_cast(rank)); + auto zero = folded_std_constant_index(folder, 0); + auto one = folded_std_constant_index(folder, 1); + + Value allocSize = one; + for (auto size : llvm::enumerate(boundingSubViewSize)) + allocSize = folded_std_muli(folder, allocSize, size.value()); + Value buffer = allocBuffer(viewType.getElementType(), allocSize, + dynamicBuffers, folder, alignment); + SmallVector dynSizes(boundingSubViewSize.size(), + ShapedType::kDynamicSize); + Value view = folded_std_view( + folder, MemRefType::get(dynSizes, viewType.getElementType()), buffer, + zero, boundingSubViewSize); + return view; +} + +/// Default implementation of deallocation of the buffer use for 
promotion. It +/// expects to get the same value that the default allocation method returned, +/// i.e. result of a ViewOp. +static LogicalResult deallocCallBack(OpBuilder &b, Value fullLocalView) { + auto viewOp = fullLocalView.getDefiningOp(); + assert(viewOp && "expected full local view to be a ViewOp"); + std_dealloc(viewOp.source()); + return success(); +} + namespace { /// Helper struct that captures the information required to apply the @@ -55,81 +135,65 @@ struct LinalgOpInstancePromotionOptions { LinalgOpInstancePromotionOptions(LinalgOp op, const LinalgPromotionOptions &options); /// SubViews to promote. - SetVector subViews; + MapVector subViews; /// True if the full view should be used for the promoted buffer. DenseMap useFullTileBuffers; + + /// Callback functions for allocation and deallocation of promoted buffers, as + /// well as to copy the data into and out of these buffers. + AllocBufferCallbackFn allocationFn; + DeallocBufferCallbackFn deallocationFn; + CopyCallbackFn copyInFn; + CopyCallbackFn copyOutFn; + /// Allow the use of dynamicaly-sized buffers. bool dynamicBuffers; /// Alignment of promoted buffer. 
Optional alignment; }; + +struct PromotionInfo { + Value fullLocalView; + Value partialLocalView; +}; } // namespace LinalgOpInstancePromotionOptions::LinalgOpInstancePromotionOptions( LinalgOp linalgOp, const LinalgPromotionOptions &options) - : subViews(), useFullTileBuffers(), dynamicBuffers(options.dynamicBuffers), + : subViews(), dynamicBuffers(options.dynamicBuffers), alignment(options.alignment) { unsigned nBuffers = linalgOp.getNumInputsAndOutputBuffers(); auto vUseFullTileBuffers = options.useFullTileBuffers.getValueOr(llvm::SmallBitVector()); vUseFullTileBuffers.resize(nBuffers, options.useFullTileBuffersDefault); - if (options.operandsToPromote.hasValue()) { - for (auto it : llvm::enumerate(options.operandsToPromote.getValue())) { - auto *op = linalgOp.getBuffer(it.value()).getDefiningOp(); - if (auto sv = dyn_cast_or_null(op)) { - subViews.insert(sv); - useFullTileBuffers[sv] = vUseFullTileBuffers[it.index()]; - } - } - } else { - for (unsigned idx = 0; idx < nBuffers; ++idx) { - auto *op = linalgOp.getBuffer(idx).getDefiningOp(); - if (auto sv = dyn_cast_or_null(op)) { - subViews.insert(sv); - useFullTileBuffers[sv] = vUseFullTileBuffers[idx]; - } + for (unsigned idx = 0; idx != nBuffers; ++idx) { + if (options.operandsToPromote && !options.operandsToPromote->count(idx)) + continue; + auto *op = linalgOp.getBuffer(idx).getDefiningOp(); + if (auto sv = dyn_cast_or_null(op)) { + subViews[idx] = sv; + useFullTileBuffers[sv] = vUseFullTileBuffers[idx]; } } -} - -/// If `size` comes from an AffineMinOp and one of the values of AffineMinOp -/// is a constant then return a new value set to the smallest such constant. -/// Otherwise return size. 
-static Value extractSmallestConstantBoundingSize(OpBuilder &b, Location loc, - Value size) { - auto affineMinOp = size.getDefiningOp(); - if (!affineMinOp) - return size; - int64_t minConst = std::numeric_limits::max(); - for (auto e : affineMinOp.getAffineMap().getResults()) - if (auto cst = e.dyn_cast()) - minConst = std::min(minConst, cst.getValue()); - return (minConst == std::numeric_limits::max()) - ? size - : b.create(loc, minConst); -} -/// Alloc a new buffer of `size`. If `dynamicBuffers` is true allocate exactly -/// the size needed, otherwise try to allocate a static bounding box. -static Value allocBuffer(Type elementType, Value size, bool dynamicBuffers, - OperationFolder *folder, - Optional alignment = None) { - auto *ctx = size.getContext(); - auto width = llvm::divideCeil(elementType.getIntOrFloatBitWidth(), 8); - IntegerAttr alignment_attr; - if (alignment.hasValue()) - alignment_attr = - IntegerAttr::get(IntegerType::get(64, ctx), alignment.getValue()); - if (!dynamicBuffers) - if (auto cst = size.getDefiningOp()) - return std_alloc( - MemRefType::get(width * cst.getValue(), IntegerType::get(8, ctx)), - ValueRange{}, alignment_attr); - Value mul = - folded_std_muli(folder, folded_std_constant_index(folder, width), size); - return std_alloc(MemRefType::get(-1, IntegerType::get(8, ctx)), mul, - alignment_attr); + allocationFn = + (options.allocationFn ? *(options.allocationFn) + : [&](OpBuilder &builder, SubViewOp subViewOp, + ArrayRef boundingSubViewSize, + OperationFolder *folder) -> Optional { + return allocBufferCallBack(builder, subViewOp, boundingSubViewSize, + dynamicBuffers, alignment, folder); + }); + deallocationFn = + (options.deallocationFn ? *(options.deallocationFn) : deallocCallBack); + auto defaultCopyCallBack = [&](OpBuilder &builder, Value src, + Value dst) -> LogicalResult { + linalg_copy(src, dst); + return success(); + }; + copyInFn = (options.copyInFn ? 
*(options.copyInFn) : defaultCopyCallBack); + copyOutFn = (options.copyOutFn ? *(options.copyOutFn) : defaultCopyCallBack); } // Performs promotion of a `subView` into a local buffer of the size of the @@ -149,45 +213,41 @@ static Value allocBuffer(Type elementType, Value size, bool dynamicBuffers, // To account for general boundary effects, padding must be performed on the // boundary tiles. For now this is done with an unconditional `fill` op followed // by a partial `copy` op. -static PromotionInfo promoteSubviewAsNewBuffer(OpBuilder &b, Location loc, - SubViewOp subView, - bool dynamicBuffers, - Optional alignment, - OperationFolder *folder) { - auto zero = folded_std_constant_index(folder, 0); - auto one = folded_std_constant_index(folder, 1); - +static Optional +promoteSubviewAsNewBuffer(OpBuilder &b, Location loc, SubViewOp subView, + LinalgOpInstancePromotionOptions const &options, + OperationFolder *folder) { auto viewType = subView.getType(); auto rank = viewType.getRank(); - Value allocSize = one; - SmallVector fullSizes, partialSizes; + SmallVector fullSizes, partialSizes; fullSizes.reserve(rank); partialSizes.reserve(rank); for (auto en : llvm::enumerate(subView.getOrCreateRanges(b, loc))) { - auto rank = en.index(); auto rangeValue = en.value(); // Try to extract a tight constant. 
LLVM_DEBUG(llvm::dbgs() << "Extract tightest: " << rangeValue.size << "\n"); Value size = extractSmallestConstantBoundingSize(b, loc, rangeValue.size); LLVM_DEBUG(llvm::dbgs() << "Extracted tightest: " << size << "\n"); - allocSize = folded_std_muli(folder, allocSize, size); fullSizes.push_back(size); - partialSizes.push_back(folded_std_dim(folder, subView, rank)); + partialSizes.push_back(folded_std_dim(folder, subView, en.index())); } SmallVector dynSizes(fullSizes.size(), -1); - auto buffer = allocBuffer(viewType.getElementType(), allocSize, - dynamicBuffers, folder, alignment); - auto fullLocalView = folded_std_view( - folder, MemRefType::get(dynSizes, viewType.getElementType()), buffer, - zero, fullSizes); + // If a callback is not specified, then use the default implementation for + // allocating the promoted buffer. + Optional fullLocalView = + options.allocationFn(b, subView, fullSizes, folder); + if (!fullLocalView) + return {}; + auto zero = folded_std_constant_index(folder, 0); + auto one = folded_std_constant_index(folder, 1); SmallVector zeros(fullSizes.size(), zero); SmallVector ones(fullSizes.size(), one); auto partialLocalView = - folded_std_subview(folder, fullLocalView, zeros, partialSizes, ones); - return PromotionInfo{buffer, fullLocalView, partialLocalView}; + folded_std_subview(folder, *fullLocalView, zeros, partialSizes, ones); + return PromotionInfo{*fullLocalView, partialLocalView}; } -static SmallVector +static Optional> promoteSubViews(OpBuilder &b, Location loc, LinalgOpInstancePromotionOptions options, OperationFolder *folder) { @@ -195,24 +255,18 @@ promoteSubViews(OpBuilder &b, Location loc, return {}; ScopedContext scope(b, loc); - SmallVector res; - res.reserve(options.subViews.size()); - DenseMap promotionInfoMap; - for (auto v : options.subViews) { - SubViewOp subView = cast(v.getDefiningOp()); - auto promotionInfo = promoteSubviewAsNewBuffer( - b, loc, subView, options.dynamicBuffers, options.alignment, folder); - 
promotionInfoMap.insert(std::make_pair(subView.getResult(), promotionInfo)); - res.push_back(promotionInfo); - } + MapVector promotionInfoMap; for (auto v : options.subViews) { - SubViewOp subView = cast(v.getDefiningOp()); - auto info = promotionInfoMap.find(v); - if (info == promotionInfoMap.end()) - continue; + SubViewOp subView = cast(v.second.getDefiningOp()); + Optional promotionInfo = + promoteSubviewAsNewBuffer(b, loc, subView, options, folder); + if (!promotionInfo) + return {}; + promotionInfoMap[v.first] = *promotionInfo; + // Only fill the buffer if the full local view is used - if (!options.useFullTileBuffers[v]) + if (!options.useFullTileBuffers[v.second]) continue; Value fillVal; if (auto t = subView.getType().getElementType().dyn_cast()) @@ -220,75 +274,80 @@ promoteSubViews(OpBuilder &b, Location loc, else if (auto t = subView.getType().getElementType().dyn_cast()) fillVal = folded_std_constant_int(folder, 0, t); - // TODO(ntv): fill is only necessary if `promotionInfo` has a full local - // view that is different from the partial local view and we are on the - // boundary. - linalg_fill(info->second.fullLocalView, fillVal); + linalg_fill(promotionInfo->fullLocalView, fillVal); } + // Copy data into the promoted buffers. Use callback if provided. 
for (auto v : options.subViews) { - auto info = promotionInfoMap.find(v); + auto info = promotionInfoMap.find(v.first); if (info == promotionInfoMap.end()) continue; - linalg_copy(cast(v.getDefiningOp()), - info->second.partialLocalView); + if (failed(options.copyInFn(b, cast(v.second.getDefiningOp()), + info->second.partialLocalView))) + return {}; } - return res; + return promotionInfoMap; } -static void promoteSubViews(OpBuilder &b, LinalgOp op, - LinalgOpInstancePromotionOptions options, - OperationFolder *folder) { +static Optional +promoteSubViews(OpBuilder &b, LinalgOp op, + LinalgOpInstancePromotionOptions options, + OperationFolder *folder) { assert(op.hasBufferSemantics() && "expected linalg op with buffer semantics"); if (auto convOp = dyn_cast(op.getOperation())) { // TODO(ntv): add a level of indirection to linalg.generic. if (convOp.padding()) - llvm_unreachable("Unexpected conv with padding"); + return {}; } // 1. Promote the specified views and use them in the new op. auto loc = op.getLoc(); - auto promotedBufferAndViews = promoteSubViews(b, loc, options, folder); + auto promotedBuffersAndViews = promoteSubViews(b, loc, options, folder); + if (!promotedBuffersAndViews || + promotedBuffersAndViews->size() != options.subViews.size()) + return {}; + + // 2. Append all other operands as they appear, this enforces that such + // operands are not views. This is to support cases such as FillOp taking + // extra scalars etc. 
Keep a reference to output buffers; SmallVector opViews; opViews.reserve(op.getNumInputsAndOutputs()); SmallVector, 8> writebackViews; - writebackViews.reserve(promotedBufferAndViews.size()); - unsigned promotedIdx = 0; - for (auto view : op.getInputsAndOutputBuffers()) { - if (options.subViews.count(view) != 0) { - if (options.useFullTileBuffers[view]) - opViews.push_back(promotedBufferAndViews[promotedIdx].fullLocalView); + writebackViews.reserve(promotedBuffersAndViews->size()); + for (auto view : llvm::enumerate(op.getInputsAndOutputBuffers())) { + if (options.subViews.count(view.index()) != 0) { + if (options.useFullTileBuffers[view.value()]) + opViews.push_back( + (*promotedBuffersAndViews)[view.index()].fullLocalView); else - opViews.push_back(promotedBufferAndViews[promotedIdx].partialLocalView); - writebackViews.emplace_back(std::make_pair( - view, promotedBufferAndViews[promotedIdx].partialLocalView)); - promotedIdx++; + opViews.push_back( + (*promotedBuffersAndViews)[view.index()].partialLocalView); + if (view.index() >= op.getNumInputs()) + writebackViews.emplace_back(std::make_pair( + view.value(), + (*promotedBuffersAndViews)[view.index()].partialLocalView)); } else { - opViews.push_back(view); + opViews.push_back(view.value()); } } - - // 2. Append all other operands as they appear, this enforces that such - // operands are not views. This is to support cases such as FillOp taking - // extra scalars etc. - // Keep a reference to output buffers; - DenseSet originalOutputs(op.getOutputBuffers().begin(), - op.getOutputBuffers().end()); op.getOperation()->setOperands(0, opViews.size(), opViews); OpBuilder::InsertionGuard guard(b); b.setInsertionPointAfter(op); ScopedContext scope(b, loc); // 3. Emit write-back for the promoted output views: copy the partial view. 
- for (auto viewAndPartialLocalView : writebackViews) - if (originalOutputs.count(viewAndPartialLocalView.first)) - linalg_copy(viewAndPartialLocalView.second, - viewAndPartialLocalView.first); + for (auto viewAndPartialLocalView : writebackViews) { + if (failed(options.copyOutFn(b, viewAndPartialLocalView.second, + viewAndPartialLocalView.first))) + return {}; + } // 4. Dealloc all local buffers. - for (const auto &pi : promotedBufferAndViews) - std_dealloc(pi.buffer); + for (const auto &pi : *promotedBuffersAndViews) { + options.deallocationFn(b, pi.second.fullLocalView); + } + return op; } LogicalResult @@ -312,13 +371,13 @@ mlir::linalg::promoteSubviewsPrecondition(Operation *op, return failure(); } -LinalgOp mlir::linalg::promoteSubViews(OpBuilder &b, LinalgOp linalgOp, - LinalgPromotionOptions options, - OperationFolder *folder) { +Optional mlir::linalg::promoteSubViews(OpBuilder &b, + LinalgOp linalgOp, + LinalgPromotionOptions options, + OperationFolder *folder) { LinalgOpInstancePromotionOptions linalgOptions(linalgOp, options); - ::promoteSubViews( + return ::promoteSubViews( b, linalgOp, LinalgOpInstancePromotionOptions(linalgOp, options), folder); - return linalgOp; } namespace { diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp index 2ce949aa034c4..527d162298bf4 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -179,12 +179,19 @@ LogicalResult mlir::linalg::LinalgBasePromotionPattern::matchAndRewrite( return failure(); if (failed(promoteSubviewsPrecondition(op, options))) return failure(); - rewriter.updateRootInPlace(op, [&]() { - auto promotedOp = promoteSubViews(rewriter, op, options); - (void)promotedOp; - assert(promotedOp && "Unexpected pattern failure"); - marker.replaceLinalgMarker(rewriter, op); - }); + + // TODO: We cannot use root update here. 
This pattern is creating other ops, + // so if the promotion fails, those need to be cleaned up, which doesnt seem + // to be happening here. So to fail properly, we should be cloning the op and + // deleting the previous op. This needs more investigation. + rewriter.startRootUpdate(op); + Optional promotedOp = promoteSubViews(rewriter, op, options); + if (!promotedOp) { + rewriter.cancelRootUpdate(op); + return op->emitError("subview promotion failed"); + } + rewriter.finalizeRootUpdate(op); + marker.replaceLinalgMarker(rewriter, op); return success(); } diff --git a/mlir/test/Dialect/Linalg/promotion_options.mlir b/mlir/test/Dialect/Linalg/promotion_options.mlir new file mode 100644 index 0000000000000..e6c8e2158fc3e --- /dev/null +++ b/mlir/test/Dialect/Linalg/promotion_options.mlir @@ -0,0 +1,33 @@ +// RUN: mlir-opt %s -test-linalg-transform-patterns=test-linalg-promotion-options -split-input-file | FileCheck %s + +func @gemm(%a : memref, %b : memref, %c : memref) +{ + linalg.matmul(%a, %b, %c) {__internal_linalg_transform__ = "START"} + : memref, memref, memref + return +} + +// CHECK: func @gemm +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: memref +// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: memref +// CHECK-DAG: %[[C42:.+]] = constant 4.200000e+01 : f32 +// CHECK: scf.for +// CHECK: scf.for +// CHECK: scf.for +// CHECK: %[[T7:.+]] = subview %[[ARG0]] +// CHECK: %[[T12:.+]] = subview %[[ARG1]] +// CHECK: %[[T17:.+]] = subview %[[ARG2]] +// CHECK: %[[T18:.+]] = alloc(%{{.*}}, %{{.*}}) : memref +// CHECK: %[[T19:.+]] = subview %[[T18]] +// CHECK: %[[T20:.+]] = alloc(%{{.*}}, %{{.*}}) : memref +// CHECK: %[[T21:.+]] = subview %[[T20]] +// CHECK: linalg.fill(%[[T19]], %[[C42]]) +// CHECK: linalg.copy(%[[T7]], %[[T19]]) +// CHECK: linalg.fill(%[[T21]], %[[C42]]) +// CHECK: linalg.copy(%[[T17]], %[[T21]]) +// CHECK: linalg.matmul(%[[T19]], %[[T12]], %[[T21]]) +// CHECK-NOT: linalg.fill +// CHECK: linalg.copy(%[[T21]], 
%[[T17]]) +// CHECK: dealloc %[[T18]] +// CHECK: dealloc %[[T20]] diff --git a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp index 7547e2953ef21..c38494fe27783 100644 --- a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp +++ b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp @@ -45,6 +45,9 @@ struct TestLinalgTransforms "Test a fused pass that applies patterns from matmul to vectors via " "2-d tiling"), llvm::cl::init(false)}; + Option testPromotionOptions{*this, "test-linalg-promotion-options", + llvm::cl::desc("Test promotion options"), + llvm::cl::init(false)}; }; } // end anonymous namespace @@ -197,10 +200,77 @@ static void fillL1TilingAndMatmulToVectorPatterns( LinalgVectorizationPattern>(context); } +//===----------------------------------------------------------------------===// +// Test promotion callbacks +//===----------------------------------------------------------------------===// + +// Allocation call back +static Optional allocCallBackFn(OpBuilder &b, SubViewOp subView, + ArrayRef boundingSubViewSize, + OperationFolder *folder) { + SmallVector shape(boundingSubViewSize.size(), -1); + return b + .create(subView.getLoc(), + MemRefType::get(shape, + subView.getType().getElementType(), + /*affineMapComposition =*/{}, 3), + boundingSubViewSize) + .getResult(); +} + +// Deallocation callback +static LogicalResult deallocCallBackFn(OpBuilder &b, Value buffer) { + b.create(buffer.getLoc(), buffer); + return success(); +} + +// Copy in call back +static LogicalResult copyCallBackFn(OpBuilder &b, Value src, Value dst, + bool isOutput) { + auto floatType = src.getType().cast().getElementType(); + if (!floatType.isa()) + return failure(); + if (!isOutput) + b.create( + src.getLoc(), dst, + b.create(src.getLoc(), FloatAttr::get(floatType, 42.0))); + b.create(src.getLoc(), src, dst); + return success(); +} + +void fillPromotionCallBackPatterns(MLIRContext *context, + OwningRewritePatternList &patterns) { 
+ patterns.insert>( + context, LinalgTilingOptions().setTileSizes({16, 16, 16}), + LinalgMarker({"START"}, "PROMOTE")); + patterns.insert>( + context, + LinalgPromotionOptions() + .setOperandsToPromote({0, 2}) + .setUseFullTileBuffers({false, false}) + .setAllocationDeallocationFns(allocCallBackFn, deallocCallBackFn) + .setCopyInOutFns( + [](OpBuilder &b, Value src, Value dst) -> LogicalResult { + copyCallBackFn(b, src, dst, false); + return success(); + }, + [](OpBuilder &b, Value src, Value dst) -> LogicalResult { + copyCallBackFn(b, src, dst, true); + return success(); + }), + LinalgMarker({"PROMOTE"})); +} + /// Apply transformations specified as patterns. void TestLinalgTransforms::runOnFunction() { if (testPatterns) { applyPatterns(getFunction()); + return; + } + if (testPromotionOptions) { + OwningRewritePatternList patterns; + fillPromotionCallBackPatterns(&getContext(), patterns); + applyPatternsAndFoldGreedily(getFunction(), patterns); } else { SmallVector stage1Patterns; if (testMatmulToVectorPatterns1dTiling) { From 9f69d3d0bc65ff50b1dc3ab0a6a08ddc32b190a6 Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Tue, 26 May 2020 22:22:09 -0700 Subject: [PATCH 181/770] [libc][NFC][Obvious] Convert the MPFR operations enum to an enum class. This was suggested in https://reviews.llvm.org/D79149. 
--- libc/test/src/math/cosf_test.cpp | 8 ++++---- libc/test/src/math/exp2f_test.cpp | 19 ++++++++++--------- libc/test/src/math/expf_test.cpp | 10 +++++----- libc/test/src/math/fabs_test.cpp | 2 +- libc/test/src/math/fabsf_test.cpp | 3 ++- libc/test/src/math/sincosf_test.cpp | 16 ++++++++-------- libc/test/src/math/sinf_test.cpp | 10 +++++----- libc/utils/MPFRWrapper/MPFRUtils.cpp | 14 +++++++------- libc/utils/MPFRWrapper/MPFRUtils.h | 2 +- 9 files changed, 43 insertions(+), 41 deletions(-) diff --git a/libc/test/src/math/cosf_test.cpp b/libc/test/src/math/cosf_test.cpp index f9fc9c2e2d0d1..1f9dffd87c10a 100644 --- a/libc/test/src/math/cosf_test.cpp +++ b/libc/test/src/math/cosf_test.cpp @@ -80,7 +80,7 @@ TEST(CosfTest, InFloatRange) { float x = valueFromBits(v); if (isnan(x) || isinf(x)) continue; - ASSERT_MPFR_MATCH(mpfr::OP_Cos, x, __llvm_libc::cosf(x), tolerance); + ASSERT_MPFR_MATCH(mpfr::Operation::Cos, x, __llvm_libc::cosf(x), tolerance); } } @@ -88,12 +88,12 @@ TEST(CosfTest, InFloatRange) { TEST(CosfTest, SmallValues) { float x = valueFromBits(0x17800000U); float result = __llvm_libc::cosf(x); - EXPECT_MPFR_MATCH(mpfr::OP_Cos, x, result, tolerance); + EXPECT_MPFR_MATCH(mpfr::Operation::Cos, x, result, tolerance); EXPECT_EQ(BitPatterns::one, valueAsBits(result)); x = valueFromBits(0x0040000U); result = __llvm_libc::cosf(x); - EXPECT_MPFR_MATCH(mpfr::OP_Cos, x, result, tolerance); + EXPECT_MPFR_MATCH(mpfr::Operation::Cos, x, result, tolerance); EXPECT_EQ(BitPatterns::one, valueAsBits(result)); } @@ -102,6 +102,6 @@ TEST(CosfTest, SmallValues) { TEST(CosfTest, SDCOMP_26094) { for (uint32_t v : sdcomp26094Values) { float x = valueFromBits(v); - ASSERT_MPFR_MATCH(mpfr::OP_Cos, x, __llvm_libc::cosf(x), tolerance); + ASSERT_MPFR_MATCH(mpfr::Operation::Cos, x, __llvm_libc::cosf(x), tolerance); } } diff --git a/libc/test/src/math/exp2f_test.cpp b/libc/test/src/math/exp2f_test.cpp index dbb7046e28bda..c900ec6695444 100644 --- a/libc/test/src/math/exp2f_test.cpp +++ 
b/libc/test/src/math/exp2f_test.cpp @@ -92,27 +92,27 @@ TEST(ExpfTest, Borderline) { llvmlibc_errno = 0; x = valueFromBits(0x42fa0001U); - EXPECT_MPFR_MATCH(mpfr::OP_Exp2, x, __llvm_libc::exp2f(x), tolerance); + EXPECT_MPFR_MATCH(mpfr::Operation::Exp2, x, __llvm_libc::exp2f(x), tolerance); EXPECT_EQ(llvmlibc_errno, 0); x = valueFromBits(0x42ffffffU); - EXPECT_MPFR_MATCH(mpfr::OP_Exp2, x, __llvm_libc::exp2f(x), tolerance); + EXPECT_MPFR_MATCH(mpfr::Operation::Exp2, x, __llvm_libc::exp2f(x), tolerance); EXPECT_EQ(llvmlibc_errno, 0); x = valueFromBits(0xc2fa0001U); - EXPECT_MPFR_MATCH(mpfr::OP_Exp2, x, __llvm_libc::exp2f(x), tolerance); + EXPECT_MPFR_MATCH(mpfr::Operation::Exp2, x, __llvm_libc::exp2f(x), tolerance); EXPECT_EQ(llvmlibc_errno, 0); x = valueFromBits(0xc2fc0000U); - EXPECT_MPFR_MATCH(mpfr::OP_Exp2, x, __llvm_libc::exp2f(x), tolerance); + EXPECT_MPFR_MATCH(mpfr::Operation::Exp2, x, __llvm_libc::exp2f(x), tolerance); EXPECT_EQ(llvmlibc_errno, 0); x = valueFromBits(0xc2fc0001U); - EXPECT_MPFR_MATCH(mpfr::OP_Exp2, x, __llvm_libc::exp2f(x), tolerance); + EXPECT_MPFR_MATCH(mpfr::Operation::Exp2, x, __llvm_libc::exp2f(x), tolerance); EXPECT_EQ(llvmlibc_errno, 0); x = valueFromBits(0xc3150000U); - EXPECT_MPFR_MATCH(mpfr::OP_Exp2, x, __llvm_libc::exp2f(x), tolerance); + EXPECT_MPFR_MATCH(mpfr::Operation::Exp2, x, __llvm_libc::exp2f(x), tolerance); EXPECT_EQ(llvmlibc_errno, 0); } @@ -124,12 +124,12 @@ TEST(ExpfTest, Underflow) { llvmlibc_errno = 0; float x = valueFromBits(0xc3158000U); - EXPECT_MPFR_MATCH(mpfr::OP_Exp2, x, __llvm_libc::exp2f(x), tolerance); + EXPECT_MPFR_MATCH(mpfr::Operation::Exp2, x, __llvm_libc::exp2f(x), tolerance); EXPECT_EQ(llvmlibc_errno, ERANGE); llvmlibc_errno = 0; x = valueFromBits(0xc3165432U); - EXPECT_MPFR_MATCH(mpfr::OP_Exp2, x, __llvm_libc::exp2f(x), tolerance); + EXPECT_MPFR_MATCH(mpfr::Operation::Exp2, x, __llvm_libc::exp2f(x), tolerance); EXPECT_EQ(llvmlibc_errno, ERANGE); } @@ -149,6 +149,7 @@ TEST(exp2fTest, InFloatRange) { // 
wider precision. if (isnan(result) || isinf(result) || llvmlibc_errno != 0) continue; - ASSERT_MPFR_MATCH(mpfr::OP_Exp2, x, __llvm_libc::exp2f(x), tolerance); + ASSERT_MPFR_MATCH(mpfr::Operation::Exp2, x, __llvm_libc::exp2f(x), + tolerance); } } diff --git a/libc/test/src/math/expf_test.cpp b/libc/test/src/math/expf_test.cpp index aa50bd71974b5..c99058dbf6e5f 100644 --- a/libc/test/src/math/expf_test.cpp +++ b/libc/test/src/math/expf_test.cpp @@ -109,19 +109,19 @@ TEST(ExpfTest, Borderline) { llvmlibc_errno = 0; x = valueFromBits(0x42affff8U); - ASSERT_MPFR_MATCH(mpfr::OP_Exp, x, __llvm_libc::expf(x), tolerance); + ASSERT_MPFR_MATCH(mpfr::Operation::Exp, x, __llvm_libc::expf(x), tolerance); EXPECT_EQ(llvmlibc_errno, 0); x = valueFromBits(0x42b00008U); - ASSERT_MPFR_MATCH(mpfr::OP_Exp, x, __llvm_libc::expf(x), tolerance); + ASSERT_MPFR_MATCH(mpfr::Operation::Exp, x, __llvm_libc::expf(x), tolerance); EXPECT_EQ(llvmlibc_errno, 0); x = valueFromBits(0xc2affff8U); - ASSERT_MPFR_MATCH(mpfr::OP_Exp, x, __llvm_libc::expf(x), tolerance); + ASSERT_MPFR_MATCH(mpfr::Operation::Exp, x, __llvm_libc::expf(x), tolerance); EXPECT_EQ(llvmlibc_errno, 0); x = valueFromBits(0xc2b00008U); - ASSERT_MPFR_MATCH(mpfr::OP_Exp, x, __llvm_libc::expf(x), tolerance); + ASSERT_MPFR_MATCH(mpfr::Operation::Exp, x, __llvm_libc::expf(x), tolerance); EXPECT_EQ(llvmlibc_errno, 0); } @@ -141,6 +141,6 @@ TEST(ExpfTest, InFloatRange) { // wider precision. 
if (isnan(result) || isinf(result) || llvmlibc_errno != 0) continue; - ASSERT_MPFR_MATCH(mpfr::OP_Exp, x, __llvm_libc::expf(x), tolerance); + ASSERT_MPFR_MATCH(mpfr::Operation::Exp, x, __llvm_libc::expf(x), tolerance); } } diff --git a/libc/test/src/math/fabs_test.cpp b/libc/test/src/math/fabs_test.cpp index a4c934b07f5a6..a9ce9e764298b 100644 --- a/libc/test/src/math/fabs_test.cpp +++ b/libc/test/src/math/fabs_test.cpp @@ -59,6 +59,6 @@ TEST(FabsTest, InDoubleRange) { double x = valueFromBits(v); if (isnan(x) || isinf(x)) continue; - ASSERT_MPFR_MATCH(mpfr::OP_Abs, x, __llvm_libc::fabs(x), tolerance); + ASSERT_MPFR_MATCH(mpfr::Operation::Abs, x, __llvm_libc::fabs(x), tolerance); } } diff --git a/libc/test/src/math/fabsf_test.cpp b/libc/test/src/math/fabsf_test.cpp index 40e61e6091b64..4231a251bf137 100644 --- a/libc/test/src/math/fabsf_test.cpp +++ b/libc/test/src/math/fabsf_test.cpp @@ -61,6 +61,7 @@ TEST(FabsfTest, InFloatRange) { double x = valueFromBits(v); if (isnan(x) || isinf(x)) continue; - ASSERT_MPFR_MATCH(mpfr::OP_Abs, x, __llvm_libc::fabsf(x), tolerance); + ASSERT_MPFR_MATCH(mpfr::Operation::Abs, x, __llvm_libc::fabsf(x), + tolerance); } } diff --git a/libc/test/src/math/sincosf_test.cpp b/libc/test/src/math/sincosf_test.cpp index 9a87e2c9e58c5..66b247aeb0afd 100644 --- a/libc/test/src/math/sincosf_test.cpp +++ b/libc/test/src/math/sincosf_test.cpp @@ -95,8 +95,8 @@ TEST(SinCosfTest, InFloatRange) { float sin, cos; __llvm_libc::sincosf(x, &sin, &cos); - ASSERT_MPFR_MATCH(mpfr::OP_Cos, x, cos, tolerance); - ASSERT_MPFR_MATCH(mpfr::OP_Sin, x, sin, tolerance); + ASSERT_MPFR_MATCH(mpfr::Operation::Cos, x, cos, tolerance); + ASSERT_MPFR_MATCH(mpfr::Operation::Sin, x, sin, tolerance); } } @@ -106,16 +106,16 @@ TEST(SinCosfTest, SmallValues) { float x = valueFromBits(bits); float result_cos, result_sin; __llvm_libc::sincosf(x, &result_sin, &result_cos); - EXPECT_MPFR_MATCH(mpfr::OP_Cos, x, result_cos, tolerance); - EXPECT_MPFR_MATCH(mpfr::OP_Sin, x, 
result_sin, tolerance); + EXPECT_MPFR_MATCH(mpfr::Operation::Cos, x, result_cos, tolerance); + EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, result_sin, tolerance); EXPECT_EQ(BitPatterns::one, valueAsBits(result_cos)); EXPECT_EQ(bits, valueAsBits(result_sin)); bits = 0x00400000; x = valueFromBits(bits); __llvm_libc::sincosf(x, &result_sin, &result_cos); - EXPECT_MPFR_MATCH(mpfr::OP_Cos, x, result_cos, tolerance); - EXPECT_MPFR_MATCH(mpfr::OP_Sin, x, result_sin, tolerance); + EXPECT_MPFR_MATCH(mpfr::Operation::Cos, x, result_cos, tolerance); + EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, result_sin, tolerance); EXPECT_EQ(BitPatterns::one, valueAsBits(result_cos)); EXPECT_EQ(bits, valueAsBits(result_sin)); } @@ -127,7 +127,7 @@ TEST(SinCosfTest, SDCOMP_26094) { float x = valueFromBits(v); float sin, cos; __llvm_libc::sincosf(x, &sin, &cos); - EXPECT_MPFR_MATCH(mpfr::OP_Cos, x, cos, tolerance); - EXPECT_MPFR_MATCH(mpfr::OP_Sin, x, sin, tolerance); + EXPECT_MPFR_MATCH(mpfr::Operation::Cos, x, cos, tolerance); + EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, sin, tolerance); } } diff --git a/libc/test/src/math/sinf_test.cpp b/libc/test/src/math/sinf_test.cpp index e0821c621dccf..437281ada43a1 100644 --- a/libc/test/src/math/sinf_test.cpp +++ b/libc/test/src/math/sinf_test.cpp @@ -80,13 +80,13 @@ TEST(SinfTest, InFloatRange) { float x = valueFromBits(v); if (isnan(x) || isinf(x)) continue; - ASSERT_MPFR_MATCH(mpfr::OP_Sin, x, __llvm_libc::sinf(x), tolerance); + ASSERT_MPFR_MATCH(mpfr::Operation::Sin, x, __llvm_libc::sinf(x), tolerance); } } TEST(SinfTest, SpecificBitPatterns) { float x = valueFromBits(0xc70d39a1); - EXPECT_MPFR_MATCH(mpfr::OP_Sin, x, __llvm_libc::sinf(x), tolerance); + EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, __llvm_libc::sinf(x), tolerance); } // For small values, sin(x) is x. 
@@ -94,13 +94,13 @@ TEST(SinfTest, SmallValues) { uint32_t bits = 0x17800000; float x = valueFromBits(bits); float result = __llvm_libc::sinf(x); - EXPECT_MPFR_MATCH(mpfr::OP_Sin, x, result, tolerance); + EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, result, tolerance); EXPECT_EQ(bits, valueAsBits(result)); bits = 0x00400000; x = valueFromBits(bits); result = __llvm_libc::sinf(x); - EXPECT_MPFR_MATCH(mpfr::OP_Sin, x, result, tolerance); + EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, result, tolerance); EXPECT_EQ(bits, valueAsBits(result)); } @@ -109,6 +109,6 @@ TEST(SinfTest, SmallValues) { TEST(SinfTest, SDCOMP_26094) { for (uint32_t v : sdcomp26094Values) { float x = valueFromBits(v); - EXPECT_MPFR_MATCH(mpfr::OP_Sin, x, __llvm_libc::sinf(x), tolerance); + EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, __llvm_libc::sinf(x), tolerance); } } diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp index 74c2f760f0347..51c8c37592921 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.cpp +++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp @@ -86,21 +86,21 @@ class MPFRNumber { mpfr_init2(value, mpfrPrecision); MPFRNumber mpfrInput(rawValue); switch (op) { - case OP_Abs: + case Operation::Abs: mpfr_abs(value, mpfrInput.value, MPFR_RNDN); break; - case OP_Cos: + case Operation::Cos: mpfr_cos(value, mpfrInput.value, MPFR_RNDN); break; - case OP_Sin: - mpfr_sin(value, mpfrInput.value, MPFR_RNDN); - break; - case OP_Exp: + case Operation::Exp: mpfr_exp(value, mpfrInput.value, MPFR_RNDN); break; - case OP_Exp2: + case Operation::Exp2: mpfr_exp2(value, mpfrInput.value, MPFR_RNDN); break; + case Operation::Sin: + mpfr_sin(value, mpfrInput.value, MPFR_RNDN); + break; } } diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h index f6660f2fa78e7..e39ed91281a9d 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.h +++ b/libc/utils/MPFRWrapper/MPFRUtils.h @@ -39,7 +39,7 @@ struct Tolerance { uint32_t bits; }; -enum Operation { OP_Abs, OP_Cos, 
OP_Sin, OP_Exp, OP_Exp2 }; +enum class Operation : int { Abs, Cos, Exp, Exp2, Sin }; namespace internal { From 6bbaa62d26b6061c93eb62c82048c14014ab7bd7 Mon Sep 17 00:00:00 2001 From: Denys Petrov Date: Fri, 22 May 2020 18:01:53 +0300 Subject: [PATCH 182/770] [analyzer] Add support for IE of keyboard and mouse navigation in HTML report IE throws errors while using key and mouse navigation through the error path tips. querySelectorAll method returns NodeList. NodeList belongs to browser API. IE doesn't have forEach among NodeList's methods. At the same time Array is a JavaScript object and can be used instead. The fix is in the converting NodeList into Array and keeps using forEach method as before. Checked in IE11, Chrome and Opera. Differential Revision: https://reviews.llvm.org/D80444 --- clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp index 184fdcfb3d4b7..bc7c41d039c4d 100644 --- a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp +++ b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp @@ -1070,8 +1070,13 @@ StringRef HTMLDiagnostics::generateKeyboardNavigationJavascript() {