From 6faf17b7626bfdeea977a7a333c6e20ed677615d Mon Sep 17 00:00:00 2001 From: Mingming Liu Date: Mon, 2 Dec 2024 16:15:52 -0800 Subject: [PATCH 001/191] [ThinLTO]Supports declaration import for global variables in distributed ThinLTO (#117616) When `-import-declaration` option is enabled, declaration import is supported for functions. https://github.com/llvm/llvm-project/pull/88024 has the context for this option. This patch supports declaration import for global variables in distributed ThinLTO. The motivating use case is to propagate `dso_local` attribute of global variables across modules, to optimize global variable access when a binary is built with `-fno-direct-access-external-data`. * With `-fdirect-access-external-data`, non thread-local global variables will [have `dso_local` attributes](https://github.com/llvm/llvm-project/blob/fe3c23b439b9a2d00442d9bc6a4ca86f73066a3d/clang/lib/CodeGen/CodeGenModule.cpp#L1730-L1746). This optimizes the global variable access as shown by https://gcc.godbolt.org/z/vMzWcKdh3 --- llvm/include/llvm/IR/ModuleSummaryIndex.h | 5 ++++ llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 3 +- llvm/lib/IR/ModuleSummaryIndex.cpp | 18 +++++++++-- llvm/lib/Transforms/IPO/FunctionImport.cpp | 12 ++++++-- .../ThinLTO/X86/import_callee_declaration.ll | 30 ++++++++++++++++++- 5 files changed, 62 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h index a4eb75ceb6930..2a05c2ac0758c 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -1913,6 +1913,11 @@ class ModuleSummaryIndex { /// Checks if we can import global variable from another module. bool canImportGlobalVar(const GlobalValueSummary *S, bool AnalyzeRefs) const; + + /// Same as above but checks whether the global var is importable as a + /// declaration. 
+ bool canImportGlobalVar(const GlobalValueSummary *S, bool AnalyzeRefs, + bool &CanImportDecl) const; }; /// GraphTraits definition to build SCC for the index diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 63f4e34074e06..0444cb9e1bce5 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -4900,7 +4900,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { NameVals.push_back(*ValueId); assert(ModuleIdMap.count(VS->modulePath())); NameVals.push_back(ModuleIdMap[VS->modulePath()]); - NameVals.push_back(getEncodedGVSummaryFlags(VS->flags())); + NameVals.push_back( + getEncodedGVSummaryFlags(VS->flags(), shouldImportValueAsDecl(VS))); NameVals.push_back(getEncodedGVarFlags(VS->varflags())); for (auto &RI : VS->refs()) { auto RefValueId = getValueId(RI.getGUID()); diff --git a/llvm/lib/IR/ModuleSummaryIndex.cpp b/llvm/lib/IR/ModuleSummaryIndex.cpp index 12a558b3bc1b1..d9024b0a8673f 100644 --- a/llvm/lib/IR/ModuleSummaryIndex.cpp +++ b/llvm/lib/IR/ModuleSummaryIndex.cpp @@ -328,6 +328,13 @@ void ModuleSummaryIndex::propagateAttributes( bool ModuleSummaryIndex::canImportGlobalVar(const GlobalValueSummary *S, bool AnalyzeRefs) const { + bool CanImportDecl; + return canImportGlobalVar(S, AnalyzeRefs, CanImportDecl); +} + +bool ModuleSummaryIndex::canImportGlobalVar(const GlobalValueSummary *S, + bool AnalyzeRefs, + bool &CanImportDecl) const { auto HasRefsPreventingImport = [this](const GlobalVarSummary *GVS) { // We don't analyze GV references during attribute propagation, so // GV with non-trivial initializer can be marked either read or @@ -348,13 +355,20 @@ bool ModuleSummaryIndex::canImportGlobalVar(const GlobalValueSummary *S, }; auto *GVS = cast(S->getBaseObject()); + const bool nonInterposable = + !GlobalValue::isInterposableLinkage(S->linkage()); + const bool eligibleToImport = !S->notEligibleToImport(); + + // It's correct to import a global variable only when it is not interposable + // and eligible to import. + CanImportDecl = (nonInterposable && eligibleToImport); + // Global variable with non-trivial initializer can be imported // if it's readonly. This gives us extra opportunities for constant // folding and converting indirect calls to direct calls. We don't // analyze GV references during attribute propagation, because we // don't know yet if it is readonly or not. - return !GlobalValue::isInterposableLinkage(S->linkage()) && - !S->notEligibleToImport() && + return nonInterposable && eligibleToImport && (!AnalyzeRefs || !HasRefsPreventingImport(GVS)); } diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp index 9cca3cdc76145..fde43bb354e83 100644 --- a/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -430,10 +430,18 @@ class GlobalsImporter final { // than as part of the logic deciding which functions to import (i.e. // based on profile information). Should we decide to handle them here, // we can refactor accordingly at that time. 
- if (!GVS || !Index.canImportGlobalVar(GVS, /* AnalyzeRefs */ true) || + bool CanImportDecl = false; + if (!GVS || shouldSkipLocalInAnotherModule(GVS, VI.getSummaryList().size(), - Summary.modulePath())) + Summary.modulePath()) || + !Index.canImportGlobalVar(GVS, /* AnalyzeRefs */ true, + CanImportDecl)) { + if (ImportDeclaration && CanImportDecl) + ImportList.maybeAddDeclaration(RefSummary->modulePath(), + VI.getGUID()); + continue; + } // If there isn't an entry for GUID, insert pair. // Otherwise, definition should take precedence over declaration. diff --git a/llvm/test/ThinLTO/X86/import_callee_declaration.ll b/llvm/test/ThinLTO/X86/import_callee_declaration.ll index 246920e5db0dc..72550fa4d6f0b 100644 --- a/llvm/test/ThinLTO/X86/import_callee_declaration.ll +++ b/llvm/test/ThinLTO/X86/import_callee_declaration.ll @@ -34,11 +34,14 @@ ; RUN: -r=main.bc,main,px \ ; RUN: -r=main.bc,small_func, \ ; RUN: -r=main.bc,large_func, \ +; RUN: -r=main.bc,read_write_global_vars, \ +; RUN: -r=main.bc,external_func, \ ; RUN: -r=lib.bc,callee,pl \ ; RUN: -r=lib.bc,large_indirect_callee,px \ ; RUN: -r=lib.bc,large_indirect_bar,px \ ; RUN: -r=lib.bc,small_func,px \ ; RUN: -r=lib.bc,large_func,px \ +; RUN: -r=lib.bc,read_write_global_vars,px \ ; RUN: -r=lib.bc,large_indirect_callee_alias,px \ ; RUN: -r=lib.bc,large_indirect_bar_alias,px \ ; RUN: -r=lib.bc,calleeAddrs,px -r=lib.bc,calleeAddrs2,px -o summary main.bc lib.bc 2>&1 | FileCheck %s --check-prefix=DUMP @@ -71,13 +74,22 @@ ; MAIN-DIS: [[LARGEINDIRECT:\^[0-9]+]] = gv: (guid: 14343440786664691134, summaries: (function: (module: [[LIBMOD]], flags: ({{.*}} importType: declaration), insts: 8, {{.*}}))) ; MAIN-DIS: gv: (guid: 16730173943625350469, summaries: (alias: (module: [[LIBMOD]], flags: ({{.*}} importType: declaration), aliasee: [[LARGEINDIRECT]]))) +; RUN: opt -passes=function-import -import-all-index -summary-file=main.bc.thinlto.bc main.bc -o main-after-import.bc +; RUN: llvm-dis -o - main-after-import.bc | FileCheck %s --check-prefix=MAIN-IMPORT + +; Tests that dso_local attribute is applied on a global var from its summary. +MAIN-IMPORT: @read_write_global_vars = external dso_local global [1 x ptr] + ; Run in-process ThinLTO and tests that ; 1. `callee` remains internalized even if the symbols of its callers ; (large_func, large_indirect_callee, large_indirect_bar) are exported as ; declarations and visible to main module. ; 2. the debugging logs from `function-import` pass are expected. +; Set relocation model to static so the dso_local attribute from a summary is +; applied on the global variable declaration. 
; RUN: llvm-lto2 run \ +; RUN: -relocation-model=static \ ; RUN: -debug-only=function-import \ ; RUN: -save-temps \ ; RUN: -thinlto-threads=1 \ @@ -87,11 +99,14 @@ ; RUN: -r=main.bc,main,px \ ; RUN: -r=main.bc,small_func, \ ; RUN: -r=main.bc,large_func, \ +; RUN: -r=main.bc,read_write_global_vars, \ +; RUN: -r=main.bc,external_func, \ ; RUN: -r=lib.bc,callee,pl \ ; RUN: -r=lib.bc,large_indirect_callee,px \ ; RUN: -r=lib.bc,large_indirect_bar,px \ ; RUN: -r=lib.bc,small_func,px \ ; RUN: -r=lib.bc,large_func,px \ +; RUN: -r=lib.bc,read_write_global_vars,px \ ; RUN: -r=lib.bc,large_indirect_callee_alias,px \ ; RUN: -r=lib.bc,large_indirect_bar_alias,px \ ; RUN: -r=lib.bc,calleeAddrs,px -r=lib.bc,calleeAddrs2,px -o in-process main.bc lib.bc 2>&1 | FileCheck %s --check-prefix=IMPORTDUMP @@ -103,7 +118,7 @@ ; IMPORTDUMP-DAG: Is importing function definition 13568239288960714650 small_indirect_callee from lib.cc ; IMPORTDUMP-DAG: Is importing function definition 6976996067367342685 small_func from lib.cc ; IMPORTDUMP-DAG: Is importing function declaration 2418497564662708935 large_func from lib.cc -; IMPORTDUMP-DAG: Not importing global 7680325410415171624 calleeAddrs from lib.cc +; IMPORTDUMP-DAG: Is importing global declaration 7680325410415171624 calleeAddrs from lib.cc ; IMPORTDUMP-DAG: Is importing alias declaration 16730173943625350469 large_indirect_callee_alias from lib.cc ; IMPORTDUMP-DAG: Is importing alias declaration 13590951773474913315 large_indirect_bar_alias from lib.cc ; IMPORTDUMP-DAG: Not importing function 13770917885399536773 large_indirect_bar @@ -115,6 +130,8 @@ ; IMPORT-DAG: define available_externally void @small_func ; IMPORT-DAG: define available_externally hidden void @small_indirect_callee ; IMPORT-DAG: declare void @large_func +; Tests that dso_local attribute is applied on a global var from its summary. +; IMPORT-DAG: @read_write_global_vars = external dso_local global [1 x ptr] ; IMPORT-NOT: large_indirect_callee ; IMPORT-NOT: large_indirect_callee_alias ; IMPORT-NOT: large_indirect_bar @@ -126,9 +143,14 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" +@read_write_global_vars = external global [1 x ptr] + define i32 @main() { call void @small_func() call void @large_func() + %num = call ptr @external_func(ptr @read_write_global_vars) + store ptr %num, ptr getelementptr inbounds ([1 x ptr], ptr @read_write_global_vars, i64 0, i64 0) + %res1 = call i32 @external_func(ptr @read_write_global_vars) ret i32 0 } @@ -137,6 +159,8 @@ declare void @small_func() ; large_func without attributes declare void @large_func() +declare ptr @external_func(ptr) + ;--- lib.ll source_filename = "lib.cc" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" @@ -149,6 +173,10 @@ target triple = "x86_64-unknown-linux-gnu" ; large_indirect_bar_alias is visible to main.ll but its aliasee isn't. @calleeAddrs2 = global [1 x ptr] [ptr @large_indirect_bar_alias] +; @read_write_global_vars is not read-only nor write-only (in main.ll). It's not +; a constant global var and has references, so it's not importable as a definition. 
+@read_write_global_vars = dso_local global [1 x ptr] [ptr @large_indirect_callee] + define void @callee() #1 { ret void } From b2dd4fabacd72d650a15116f361b1aa0020e2368 Mon Sep 17 00:00:00 2001 From: Mingming Liu Date: Mon, 2 Dec 2024 16:16:33 -0800 Subject: [PATCH 002/191] [NFC]Update a ThinLTO test case (#117584) Run `function-import` pass in a similar way that it runs in a ThinLTO distributed backend compile and checks that function alias is imported as a definition. More accurately, the function alias [is](https://github.com/llvm/llvm-project/blob/0c98776159cea0d1f391a8e1ac290483d4490240/llvm/lib/Transforms/IPO/FunctionImport.cpp#L1912) a [clone](https://github.com/llvm/llvm-project/blob/0c98776159cea0d1f391a8e1ac290483d4490240/llvm/lib/Transforms/IPO/FunctionImport.cpp#L1782) of the aliasee function. --- llvm/test/ThinLTO/X86/distributed_indexes.ll | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/test/ThinLTO/X86/distributed_indexes.ll b/llvm/test/ThinLTO/X86/distributed_indexes.ll index 4f2662b1b34e1..824c582c2025e 100644 --- a/llvm/test/ThinLTO/X86/distributed_indexes.ll +++ b/llvm/test/ThinLTO/X86/distributed_indexes.ll @@ -48,6 +48,10 @@ ; RUN: llvm-dis %t1.bc.thinlto.bc -o - | FileCheck %s --check-prefix=DIS ; DIS: aliasee: null +; RUN: opt -passes=function-import -import-all-index -summary-file=%t1.bc.thinlto.bc %t1.bc -S -o - 2>&1 | FileCheck %s --check-prefix=IR +; Tests that analias definition is imported. +; IR: define available_externally void @analias + target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" declare void @g(...) From b80a157d12ebeebb85fa0a1e53beb5b46dc16d36 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 2 Dec 2024 19:23:12 -0500 Subject: [PATCH 003/191] AMDGPU: Add codegen support for gfx950 v_ashr_pk_i8/u8_i32 (#118304) Co-authored-by: Sirish Pande --- llvm/lib/Target/AMDGPU/VOP3Instructions.td | 17 ++++++ llvm/test/CodeGen/AMDGPU/v_ashr_pk.ll | 62 ++++++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/v_ashr_pk.ll diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 065abde62af8a..ff9376e635af9 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -1457,6 +1457,23 @@ let SubtargetPredicate = HasAshrPkInsts, isReMaterializable = 1 in { defm V_ASHR_PK_U8_I32 : VOP3Inst<"v_ashr_pk_u8_i32", VOP3_Profile, int_amdgcn_ashr_pk_u8_i32>; } // End SubtargetPredicate = HasAshrPkInsts, isReMaterializable = 1 +class AshrPkI8Pat: GCNPat< + (i16 (or (i16 (shl (i16 (trunc (i32 (AMDGPUsmed3 (i32 (sra i32:$src1, i32:$src2)), (i32 lo), (i32 hi))))), (i16 8))), + (i16 (and (i16 (trunc (i32 (AMDGPUsmed3 (i32 (sra i32:$src0, i32:$src2)), (i32 lo), (i32 hi))))), (i16 255))))), + (inst 0, VSrc_b32:$src0, 0, VSrc_b32:$src1, 0, VSrc_b32:$src2, 0 ) +>; + +class AshrPkU8Pat: GCNPat< + (i16 (or (i16 (shl (i16 (trunc (i32 (AMDGPUsmed3 (i32 (sra i32:$src1, i32:$src2)), (i32 lo), (i32 hi))))), (i16 8))), + (i16 (trunc (i32 (AMDGPUsmed3 (i32 (sra i32:$src0, i32:$src2)), (i32 lo), (i32 hi))))))), + (inst 0, VSrc_b32:$src0, 0, VSrc_b32:$src1, 0, VSrc_b32:$src2, 0 ) +>; + +let SubtargetPredicate = HasAshrPkInsts in { + def : AshrPkI8Pat; + def : AshrPkU8Pat; +} + //===----------------------------------------------------------------------===// // Integer Clamp Patterns //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AMDGPU/v_ashr_pk.ll 
b/llvm/test/CodeGen/AMDGPU/v_ashr_pk.ll new file mode 100644 index 0000000000000..b5f43f9f68936 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/v_ashr_pk.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX950 %s +define amdgpu_kernel void @v_ashr_pk_i8_i32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 { +; GFX950-LABEL: v_ashr_pk_i8_i32: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c +; GFX950-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; GFX950-NEXT: v_mov_b32_e32 v1, 0xffffff80 +; GFX950-NEXT: v_mov_b32_e32 v2, 0x7f +; GFX950-NEXT: v_mov_b32_e32 v0, 0 +; GFX950-NEXT: s_waitcnt lgkmcnt(0) +; GFX950-NEXT: s_ashr_i32 s1, s1, s2 +; GFX950-NEXT: s_ashr_i32 s0, s0, s2 +; GFX950-NEXT: v_med3_i32 v3, s0, v1, v2 +; GFX950-NEXT: v_med3_i32 v1, s1, v1, v2 +; GFX950-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX950-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX950-NEXT: global_store_short v0, v1, s[6:7] +; GFX950-NEXT: s_endpgm + %insert.0 = insertelement <2 x i32> poison, i32 %src0, i64 0 + %build_vector = insertelement <2 x i32> %insert.0, i32 %src1, i64 1 + %src2.clamp = and i32 %src2, 31 + %insert.1 = insertelement <2 x i32> poison, i32 %src2.clamp, i64 0 + %src2.broadcast = shufflevector <2 x i32> %insert.1, <2 x i32> poison, <2 x i32> zeroinitializer + %ashr = ashr <2 x i32> %build_vector, %src2.broadcast + %sat.low = tail call <2 x i32> @llvm.smax.v2i32(<2 x i32> %ashr, <2 x i32> ) + %sat.hi = tail call <2 x i32> @llvm.smin.v2i32(<2 x i32> %sat.low, <2 x i32> ) + %trunc = trunc nsw <2 x i32> %sat.hi to <2 x i8> + %ret = bitcast <2 x i8> %trunc to i16 + store i16 %ret, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v_ashr_pk_u8_i32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 { +; GFX950-LABEL: v_ashr_pk_u8_i32: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c +; GFX950-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; GFX950-NEXT: v_mov_b32_e32 v1, 0xff +; GFX950-NEXT: v_mov_b32_e32 v0, 0 +; GFX950-NEXT: s_waitcnt lgkmcnt(0) +; GFX950-NEXT: s_ashr_i32 s1, s1, s2 +; GFX950-NEXT: s_ashr_i32 s0, s0, s2 +; GFX950-NEXT: v_med3_i32 v2, s0, 0, v1 +; GFX950-NEXT: v_med3_i32 v1, s1, 0, v1 +; GFX950-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX950-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX950-NEXT: global_store_short v0, v1, s[6:7] +; GFX950-NEXT: s_endpgm + %insert.0 = insertelement <2 x i32> poison, i32 %src0, i64 0 + %build_vector = insertelement <2 x i32> %insert.0, i32 %src1, i64 1 + %src2.clamp = and i32 %src2, 31 + %insert.1 = insertelement <2 x i32> poison, i32 %src2.clamp, i64 0 + %src2.broadcast = shufflevector <2 x i32> %insert.1, <2 x i32> poison, <2 x i32> zeroinitializer + %ashr = ashr <2 x i32> %build_vector, %src2.broadcast + %sat.low = tail call <2 x i32> @llvm.smax.v2i32(<2 x i32> %ashr, <2 x i32> ) + %sat.hi = tail call <2 x i32> @llvm.smin.v2i32(<2 x i32> %sat.low, <2 x i32> ) + %trunc = trunc nsw <2 x i32> %sat.hi to <2 x i8> + %ret = bitcast <2 x i8> %trunc to i16 + store i16 %ret, ptr addrspace(1) %out + ret void +} From 0a96161beb161933e64e188bfb0754df494c3a58 Mon Sep 17 00:00:00 2001 From: Dave Lee Date: Mon, 2 Dec 2024 16:23:26 -0800 Subject: [PATCH 004/191] [lldb] Simplify DumpValueObjectOptions::PointerDepth (NFC) (#117504) `Mode::Always` and `Mode::Default` are handled identically. 
`Mode::Never` is the same as having a count of 0. --- .../lldb/DataFormatters/DumpValueObjectOptions.h | 10 ++++------ lldb/source/DataFormatters/DumpValueObjectOptions.cpp | 10 ++++------ lldb/source/DataFormatters/ValueObjectPrinter.cpp | 9 +-------- .../Interpreter/OptionGroupValueObjectDisplay.cpp | 3 +-- 4 files changed, 10 insertions(+), 22 deletions(-) diff --git a/lldb/include/lldb/DataFormatters/DumpValueObjectOptions.h b/lldb/include/lldb/DataFormatters/DumpValueObjectOptions.h index c7f8cccc116c4..ce15963ab5662 100644 --- a/lldb/include/lldb/DataFormatters/DumpValueObjectOptions.h +++ b/lldb/include/lldb/DataFormatters/DumpValueObjectOptions.h @@ -22,13 +22,12 @@ namespace lldb_private { class DumpValueObjectOptions { public: struct PointerDepth { - enum class Mode { Always, Default, Never } m_mode; - uint32_t m_count; + uint32_t m_count = 0; PointerDepth Decremented() const { if (m_count > 0) - return PointerDepth{m_mode, m_count - 1}; - return PointerDepth{m_mode, m_count}; + return {m_count - 1}; + return *this; } bool CanAllowExpansion() const; @@ -65,8 +64,7 @@ class DumpValueObjectOptions { DumpValueObjectOptions(ValueObject &valobj); - DumpValueObjectOptions & - SetMaximumPointerDepth(PointerDepth depth = {PointerDepth::Mode::Never, 0}); + DumpValueObjectOptions &SetMaximumPointerDepth(uint32_t depth); DumpValueObjectOptions &SetMaximumDepth(uint32_t depth, bool is_default); diff --git a/lldb/source/DataFormatters/DumpValueObjectOptions.cpp b/lldb/source/DataFormatters/DumpValueObjectOptions.cpp index 18d590d47d9a0..b952fb643f13e 100644 --- a/lldb/source/DataFormatters/DumpValueObjectOptions.cpp +++ b/lldb/source/DataFormatters/DumpValueObjectOptions.cpp @@ -14,10 +14,8 @@ using namespace lldb; using namespace lldb_private; DumpValueObjectOptions::DumpValueObjectOptions() - : m_summary_sp(), m_root_valobj_name(), - m_max_ptr_depth(PointerDepth{PointerDepth::Mode::Default, 0}), - m_decl_printing_helper(), m_child_printing_decider(), - m_pointer_as_array(), m_use_synthetic(true), + : m_summary_sp(), m_root_valobj_name(), m_decl_printing_helper(), + m_child_printing_decider(), m_pointer_as_array(), m_use_synthetic(true), m_scope_already_checked(false), m_flat_output(false), m_ignore_cap(false), m_show_types(false), m_show_location(false), m_use_objc(false), m_hide_root_type(false), m_hide_root_name(false), m_hide_name(false), @@ -33,8 +31,8 @@ DumpValueObjectOptions::DumpValueObjectOptions(ValueObject &valobj) } DumpValueObjectOptions & -DumpValueObjectOptions::SetMaximumPointerDepth(PointerDepth depth) { - m_max_ptr_depth = depth; +DumpValueObjectOptions::SetMaximumPointerDepth(uint32_t depth) { + m_max_ptr_depth = {depth}; return *this; } diff --git a/lldb/source/DataFormatters/ValueObjectPrinter.cpp b/lldb/source/DataFormatters/ValueObjectPrinter.cpp index face38253efab..01e604e019f25 100644 --- a/lldb/source/DataFormatters/ValueObjectPrinter.cpp +++ b/lldb/source/DataFormatters/ValueObjectPrinter.cpp @@ -503,14 +503,7 @@ ValueObjectPrinter::PrintObjectDescriptionIfNeeded(bool value_printed, } bool DumpValueObjectOptions::PointerDepth::CanAllowExpansion() const { - switch (m_mode) { - case Mode::Always: - case Mode::Default: - return m_count > 0; - case Mode::Never: - return false; - } - return false; + return m_count > 0; } bool ValueObjectPrinter::ShouldPrintChildren( diff --git a/lldb/source/Interpreter/OptionGroupValueObjectDisplay.cpp b/lldb/source/Interpreter/OptionGroupValueObjectDisplay.cpp index 0e8c1f4b5f1d9..d633c469e603e 100644 --- 
a/lldb/source/Interpreter/OptionGroupValueObjectDisplay.cpp +++ b/lldb/source/Interpreter/OptionGroupValueObjectDisplay.cpp @@ -190,8 +190,7 @@ DumpValueObjectOptions OptionGroupValueObjectDisplay::GetAsDumpOptions( LanguageRuntimeDescriptionDisplayVerbosity lang_descr_verbosity, lldb::Format format, lldb::TypeSummaryImplSP summary_sp) { DumpValueObjectOptions options; - options.SetMaximumPointerDepth( - {DumpValueObjectOptions::PointerDepth::Mode::Always, ptr_depth}); + options.SetMaximumPointerDepth(ptr_depth); if (use_objc) options.SetShowSummary(false); else From 897dbdeab7e86248183f2e20cb1ae35e59551f62 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Mon, 2 Dec 2024 19:55:46 -0500 Subject: [PATCH 005/191] [gn] port 51003076ebc1 --- llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn b/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn index 945d31afca10f..c7c9459fdff16 100644 --- a/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn +++ b/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn @@ -6,10 +6,12 @@ unittest("CoreTests") { deps = [ "//bolt/lib/Core", "//bolt/lib/Rewrite", + "//bolt/lib/Profile", "//llvm/lib/DebugInfo/DWARF", "//llvm/lib/MC", "//llvm/lib/Object", "//llvm/lib/Target:TargetsToBuild", + "//llvm/lib/Testing/Support", ] sources = [ "BinaryContext.cpp", From a2cb2088e766eaadfd6d573d379d5c692262e24f Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Mon, 2 Dec 2024 16:57:22 -0800 Subject: [PATCH 006/191] [RISCV] Fix a typo in a deinterleave test This was supposed to test a deinterleave Factor=4, Offset=1, but had an off by one in the shuffle mask. --- .../rvv/fixed-vectors-shuffle-deinterleave.ll | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll index 27e66690d1b1e..6450174d44ca8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll @@ -96,24 +96,25 @@ define void @deinterleave4_8_i8(ptr %in, ptr %out) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, -9 -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: li a0, 5 -; CHECK-NEXT: vmadd.vx v10, a0, v9 +; CHECK-NEXT: li a0, -1 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v9, v8, 8 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v10, v9, 4 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vwaddu.vv v11, v9, v10 +; CHECK-NEXT: vwmaccu.vx v11, a0, v10 ; CHECK-NEXT: li a0, 34 ; CHECK-NEXT: vmv.v.i v0, 12 ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vcompress.vm v11, v8, v9 -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 8 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vrgather.vv v11, v8, v10, v0.t -; CHECK-NEXT: vse8.v v11, (a1) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vcompress.vm v10, v8, v9 +; CHECK-NEXT: vmerge.vvm v8, v10, v11, v0 +; CHECK-NEXT: vse8.v v8, (a1) ; CHECK-NEXT: ret entry: %0 = load <16 x i8>, ptr %in, align 1 - %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> + %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> store <8 x i8> 
%shuffle.i5, ptr %out, align 1 ret void } From f71ea4bc1b01fd7e29048db82b3e21fba74e8dab Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Tue, 3 Dec 2024 09:04:04 +0800 Subject: [PATCH 007/191] [SLP][REVEC] reorderNodeWithReuses should not be called if all users of a TreeEntry are ShuffleVectorInst. (#118260) --- .../Transforms/Vectorize/SLPVectorizer.cpp | 17 ++++++++++ llvm/test/Transforms/SLPVectorizer/revec.ll | 34 +++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 0475510264336..33657c26356d6 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6073,6 +6073,23 @@ void BoUpSLP::reorderTopToBottom() { TE->Scalars.size(); }) && "All users must be of VF size."); + if (SLPReVec) { + assert(SLPReVec && "Only supported by REVEC."); + // ShuffleVectorInst does not do reorderOperands (and it should not + // because ShuffleVectorInst supports only a limited set of + // patterns). Only do reorderNodeWithReuses if all of the users are + // not ShuffleVectorInst. + if (all_of(TE->UserTreeIndices, [&](const EdgeInfo &EI) { + return isa(EI.UserTE->getMainOp()); + })) + continue; + assert(none_of(TE->UserTreeIndices, + [&](const EdgeInfo &EI) { + return isa( + EI.UserTE->getMainOp()); + }) && + "Does not know how to reorder."); + } // Update ordering of the operands with the smaller VF than the given // one. reorderNodeWithReuses(*TE, Mask); diff --git a/llvm/test/Transforms/SLPVectorizer/revec.ll b/llvm/test/Transforms/SLPVectorizer/revec.ll index b160c0174c0a7..ce13f478d3811 100644 --- a/llvm/test/Transforms/SLPVectorizer/revec.ll +++ b/llvm/test/Transforms/SLPVectorizer/revec.ll @@ -447,3 +447,37 @@ for.end.loopexit: store <4 x i32> %4, ptr %out2, align 4 ret void } + +define void @test14(<8 x i1> %0) { +; CHECK-LABEL: @test14( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.vector.insert.v16i1.v8i1(<16 x i1> poison, <8 x i1> [[TMP0:%.*]], i64 0) +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i1> [[TMP1]], <16 x i1> poison, <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = sext <16 x i1> [[TMP2]] to <16 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> poison, <32 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> poison, <16 x i32> +; CHECK-NEXT: br label [[FOR_END_LOOPEXIT:%.*]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: [[TMP7:%.*]] = phi <16 x i16> [ [[TMP6]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i16> @llvm.vector.extract.v4i16.v16i16(<16 x i16> [[TMP7]], i64 12) +; CHECK-NEXT: [[OR0:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: ret void +; +entry: + %sext0 = sext <8 x i1> %0 to <8 x i16> + %sext1 = sext <8 x i1> %0 to <8 x i16> + %1 = shufflevector <8 x i16> %sext0, <8 x i16> zeroinitializer, <4 x i32> + %2 = shufflevector <8 x i16> %sext0, <8 x i16> zeroinitializer, <4 x i32> + %3 = shufflevector <8 x i16> %sext1, <8 x i16> zeroinitializer, <4 x i32> + %4 = shufflevector <8 x i16> %sext1, <8 x i16> zeroinitializer, <4 x i32> + br label %for.end.loopexit + +for.end.loopexit: + %phi0 = phi <4 x i16> [ %1, %entry ] + %phi1 = phi <4 x i16> [ %2, %entry ] + %phi2 = phi <4 x i16> [ %3, %entry ] + %phi3 = phi <4 x i16> [ %4, %entry ] + %or0 = or <4 x i16> %phi1, zeroinitializer + ret void +} From 
c3536b263f253a69fb336fb0617ee33a01a5c5dd Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Mon, 2 Dec 2024 17:08:07 -0800 Subject: [PATCH 008/191] [WebAssembly] Define call-indirect-overlong and bulk-memory-opt features (#117087) This defines some new target features. These are subsets of existing features that reflect implementation concerns: - "call-indirect-overlong" - implied by "reference-types"; just the overlong encoding for the `call_indirect` immediate, and not the actual reference types. - "bulk-memory-opt" - implied by "bulk-memory": just `memory.copy` and `memory.fill`, and not the other instructions in the bulk-memory proposal. This is split out from https://github.com/llvm/llvm-project/pull/112035. --------- Co-authored-by: Heejin Ahn --- clang/include/clang/Driver/Options.td | 4 +++ clang/lib/Basic/Targets/WebAssembly.cpp | 34 +++++++++++++++++++ clang/lib/Basic/Targets/WebAssembly.h | 2 ++ lld/test/wasm/compress-relocs.ll | 2 +- lld/test/wasm/import-table-explicit.s | 2 +- lld/test/wasm/invalid-mvp-table-use.s | 2 +- lld/test/wasm/lto/Inputs/libcall-archive.ll | 2 +- lld/test/wasm/lto/libcall-archive.ll | 2 +- lld/test/wasm/lto/stub-library-libcall.s | 4 +-- lld/test/wasm/multi-table.s | 2 +- lld/wasm/InputFiles.cpp | 21 ++++++------ lld/wasm/SyntheticSections.cpp | 5 +-- .../AsmParser/WebAssemblyAsmParser.cpp | 28 +++++++++++---- llvm/lib/Target/WebAssembly/WebAssembly.td | 17 +++++++--- .../WebAssembly/WebAssemblyFastISel.cpp | 2 +- .../WebAssembly/WebAssemblyISelLowering.cpp | 2 +- .../WebAssembly/WebAssemblyInstrBulkMemory.td | 8 ++--- .../WebAssembly/WebAssemblyInstrInfo.td | 8 +++++ .../WebAssemblySelectionDAGInfo.cpp | 4 +-- .../WebAssembly/WebAssemblySubtarget.cpp | 18 ++++++++++ .../Target/WebAssembly/WebAssemblySubtarget.h | 4 +++ .../WebAssembly/WebAssemblyUtilities.cpp | 4 +-- .../test/CodeGen/WebAssembly/call-indirect.ll | 4 +-- .../WebAssembly/cfg-stackify-eh-legacy.ll | 6 ++-- .../CodeGen/WebAssembly/cfg-stackify-eh.ll | 6 ++-- .../CodeGen/WebAssembly/disable-feature.ll | 4 +-- .../CodeGen/WebAssembly/function-pointer64.ll | 4 +-- .../CodeGen/WebAssembly/reference-types.ll | 6 +++- .../WebAssembly/target-features-attrs.ll | 10 ++++-- .../WebAssembly/target-features-cpus.ll | 27 +++++++++++---- .../WebAssembly/target-features-tls.ll | 5 ++- .../WebAssembly/extern-functype-intrinsic.ll | 4 +-- llvm/test/MC/WebAssembly/function-alias.ll | 4 +-- llvm/test/MC/WebAssembly/libcall.ll | 2 +- 34 files changed, 190 insertions(+), 69 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 9c356c9d2ea4e..cb96b5daed9d3 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5092,6 +5092,10 @@ def matomics : Flag<["-"], "matomics">, Group; def mno_atomics : Flag<["-"], "mno-atomics">, Group; def mbulk_memory : Flag<["-"], "mbulk-memory">, Group; def mno_bulk_memory : Flag<["-"], "mno-bulk-memory">, Group; +def mbulk_memory_opt : Flag<["-"], "mbulk-memory-opt">, Group; +def mno_bulk_memory_opt : Flag<["-"], "mno-bulk-memory-opt">, Group; +def mcall_indirect_overlong : Flag<["-"], "mcall-indirect-overlong">, Group; +def mno_call_indirect_overlong : Flag<["-"], "mno-call-indirect-overlong">, Group; def mexception_handing : Flag<["-"], "mexception-handling">, Group; def mno_exception_handing : Flag<["-"], "mno-exception-handling">, Group; def mextended_const : Flag<["-"], "mextended-const">, Group; diff --git a/clang/lib/Basic/Targets/WebAssembly.cpp 
b/clang/lib/Basic/Targets/WebAssembly.cpp index 0b380bdf835ff..d9d01bceb433a 100644 --- a/clang/lib/Basic/Targets/WebAssembly.cpp +++ b/clang/lib/Basic/Targets/WebAssembly.cpp @@ -47,6 +47,8 @@ bool WebAssemblyTargetInfo::hasFeature(StringRef Feature) const { return llvm::StringSwitch(Feature) .Case("atomics", HasAtomics) .Case("bulk-memory", HasBulkMemory) + .Case("bulk-memory-opt", HasBulkMemoryOpt) + .Case("call-indirect-overlong", HasCallIndirectOverlong) .Case("exception-handling", HasExceptionHandling) .Case("extended-const", HasExtendedConst) .Case("fp16", HasFP16) @@ -79,6 +81,8 @@ void WebAssemblyTargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__wasm_atomics__"); if (HasBulkMemory) Builder.defineMacro("__wasm_bulk_memory__"); + if (HasBulkMemoryOpt) + Builder.defineMacro("__wasm_bulk_memory_opt__"); if (HasExceptionHandling) Builder.defineMacro("__wasm_exception_handling__"); if (HasExtendedConst) @@ -155,6 +159,8 @@ bool WebAssemblyTargetInfo::initFeatureMap( const std::vector &FeaturesVec) const { auto addGenericFeatures = [&]() { Features["bulk-memory"] = true; + Features["bulk-memory-opt"] = true; + Features["call-indirect-overlong"] = true; Features["multivalue"] = true; Features["mutable-globals"] = true; Features["nontrapping-fptoint"] = true; @@ -200,6 +206,22 @@ bool WebAssemblyTargetInfo::handleTargetFeatures( HasBulkMemory = false; continue; } + if (Feature == "+bulk-memory-opt") { + HasBulkMemoryOpt = true; + continue; + } + if (Feature == "-bulk-memory-opt") { + HasBulkMemoryOpt = false; + continue; + } + if (Feature == "+call-indirect-overlong") { + HasCallIndirectOverlong = true; + continue; + } + if (Feature == "-call-indirect-overlong") { + HasCallIndirectOverlong = false; + continue; + } if (Feature == "+exception-handling") { HasExceptionHandling = true; continue; @@ -310,6 +332,18 @@ bool WebAssemblyTargetInfo::handleTargetFeatures( << Feature << "-target-feature"; return false; } + + // bulk-memory-opt is a subset of bulk-memory. + if (HasBulkMemory) { + HasBulkMemoryOpt = true; + } + + // The reference-types feature included the change to `call_indirect` + // encodings to support overlong immediates. 
+ if (HasReferenceTypes) { + HasCallIndirectOverlong = true; + } + return true; } diff --git a/clang/lib/Basic/Targets/WebAssembly.h b/clang/lib/Basic/Targets/WebAssembly.h index d8ed88b4c840f..c92ed161a92a7 100644 --- a/clang/lib/Basic/Targets/WebAssembly.h +++ b/clang/lib/Basic/Targets/WebAssembly.h @@ -55,6 +55,8 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyTargetInfo : public TargetInfo { bool HasAtomics = false; bool HasBulkMemory = false; + bool HasBulkMemoryOpt = false; + bool HasCallIndirectOverlong = false; bool HasExceptionHandling = false; bool HasExtendedConst = false; bool HasFP16 = false; diff --git a/lld/test/wasm/compress-relocs.ll b/lld/test/wasm/compress-relocs.ll index f1faab754cb76..cea9f3476e996 100644 --- a/lld/test/wasm/compress-relocs.ll +++ b/lld/test/wasm/compress-relocs.ll @@ -1,5 +1,5 @@ ; RUN: llc -filetype=obj %s -o %t.o -; RUN: llvm-mc -mattr=+reference-types -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/call-indirect.s -o %t2.o +; RUN: llvm-mc -mattr=+call-indirect-overlong -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/call-indirect.s -o %t2.o ; RUN: wasm-ld --export-dynamic -o %t.wasm %t2.o %t.o ; RUN: obj2yaml %t.wasm | FileCheck %s ; RUN: wasm-ld --export-dynamic -O2 -o %t-opt.wasm %t2.o %t.o diff --git a/lld/test/wasm/import-table-explicit.s b/lld/test/wasm/import-table-explicit.s index 1dc21beba0629..701b7a1dc3e16 100644 --- a/lld/test/wasm/import-table-explicit.s +++ b/lld/test/wasm/import-table-explicit.s @@ -1,4 +1,4 @@ -# RUN: llvm-mc -mattr=+reference-types -filetype=obj -triple=wasm32-unknown-unknown %s -o %t.o +# RUN: llvm-mc -mattr=+call-indirect-overlong -filetype=obj -triple=wasm32-unknown-unknown %s -o %t.o # RUN: wasm-ld --import-table -o %t.wasm %t.o # RUN: obj2yaml %t.wasm | FileCheck %s diff --git a/lld/test/wasm/invalid-mvp-table-use.s b/lld/test/wasm/invalid-mvp-table-use.s index b4f12a7eeb9a4..58c472e29d1ad 100644 --- a/lld/test/wasm/invalid-mvp-table-use.s +++ b/lld/test/wasm/invalid-mvp-table-use.s @@ -1,7 +1,7 @@ # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s # # If any table is defined or declared besides the __indirect_function_table, -# the compilation unit should be compiled with -mattr=+reference-types, +# the compilation unit should be compiled with -mattr=+call-indirect-overlong, # causing symbol table entries to be emitted for all tables. 
# RUN: not wasm-ld --no-entry %t.o -o %t.wasm 2>&1 | FileCheck -check-prefix=CHECK-ERR %s diff --git a/lld/test/wasm/lto/Inputs/libcall-archive.ll b/lld/test/wasm/lto/Inputs/libcall-archive.ll index 7d8c34196dfe4..30764af83e673 100644 --- a/lld/test/wasm/lto/Inputs/libcall-archive.ll +++ b/lld/test/wasm/lto/Inputs/libcall-archive.ll @@ -5,4 +5,4 @@ define void @memcpy() #0 { ret void } -attributes #0 = { "target-features"="-bulk-memory" } +attributes #0 = { "target-features"="-bulk-memory,-bulk-memory-opt" } diff --git a/lld/test/wasm/lto/libcall-archive.ll b/lld/test/wasm/lto/libcall-archive.ll index 5c46d2f7ed783..0cee9a5de29f6 100644 --- a/lld/test/wasm/lto/libcall-archive.ll +++ b/lld/test/wasm/lto/libcall-archive.ll @@ -16,7 +16,7 @@ entry: declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) -attributes #0 = { "target-features"="-bulk-memory" } +attributes #0 = { "target-features"="-bulk-memory,-bulk-memory-opt" } ; CHECK: - Type: CUSTOM ; CHECK-NEXT: Name: name diff --git a/lld/test/wasm/lto/stub-library-libcall.s b/lld/test/wasm/lto/stub-library-libcall.s index d65983c0cf5bf..40e15933f7bc3 100644 --- a/lld/test/wasm/lto/stub-library-libcall.s +++ b/lld/test/wasm/lto/stub-library-libcall.s @@ -2,7 +2,7 @@ # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t_main.o %t/main.s # RUN: llvm-as %S/Inputs/foo.ll -o %t_foo.o # RUN: llvm-as %S/Inputs/libcall.ll -o %t_libcall.o -# RUN: wasm-ld -mllvm -mattr=-bulk-memory %t_main.o %t_libcall.o %t_foo.o %p/Inputs/stub.so -o %t.wasm +# RUN: wasm-ld -mllvm -mattr=-bulk-memory,-bulk-memory-opt %t_main.o %t_libcall.o %t_foo.o %p/Inputs/stub.so -o %t.wasm # RUN: obj2yaml %t.wasm | FileCheck %s # The function `func_with_libcall` will generate an undefined reference to @@ -12,7 +12,7 @@ # If %t_foo.o is not included in the link we get an undefined symbol reported # to the dependency of memcpy on the foo export: -# RUN: not wasm-ld -mllvm -mattr=-bulk-memory %t_main.o %t_libcall.o %p/Inputs/stub.so -o %t.wasm 2>&1 | FileCheck --check-prefix=MISSING %s +# RUN: not wasm-ld -mllvm -mattr=-bulk-memory,-bulk-memory-opt %t_main.o %t_libcall.o %p/Inputs/stub.so -o %t.wasm 2>&1 | FileCheck --check-prefix=MISSING %s # MISSING: stub.so: undefined symbol: foo. Required by memcpy #--- main.s diff --git a/lld/test/wasm/multi-table.s b/lld/test/wasm/multi-table.s index bf905ac748f9f..afe8ddac49768 100644 --- a/lld/test/wasm/multi-table.s +++ b/lld/test/wasm/multi-table.s @@ -26,7 +26,7 @@ call_indirect_explicit_tables: call_indirect table_b, () -> () end_function -# RT-MVP: wasm-ld: error: object file not built with 'reference-types' feature conflicts with import of table table_a by file +# RT-MVP: wasm-ld: error: object file not built with 'reference-types' or 'call-indirect-overlong' feature conflicts with import of table table_a by file # CHECK: --- !WASM # CHECK-NEXT: FileHeader: diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp index fd06788457966..221f02aa1c157 100644 --- a/lld/wasm/InputFiles.cpp +++ b/lld/wasm/InputFiles.cpp @@ -255,13 +255,14 @@ static void setRelocs(const std::vector &chunks, } } -// An object file can have two approaches to tables. With the reference-types -// feature enabled, input files that define or use tables declare the tables -// using symbols, and record each use with a relocation. This way when the -// linker combines inputs, it can collate the tables used by the inputs, -// assigning them distinct table numbers, and renumber all the uses as -// appropriate. 
At the same time, the linker has special logic to build the -// indirect function table if it is needed. +// An object file can have two approaches to tables. With the +// reference-types feature or call-indirect-overlong feature enabled +// (explicitly, or implied by the reference-types feature), input files that +// define or use tables declare the tables using symbols, and record each use +// with a relocation. This way when the linker combines inputs, it can collate +// the tables used by the inputs, assigning them distinct table numbers, and +// renumber all the uses as appropriate. At the same time, the linker has +// special logic to build the indirect function table if it is needed. // // However, MVP object files (those that target WebAssembly 1.0, the "minimum // viable product" version of WebAssembly) neither write table symbols nor @@ -284,9 +285,9 @@ void ObjFile::addLegacyIndirectFunctionTableIfNeeded( return; // It's possible for an input to define tables and also use the indirect - // function table, but forget to compile with -mattr=+reference-types. - // For these newer files, we require symbols for all tables, and - // relocations for all of their uses. + // function table, but forget to compile with -mattr=+call-indirect-overlong + // or -mattr=+reference-types. For these newer files, we require symbols for + // all tables, and relocations for all of their uses. if (tableSymbolCount != 0) { error(toString(this) + ": expected one symbol table entry for each of the " + diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp index 1454c3324af98..6b32d12ebeb45 100644 --- a/lld/wasm/SyntheticSections.cpp +++ b/lld/wasm/SyntheticSections.cpp @@ -326,8 +326,9 @@ void TableSection::addTable(InputTable *table) { // to assign table number 0 to the indirect function table. for (const auto *culprit : out.importSec->importedSymbols) { if (isa(culprit)) { - error("object file not built with 'reference-types' feature " - "conflicts with import of table " + + error("object file not built with 'reference-types' or " + "'call-indirect-overlong' feature conflicts with import of " + "table " + culprit->getName() + " by file " + toString(culprit->getFile())); return; diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp index 10451600050ca..f693ef3dbf962 100644 --- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp +++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp @@ -276,7 +276,18 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser { : MCTargetAsmParser(Options, STI, MII), Parser(Parser), Lexer(Parser.getLexer()), Is64(STI.getTargetTriple().isArch64Bit()), TC(Parser, MII, Is64), SkipTypeCheck(Options.MCNoTypeCheck) { - setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + FeatureBitset FBS = ComputeAvailableFeatures(STI.getFeatureBits()); + + // bulk-memory implies bulk-memory-opt + if (FBS.test(WebAssembly::FeatureBulkMemory)) { + FBS.set(WebAssembly::FeatureBulkMemoryOpt); + } + // reference-types implies call-indirect-overlong + if (FBS.test(WebAssembly::FeatureReferenceTypes)) { + FBS.set(WebAssembly::FeatureCallIndirectOverlong); + } + + setAvailableFeatures(FBS); // Don't type check if this is inline asm, since that is a naked sequence of // instructions without a function/locals decl. 
auto &SM = Parser.getSourceManager(); @@ -291,7 +302,8 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser { DefaultFunctionTable = getOrCreateFunctionTableSymbol( getContext(), "__indirect_function_table", Is64); - if (!STI->checkFeatures("+reference-types")) + if (!STI->checkFeatures("+call-indirect-overlong") && + !STI->checkFeatures("+reference-types")) DefaultFunctionTable->setOmitFromLinkingSection(); } @@ -531,11 +543,13 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser { } bool parseFunctionTableOperand(std::unique_ptr *Op) { - if (STI->checkFeatures("+reference-types")) { - // If the reference-types feature is enabled, there is an explicit table - // operand. To allow the same assembly to be compiled with or without - // reference types, we allow the operand to be omitted, in which case we - // default to __indirect_function_table. + if (STI->checkFeatures("+call-indirect-overlong") || + STI->checkFeatures("+reference-types")) { + // If the call-indirect-overlong feature is enabled, or implied by the + // reference-types feature, there is an explicit table operand. To allow + // the same assembly to be compiled with or without + // call-indirect-overlong, we allow the operand to be omitted, in which + // case we default to __indirect_function_table. auto &Tok = Lexer.getTok(); if (Tok.is(AsmToken::Identifier)) { auto *Sym = diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.td b/llvm/lib/Target/WebAssembly/WebAssembly.td index 88628f2a79354..3b9254b3a7cee 100644 --- a/llvm/lib/Target/WebAssembly/WebAssembly.td +++ b/llvm/lib/Target/WebAssembly/WebAssembly.td @@ -29,6 +29,14 @@ def FeatureBulkMemory : SubtargetFeature<"bulk-memory", "HasBulkMemory", "true", "Enable bulk memory operations">; +def FeatureBulkMemoryOpt : + SubtargetFeature<"bulk-memory-opt", "HasBulkMemoryOpt", "true", + "Enable bulk memory optimization operations">; + +def FeatureCallIndirectOverlong : + SubtargetFeature<"call-indirect-overlong", "HasCallIndirectOverlong", "true", + "Enable overlong encoding for call_indirect immediates">; + def FeatureExceptionHandling : SubtargetFeature<"exception-handling", "HasExceptionHandling", "true", "Enable Wasm exception handling">; @@ -114,15 +122,16 @@ def : ProcessorModel<"mvp", NoSchedModel, []>; // consideration given to available support in relevant engines and tools, and // the importance of the features. def : ProcessorModel<"generic", NoSchedModel, - [FeatureBulkMemory, FeatureMultivalue, + [FeatureBulkMemory, FeatureBulkMemoryOpt, + FeatureCallIndirectOverlong, FeatureMultivalue, FeatureMutableGlobals, FeatureNontrappingFPToInt, FeatureReferenceTypes, FeatureSignExt]>; // Latest and greatest experimental version of WebAssembly. Bugs included! 
def : ProcessorModel<"bleeding-edge", NoSchedModel, - [FeatureAtomics, FeatureBulkMemory, - FeatureExceptionHandling, FeatureExtendedConst, - FeatureFP16, FeatureMultiMemory, + [FeatureAtomics, FeatureBulkMemory, FeatureBulkMemoryOpt, + FeatureCallIndirectOverlong, FeatureExceptionHandling, + FeatureExtendedConst, FeatureFP16, FeatureMultiMemory, FeatureMultivalue, FeatureMutableGlobals, FeatureNontrappingFPToInt, FeatureRelaxedSIMD, FeatureReferenceTypes, FeatureSIMD128, FeatureSignExt, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index 558aaa38096f7..210a35e1462ac 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -895,7 +895,7 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) { // The table into which this call_indirect indexes. MCSymbolWasm *Table = WebAssembly::getOrCreateFunctionTableSymbol( MF->getContext(), Subtarget); - if (Subtarget->hasReferenceTypes()) { + if (Subtarget->hasCallIndirectOverlong()) { MIB.addSym(Table); } else { // Otherwise for the MVP there is at most one table whose number is 0, but diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 94b49387b58f9..c765d2b1ab95b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -768,7 +768,7 @@ LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB, MF.getContext(), Subtarget) : WebAssembly::getOrCreateFunctionTableSymbol( MF.getContext(), Subtarget); - if (Subtarget->hasReferenceTypes()) { + if (Subtarget->hasCallIndirectOverlong()) { MIB.addSym(Table); } else { // For the MVP there is at most one table whose number is 0, but we can't diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td index 0772afb039f82..79d6f21517e5d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td @@ -11,13 +11,13 @@ /// //===----------------------------------------------------------------------===// -// Instruction requiring HasBulkMemory and the bulk memory prefix byte +// Instruction requiring HasBulkMemoryOpt and the bulk memory prefix byte multiclass BULK_I pattern_r, string asmstr_r = "", string asmstr_s = "", bits<32> simdop = -1> { defm "" : I, - Requires<[HasBulkMemory]>; + Requires<[HasBulkMemoryOpt]>; } // Bespoke types and nodes for bulk memory ops @@ -89,14 +89,14 @@ defm CPY_A#B : I<(outs), (ins i32imm_op:$src_idx, i32imm_op:$dst_idx, rc:$dst, rc:$src, rc:$len )], "", "", 0>, - Requires<[HasBulkMemory]>; + Requires<[HasBulkMemoryOpt]>; let usesCustomInserter = 1, isCodeGenOnly = 1, mayStore = 1 in defm SET_A#B : I<(outs), (ins i32imm_op:$idx, rc:$dst, I32:$value, rc:$size), (outs), (ins i32imm_op:$idx), [(wasm_memset (i32 imm:$idx), rc:$dst, I32:$value, rc:$size)], "", "", 0>, - Requires<[HasBulkMemory]>; + Requires<[HasBulkMemoryOpt]>; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index b3ea499c4f915..415e802951a94 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -30,6 +30,14 @@ def HasBulkMemory : Predicate<"Subtarget->hasBulkMemory()">, AssemblerPredicate<(all_of FeatureBulkMemory), 
"bulk-memory">; +def HasBulkMemoryOpt : + Predicate<"Subtarget->hasBulkMemoryOpt()">, + AssemblerPredicate<(all_of FeatureBulkMemoryOpt), "bulk-memory-opt">; + +def HasCallIndirectOverlong : + Predicate<"Subtarget->hasCallIndirectOverlong()">, + AssemblerPredicate<(all_of FeatureCallIndirectOverlong), "call-indirect-overlong">; + def HasExceptionHandling : Predicate<"Subtarget->hasExceptionHandling()">, AssemblerPredicate<(all_of FeatureExceptionHandling), "exception-handling">; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp index d51bfeb6d8592..6f37dab409534 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp @@ -23,7 +23,7 @@ SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemcpy( SDValue Size, Align Alignment, bool IsVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { auto &ST = DAG.getMachineFunction().getSubtarget(); - if (!ST.hasBulkMemory()) + if (!ST.hasBulkMemoryOpt()) return SDValue(); SDValue MemIdx = DAG.getConstant(0, DL, MVT::i32); @@ -51,7 +51,7 @@ SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemset( SDValue Size, Align Alignment, bool IsVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo) const { auto &ST = DAG.getMachineFunction().getSubtarget(); - if (!ST.hasBulkMemory()) + if (!ST.hasBulkMemoryOpt()) return SDValue(); SDValue MemIdx = DAG.getConstant(0, DL, MVT::i32); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp index 912f61765579f..40ea48ab3ac48 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp @@ -34,6 +34,24 @@ WebAssemblySubtarget::initializeSubtargetDependencies(StringRef CPU, CPU = "generic"; ParseSubtargetFeatures(CPU, /*TuneCPU*/ CPU, FS); + + FeatureBitset Bits = getFeatureBits(); + + // bulk-memory implies bulk-memory-opt + if (HasBulkMemory) { + HasBulkMemoryOpt = true; + Bits.set(WebAssembly::FeatureBulkMemoryOpt); + } + + // reference-types implies call-indirect-overlong + if (HasReferenceTypes) { + HasCallIndirectOverlong = true; + Bits.set(WebAssembly::FeatureCallIndirectOverlong); + } + + // In case we changed any bits, update `MCSubtargetInfo`'s `FeatureBitset`. 
+ setFeatureBits(Bits); + return *this; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h index f2bf2902f775b..591ce25611e3e 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h @@ -41,6 +41,8 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo { bool HasAtomics = false; bool HasBulkMemory = false; + bool HasBulkMemoryOpt = false; + bool HasCallIndirectOverlong = false; bool HasExceptionHandling = false; bool HasExtendedConst = false; bool HasFP16 = false; @@ -95,6 +97,8 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo { bool hasAddr64() const { return TargetTriple.isArch64Bit(); } bool hasAtomics() const { return HasAtomics; } bool hasBulkMemory() const { return HasBulkMemory; } + bool hasBulkMemoryOpt() const { return HasBulkMemoryOpt; } + bool hasCallIndirectOverlong() const { return HasCallIndirectOverlong; } bool hasExceptionHandling() const { return HasExceptionHandling; } bool hasExtendedConst() const { return HasExtendedConst; } bool hasFP16() const { return HasFP16; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp index f950a915db96f..6cfc93ef1faae 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp @@ -116,7 +116,7 @@ MCSymbolWasm *WebAssembly::getOrCreateFunctionTableSymbol( Sym->setUndefined(); } // MVP object files can't have symtab entries for tables. - if (!(Subtarget && Subtarget->hasReferenceTypes())) + if (!(Subtarget && Subtarget->hasCallIndirectOverlong())) Sym->setOmitFromLinkingSection(); return Sym; } @@ -141,7 +141,7 @@ MCSymbolWasm *WebAssembly::getOrCreateFuncrefCallTableSymbol( Sym->setTableType(TableType); } // MVP object files can't have symtab entries for tables. 
- if (!(Subtarget && Subtarget->hasReferenceTypes())) + if (!(Subtarget && Subtarget->hasCallIndirectOverlong())) Sym->setOmitFromLinkingSection(); return Sym; } diff --git a/llvm/test/CodeGen/WebAssembly/call-indirect.ll b/llvm/test/CodeGen/WebAssembly/call-indirect.ll index 55a654f358490..e0a0d14deacba 100644 --- a/llvm/test/CodeGen/WebAssembly/call-indirect.ll +++ b/llvm/test/CodeGen/WebAssembly/call-indirect.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -asm-verbose=false -mattr=-reference-types -O2 | FileCheck --check-prefixes=CHECK,NOREF %s -; RUN: llc < %s -asm-verbose=false -mattr=+reference-types -O2 | FileCheck --check-prefixes=CHECK,REF %s +; RUN: llc < %s -asm-verbose=false -mattr=-reference-types,-call-indirect-overlong -O2 | FileCheck --check-prefixes=CHECK,NOREF %s +; RUN: llc < %s -asm-verbose=false -mattr=+call-indirect-overlong -O2 | FileCheck --check-prefixes=CHECK,REF %s ; RUN: llc < %s -asm-verbose=false -O2 --filetype=obj | obj2yaml | FileCheck --check-prefix=OBJ %s ; Test that compilation units with call_indirect but without any diff --git a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh-legacy.ll b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh-legacy.ll index 3b312dabcd84d..ab9023cbac604 100644 --- a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh-legacy.ll +++ b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh-legacy.ll @@ -1,9 +1,9 @@ ; REQUIRES: asserts ; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling,bulk-memory | FileCheck %s ; RUN: llc < %s -disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling,bulk-memory -; RUN: llc < %s -O0 -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -verify-machineinstrs -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling,-bulk-memory | FileCheck %s --check-prefix=NOOPT -; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling,-bulk-memory -wasm-disable-ehpad-sort -stats 2>&1 | FileCheck %s --check-prefix=NOSORT -; RUN: llc < %s -disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling,-bulk-memory -wasm-disable-ehpad-sort | FileCheck %s --check-prefix=NOSORT-LOCALS +; RUN: llc < %s -O0 -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -verify-machineinstrs -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling,-bulk-memory,-bulk-memory-opt | FileCheck %s --check-prefix=NOOPT +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm 
-mattr=+exception-handling,-bulk-memory,-bulk-memory-opt -wasm-disable-ehpad-sort -stats 2>&1 | FileCheck %s --check-prefix=NOSORT +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling,-bulk-memory,-bulk-memory-opt -wasm-disable-ehpad-sort | FileCheck %s --check-prefix=NOSORT-LOCALS target triple = "wasm32-unknown-unknown" diff --git a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll index 6df626df08883..22fda36c25bfd 100644 --- a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll +++ b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll @@ -1,9 +1,9 @@ ; REQUIRES: asserts ; RUN: llc < %s -disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -wasm-enable-exnref -exception-model=wasm -mattr=+exception-handling,bulk-memory | FileCheck %s ; RUN: llc < %s -disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -wasm-enable-exnref -exception-model=wasm -mattr=+exception-handling,bulk-memory -; RUN: llc < %s -O0 -disable-wasm-fallthrough-return-opt -verify-machineinstrs -wasm-enable-eh -wasm-enable-exnref -exception-model=wasm -mattr=+exception-handling,-bulk-memory | FileCheck %s --check-prefix=NOOPT -; RUN: llc < %s -disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -wasm-enable-exnref -exception-model=wasm -mattr=+exception-handling,-bulk-memory -wasm-disable-ehpad-sort -stats 2>&1 | FileCheck %s --check-prefix=NOSORT -; RUN: llc < %s -disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -wasm-enable-exnref -exception-model=wasm -mattr=+exception-handling,-bulk-memory -wasm-disable-ehpad-sort | FileCheck %s --check-prefix=NOSORT-LOCALS +; RUN: llc < %s -O0 -disable-wasm-fallthrough-return-opt -verify-machineinstrs -wasm-enable-eh -wasm-enable-exnref -exception-model=wasm -mattr=+exception-handling,-bulk-memory,-bulk-memory-opt | FileCheck %s --check-prefix=NOOPT +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -wasm-enable-exnref -exception-model=wasm -mattr=+exception-handling,-bulk-memory,-bulk-memory-opt -wasm-disable-ehpad-sort -stats 2>&1 | FileCheck %s --check-prefix=NOSORT +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -wasm-enable-exnref -exception-model=wasm -mattr=+exception-handling,-bulk-memory,-bulk-memory-opt -wasm-disable-ehpad-sort | FileCheck %s --check-prefix=NOSORT-LOCALS target triple = "wasm32-unknown-unknown" diff --git a/llvm/test/CodeGen/WebAssembly/disable-feature.ll b/llvm/test/CodeGen/WebAssembly/disable-feature.ll index 
0684432a114df..5f7275f3699ed 100644 --- a/llvm/test/CodeGen/WebAssembly/disable-feature.ll +++ b/llvm/test/CodeGen/WebAssembly/disable-feature.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mattr=-sign-ext,-bulk-memory | FileCheck %s +; RUN: llc < %s -mattr=-sign-ext,-bulk-memory,-bulk-memory-opt | FileCheck %s target triple = "wasm32-unknown-unknown" @@ -21,7 +21,7 @@ define i8 @not_use_extend8_s(i8 %v, i8 %x) { ret i8 %a } -attributes #0 = { "target-features"="+bulk-memory," } +attributes #0 = { "target-features"="+bulk-memory-opt" } declare void @llvm.memset.p0.i32(ptr, i8, i32, i1) diff --git a/llvm/test/CodeGen/WebAssembly/function-pointer64.ll b/llvm/test/CodeGen/WebAssembly/function-pointer64.ll index d5d10b00824fe..2accd4151767f 100644 --- a/llvm/test/CodeGen/WebAssembly/function-pointer64.ll +++ b/llvm/test/CodeGen/WebAssembly/function-pointer64.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -asm-verbose=false -mattr=-reference-types -O2 | FileCheck %s +; RUN: llc < %s -asm-verbose=false -mattr=-reference-types,-call-indirect-overlong -O2 | FileCheck %s ; RUN: llc < %s -asm-verbose=false -mattr=+reference-types -O2 | FileCheck --check-prefix=REF %s -; RUN: llc < %s -asm-verbose=false -mattr=-reference-types -O2 --filetype=obj | obj2yaml | FileCheck --check-prefix=YAML %s +; RUN: llc < %s -asm-verbose=false -mattr=-reference-types,-call-indirect-overlong -O2 --filetype=obj | obj2yaml | FileCheck --check-prefix=YAML %s ; This tests pointer features that may codegen differently in wasm64. diff --git a/llvm/test/CodeGen/WebAssembly/reference-types.ll b/llvm/test/CodeGen/WebAssembly/reference-types.ll index 168aaec8f0943..3df383b023726 100644 --- a/llvm/test/CodeGen/WebAssembly/reference-types.ll +++ b/llvm/test/CodeGen/WebAssembly/reference-types.ll @@ -7,7 +7,11 @@ define void @reference-types() { ret void } -; CHECK: .int8 1 +; CHECK: .section .custom_section.target_features,"",@ +; CHECK-NEXT: .int8 2 +; CHECK-NEXT: .int8 43 +; CHECK-NEXT: .int8 22 +; CHECK-NEXT: .ascii "call-indirect-overlong" ; CHECK-NEXT: .int8 43 ; CHECK-NEXT: .int8 15 ; CHECK-NEXT: .ascii "reference-types" diff --git a/llvm/test/CodeGen/WebAssembly/target-features-attrs.ll b/llvm/test/CodeGen/WebAssembly/target-features-attrs.ll index 25dee51ac8c38..0e46b96591816 100644 --- a/llvm/test/CodeGen/WebAssembly/target-features-attrs.ll +++ b/llvm/test/CodeGen/WebAssembly/target-features-attrs.ll @@ -55,11 +55,14 @@ attributes #2 = { "target-features"="+reference-types" } ; Features in function attributes: ; +atomics, +nontrapping-fptoint, +reference-types ; CHECK-LABEL: .custom_section.target_features,"",@ -; CHECK-NEXT: .int8 3 +; CHECK-NEXT: .int8 4 ; CHECK-NEXT: .int8 43 ; CHECK-NEXT: .int8 7 ; CHECK-NEXT: .ascii "atomics" ; CHECK-NEXT: .int8 43 +; CHECK-NEXT: .int8 22 +; CHECK-NEXT: .ascii "call-indirect-overlong" +; CHECK-NEXT: .int8 43 ; CHECK-NEXT: .int8 19 ; CHECK-NEXT: .ascii "nontrapping-fptoint" ; CHECK-NEXT: .int8 43 @@ -69,11 +72,14 @@ attributes #2 = { "target-features"="+reference-types" } ; Features in function attributes + features specified by -mattr= option: ; +atomics, +nontrapping-fptoint, +reference-types, +simd128 ; SIMD128-LABEL: .custom_section.target_features,"",@ -; SIMD128-NEXT: .int8 4 +; SIMD128-NEXT: .int8 5 ; SIMD128-NEXT: .int8 43 ; SIMD128-NEXT: .int8 7 ; SIMD128-NEXT: .ascii "atomics" ; SIMD128-NEXT: .int8 43 +; SIMD128-NEXT: .int8 22 +; SIMD128-NEXT: .ascii "call-indirect-overlong" +; SIMD128-NEXT: .int8 43 ; SIMD128-NEXT: 
.int8 19 ; SIMD128-NEXT: .ascii "nontrapping-fptoint" ; SIMD128-NEXT: .int8 43 diff --git a/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll b/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll index ba10dd94a9838..661f5d8463928 100644 --- a/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll +++ b/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll @@ -11,13 +11,19 @@ target triple = "wasm32-unknown-unknown" ; mvp: should not contain the target features section ; MVP-NOT: .custom_section.target_features,"",@ -; generic: +multivalue, +mutable-globals, +reference-types, +sign-ext +; generic: +call-indirect-overlong, +multivalue, +mutable-globals, +reference-types, +sign-ext ; GENERIC-LABEL: .custom_section.target_features,"",@ -; GENERIC-NEXT: .int8 6 +; GENERIC-NEXT: .int8 8 ; GENERIC-NEXT: .int8 43 ; GENERIC-NEXT: .int8 11 ; GENERIC-NEXT: .ascii "bulk-memory" ; GENERIC-NEXT: .int8 43 +; GENERIC-NEXT: .int8 15 +; GENERIC-NEXT: .ascii "bulk-memory-opt" +; GENERIC-NEXT: .int8 43 +; GENERIC-NEXT: .int8 22 +; GENERIC-NEXT: .ascii "call-indirect-overlong" +; GENERIC-NEXT: .int8 43 ; GENERIC-NEXT: .int8 10 ; GENERIC-NEXT: .ascii "multivalue" ; GENERIC-NEXT: .int8 43 @@ -33,12 +39,13 @@ target triple = "wasm32-unknown-unknown" ; GENERIC-NEXT: .int8 8 ; GENERIC-NEXT: .ascii "sign-ext" -; bleeding-edge: +atomics, +bulk-memory, +exception-handling, +extended-const, -; +fp16, +multimemory, +multivalue, +mutable-globals, -; +nontrapping-fptoint, +relaxed-simd, +reference-types, -; +simd128, +sign-ext, +tail-call +; bleeding-edge: +atomics, +bulk-memory, +bulk-memory-opt, +; +call-indirect-overlong, +exception-handling, +; +extended-const, +fp16, +multimemory, +multivalue, +; +mutable-globals, +nontrapping-fptoint, +relaxed-simd, +; +reference-types, +simd128, +sign-ext, +tail-call ; BLEEDING-EDGE-LABEL: .section .custom_section.target_features,"",@ -; BLEEDING-EDGE-NEXT: .int8 14 +; BLEEDING-EDGE-NEXT: .int8 16 ; BLEEDING-EDGE-NEXT: .int8 43 ; BLEEDING-EDGE-NEXT: .int8 7 ; BLEEDING-EDGE-NEXT: .ascii "atomics" @@ -46,6 +53,12 @@ target triple = "wasm32-unknown-unknown" ; BLEEDING-EDGE-NEXT: .int8 11 ; BLEEDING-EDGE-NEXT: .ascii "bulk-memory" ; BLEEDING-EDGE-NEXT: .int8 43 +; BLEEDING-EDGE-NEXT: .int8 15 +; BLEEDING-EDGE-NEXT: .ascii "bulk-memory-opt" +; BLEEDING-EDGE-NEXT: .int8 43 +; BLEEDING-EDGE-NEXT: .int8 22 +; BLEEDING-EDGE-NEXT: .ascii "call-indirect-overlong" +; BLEEDING-EDGE-NEXT: .int8 43 ; BLEEDING-EDGE-NEXT: .int8 18 ; BLEEDING-EDGE-NEXT: .ascii "exception-handling" ; BLEEDING-EDGE-NEXT: .int8 43 diff --git a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll index 45bc06b5d5c96..4abe01a73aeee 100644 --- a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll +++ b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll @@ -21,11 +21,14 @@ target triple = "wasm32-unknown-unknown" ; +bulk-memory ; BULK-MEM-LABEL: .custom_section.target_features,"",@ -; BULK-MEM-NEXT: .int8 2 +; BULK-MEM-NEXT: .int8 3 ; BULK-MEM-NEXT: .int8 43 ; BULK-MEM-NEXT: .int8 7 ; BULK-MEM-NEXT: .ascii "atomics" ; BULK-MEM-NEXT: .int8 43 ; BULK-MEM-NEXT: .int8 11 ; BULK-MEM-NEXT: .ascii "bulk-memory" +; BULK-MEM-NEXT: .int8 43 +; BULK-MEM-NEXT: .int8 15 +; BULK-MEM-NEXT: .ascii "bulk-memory-opt" ; BULK-MEM-NEXT: .tbss.foo,"T",@ diff --git a/llvm/test/MC/WebAssembly/extern-functype-intrinsic.ll b/llvm/test/MC/WebAssembly/extern-functype-intrinsic.ll index b321c0c82ad4d..6e9c64604e99d 100644 --- a/llvm/test/MC/WebAssembly/extern-functype-intrinsic.ll +++ 
b/llvm/test/MC/WebAssembly/extern-functype-intrinsic.ll @@ -1,5 +1,5 @@ -; RUN: llc %s -mattr=-bulk-memory -o - | FileCheck %s -; RUN: llc %s -mattr=-bulk-memory -o - | llvm-mc -triple=wasm32-unknown-unknown | FileCheck %s +; RUN: llc %s -mattr=-bulk-memory,-bulk-memory-opt -o - | FileCheck %s +; RUN: llc %s -mattr=-bulk-memory,-bulk-memory-opt -o - | llvm-mc -triple=wasm32-unknown-unknown | FileCheck %s ; ModuleID = 'test.c' source_filename = "test.c" diff --git a/llvm/test/MC/WebAssembly/function-alias.ll b/llvm/test/MC/WebAssembly/function-alias.ll index 036cd7d06e063..3f76516a9bcc8 100644 --- a/llvm/test/MC/WebAssembly/function-alias.ll +++ b/llvm/test/MC/WebAssembly/function-alias.ll @@ -1,5 +1,5 @@ -; RUN: llc -filetype=obj %s -mattr=-reference-types -o - | llvm-readobj --symbols - | FileCheck %s -; RUN: llc -filetype=obj %s -mattr=+reference-types -o - | llvm-readobj --symbols - | FileCheck --check-prefix=REF %s +; RUN: llc -filetype=obj %s -mattr=-reference-types,-call-indirect-overlong -o - | llvm-readobj --symbols - | FileCheck %s +; RUN: llc -filetype=obj %s -mattr=+reference-types,-call-indirect-overlong -o - | llvm-readobj --symbols - | FileCheck --check-prefix=REF %s target triple = "wasm32-unknown-unknown-wasm" diff --git a/llvm/test/MC/WebAssembly/libcall.ll b/llvm/test/MC/WebAssembly/libcall.ll index ffd32abe2345b..6f36ab7ad317f 100644 --- a/llvm/test/MC/WebAssembly/libcall.ll +++ b/llvm/test/MC/WebAssembly/libcall.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=obj -mattr=-bulk-memory %s -o - | obj2yaml | FileCheck %s +; RUN: llc -filetype=obj -mattr=-bulk-memory,-bulk-memory-opt %s -o - | obj2yaml | FileCheck %s target triple = "wasm32-unknown-unknown" From 7907292daa781aaba2be531a8599998f5fd3f645 Mon Sep 17 00:00:00 2001 From: fengfeng <153487255+fengfeng09@users.noreply.github.com> Date: Tue, 3 Dec 2024 09:21:03 +0800 Subject: [PATCH 009/191] [DAG] Apply Disjoint flag. 
(#118045) or disjoint (or disjoint (x, c0), c1) --> or disjoint x, or (c0, c1) Alive2: https://alive2.llvm.org/ce/z/3wPth5 --------- Signed-off-by: feng.feng --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 ++++- .../AArch64/apply-disjoint-flag-in-dagcombine.ll | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AArch64/apply-disjoint-flag-in-dagcombine.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6c8e9969784c9..6435a2119077f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1220,8 +1220,11 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) { // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2)) - if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) + if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) { + NewFlags.setDisjoint(Flags.hasDisjoint() && + N0->getFlags().hasDisjoint()); return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags); + } return SDValue(); } if (TLI.isReassocProfitable(DAG, N0, N1)) { diff --git a/llvm/test/CodeGen/AArch64/apply-disjoint-flag-in-dagcombine.ll b/llvm/test/CodeGen/AArch64/apply-disjoint-flag-in-dagcombine.ll new file mode 100644 index 0000000000000..5622f2ae20efd --- /dev/null +++ b/llvm/test/CodeGen/AArch64/apply-disjoint-flag-in-dagcombine.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-none-eabi %s -o - | FileCheck %s + +define i32 @test(i32 %a) { +; CHECK-LABEL: test: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: add w0, w0, #193 +; CHECK-NEXT: ret +entry: + %add = add i32 %a, 1 + %or1 = or disjoint i32 %add, 64 + %or = or disjoint i32 %or1, 128 + ret i32 %or +} From 814ed93e48db4d95965258e64e8580056828a264 Mon Sep 17 00:00:00 2001 From: c8ef Date: Tue, 3 Dec 2024 09:33:53 +0800 Subject: [PATCH 010/191] [clang] constexpr built-in elementwise bitreverse function. (#118177) Part of #51787. This patch adds constexpr support for the built-in elementwise bitreverse function. --- clang/docs/LanguageExtensions.rst | 2 +- clang/docs/ReleaseNotes.rst | 1 + clang/include/clang/Basic/Builtins.td | 2 +- clang/lib/AST/ExprConstant.cpp | 21 ++++++++++++++----- .../test/CodeGen/builtins-elementwise-math.c | 2 +- clang/test/Sema/constant_builtins_vector.cpp | 5 +++++ 6 files changed, 25 insertions(+), 8 deletions(-) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index c053a5ab3c528..52032e935928f 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -648,7 +648,7 @@ elementwise to the input. Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±infinity The integer elementwise intrinsics, including ``__builtin_elementwise_popcount``, -can be called in a ``constexpr`` context. +``__builtin_elementwise_bitreverse``, can be called in a ``constexpr`` context.
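A minimal illustration (a sketch, not part of the patch; the constants mirror the static_asserts added to clang/test/Sema/constant_builtins_vector.cpp below) of the compile-time folding this enables:

    // C++: these now evaluate in a constexpr context.
    static_assert(__builtin_elementwise_bitreverse(0x12345678) == 0x1E6A2C48);
    static_assert(__builtin_elementwise_bitreverse(0x0123456789ABCDEFULL) == 0xF7B3D591E6A2C480);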
============================================== ====================================================================== ========================================= Name Operation Supported element types diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 0bb2eb820cd72..20bd27ad52f57 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -404,6 +404,7 @@ Non-comprehensive list of changes in this release - ``__builtin_reduce_and`` function can now be used in constant expressions. - ``__builtin_reduce_or`` and ``__builtin_reduce_xor`` functions can now be used in constant expressions. - ``__builtin_elementwise_popcount`` function can now be used in constant expressions. +- ``__builtin_elementwise_bitreverse`` function can now be used in constant expressions. New Compiler Flags ------------------ diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 130e91103da06..dda44f3abe016 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -1270,7 +1270,7 @@ def ElementwiseATan2 : Builtin { def ElementwiseBitreverse : Builtin { let Spellings = ["__builtin_elementwise_bitreverse"]; - let Attributes = [NoThrow, Const, CustomTypeChecking]; + let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr]; let Prototype = "void(...)"; } diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index bb5ab67328fbc..6b5b95aee3552 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11310,7 +11310,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { switch (E->getBuiltinCallee()) { default: return false; - case Builtin::BI__builtin_elementwise_popcount: { + case Builtin::BI__builtin_elementwise_popcount: + case Builtin::BI__builtin_elementwise_bitreverse: { APValue Source; if (!EvaluateAsRValue(Info, E->getArg(0), Source)) return false; @@ -11322,9 +11323,18 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) { APSInt Elt = Source.getVectorElt(EltNum).getInt(); - ResultElements.push_back( - APValue(APSInt(APInt(Info.Ctx.getIntWidth(DestEltTy), Elt.popcount()), - DestEltTy->isUnsignedIntegerOrEnumerationType()))); + switch (E->getBuiltinCallee()) { + case Builtin::BI__builtin_elementwise_popcount: + ResultElements.push_back(APValue( + APSInt(APInt(Info.Ctx.getIntWidth(DestEltTy), Elt.popcount()), + DestEltTy->isUnsignedIntegerOrEnumerationType()))); + break; + case Builtin::BI__builtin_elementwise_bitreverse: + ResultElements.push_back( + APValue(APSInt(Elt.reverseBits(), + DestEltTy->isUnsignedIntegerOrEnumerationType()))); + break; + } } return Success(APValue(ResultElements.data(), ResultElements.size()), E); @@ -12833,7 +12843,8 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, case Builtin::BI__builtin_bitreverse8: case Builtin::BI__builtin_bitreverse16: case Builtin::BI__builtin_bitreverse32: - case Builtin::BI__builtin_bitreverse64: { + case Builtin::BI__builtin_bitreverse64: + case Builtin::BI__builtin_elementwise_bitreverse: { APSInt Val; if (!EvaluateInteger(E->getArg(0), Val, Info)) return false; diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c index f1f34432ca0ea..82f82dd1ed794 100644 --- a/clang/test/CodeGen/builtins-elementwise-math.c +++ b/clang/test/CodeGen/builtins-elementwise-math.c @@ -443,7 +443,7 @@ void test_builtin_elementwise_bitreverse(si8 vi1, si8 
vi2, // CHECK-NEXT: call i32 @llvm.bitreverse.i32(i32 [[IA1]]) b = __builtin_elementwise_bitreverse(int_as_one); - // CHECK: call i32 @llvm.bitreverse.i32(i32 -10) + // CHECK: store i32 1879048191, ptr @b, align 4 b = __builtin_elementwise_bitreverse(-10); // CHECK: [[SI:%.+]] = load i16, ptr %si.addr, align 2 diff --git a/clang/test/Sema/constant_builtins_vector.cpp b/clang/test/Sema/constant_builtins_vector.cpp index 772a682141ce4..45c729f76418d 100644 --- a/clang/test/Sema/constant_builtins_vector.cpp +++ b/clang/test/Sema/constant_builtins_vector.cpp @@ -817,3 +817,8 @@ static_assert(__builtin_elementwise_popcount(~0U) == 8 * sizeof(int)); static_assert(__builtin_elementwise_popcount(0L) == 0); static_assert(__builtin_elementwise_popcount(0xF0F0L) == 8); static_assert(__builtin_elementwise_popcount(~0LL) == 8 * sizeof(long long)); + +static_assert(__builtin_elementwise_bitreverse(0x12345678) == 0x1E6A2C48); +static_assert(__builtin_elementwise_bitreverse(0x0123456789ABCDEFULL) == 0xF7B3D591E6A2C480); +static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_bitreverse((vector4char){1, 2, 4, 8})) == (LITTLE_END ? 0x10204080 : 0x80402010)); +static_assert(__builtin_bit_cast(unsigned long long, __builtin_elementwise_bitreverse((vector4short){1, 2, 4, 8})) == (LITTLE_END ? 0x1000200040008000 : 0x8000400020001000)); From 1724188c19f363c877fcf1bca86d92af3864b338 Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Tue, 3 Dec 2024 08:58:48 +0700 Subject: [PATCH 011/191] [ObjectYAML][ELF] Take alignment into account when generating notes (#118157) The [System V ABI](https://www.sco.com/developers/gabi/latest/ch5.pheader.html#note_section) states that the note entries and their descriptor fields must be aligned to 4 or 8 bytes for 32-bit or 64-bit objects respectively. In practice, 64-bit systems can use both alignments, with the actual format being determined by the alignment of the segment. For example, the [Linux gABI extension](https://github.com/hjl-tools/linux-abi/wiki/linux-abi-draft.pdf) contains a special note on this, see 2.1.7 "Alignment of Note Sections". This patch adjusts the format of the generated notes to the specified section alignment. Since `llvm-readobj` was fixed in a similar way in [D150022](https://reviews.llvm.org/D150022), "[Object] Fix handling of Elf_Nhdr with sh_addralign=8", the generated notes can now be parsed successfully by the tool. --- llvm/lib/ObjectYAML/ELFEmitter.cpp | 19 ++- .../test/tools/yaml2obj/ELF/note-section.yaml | 129 ++++++++++++++++++ 2 files changed, 146 insertions(+), 2 deletions(-) diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index 476334024151a..001e701bd0b80 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -1799,6 +1799,20 @@ void ELFState::writeSectionContent(Elf_Shdr &SHeader, if (!Section.Notes) return; + unsigned Align; + switch (SHeader.sh_addralign) { + case 0: + case 4: + Align = 4; + break; + case 8: + Align = 8; + break; + default: + reportError(Section.Name + ": invalid alignment for a note section: 0x" + + Twine::utohexstr(SHeader.sh_addralign)); + } + uint64_t Offset = CBA.tell(); for (const ELFYAML::NoteEntry &NE : *Section.Notes) { // Write name size. @@ -1820,14 +1834,15 @@ void ELFState::writeSectionContent(Elf_Shdr &SHeader, if (!NE.Name.empty()) { CBA.write(NE.Name.data(), NE.Name.size()); CBA.write('\0'); - CBA.padToAlignment(4); } // Write description and padding. 
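// Illustrative layout example (a sketch, not part of the patch): with AddressAlign: 8,
// a note whose name is "ABCD" (5 bytes including the NUL) has its 2-byte descriptor
// placed at the next 8-byte boundary: 12-byte header, name at offsets 12-16, zero
// padding through offset 23, descriptor at offsets 24-25, then padding out to 32.
// This matches the .note.foo8 byte pattern checked by TEST17 in the YAML test below.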
if (NE.Desc.binary_size() != 0) { + CBA.padToAlignment(Align); CBA.writeAsBinary(NE.Desc); - CBA.padToAlignment(4); } + + CBA.padToAlignment(Align); } SHeader.sh_size = CBA.tell() - Offset; diff --git a/llvm/test/tools/yaml2obj/ELF/note-section.yaml b/llvm/test/tools/yaml2obj/ELF/note-section.yaml index 80359c4ec0183..26b95e1c2379b 100644 --- a/llvm/test/tools/yaml2obj/ELF/note-section.yaml +++ b/llvm/test/tools/yaml2obj/ELF/note-section.yaml @@ -333,3 +333,132 @@ Sections: - Name: ABC Desc: '123456' Type: NT_VERSION + +## Check that an incorrect alignment is reported + +# RUN: not yaml2obj --docnum=16 %s 2>&1 | FileCheck %s --check-prefix=ERR_ALIGN1 +# ERR_ALIGN1: error: .note.foo: invalid alignment for a note section: 0x1 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Name: .note.foo + Type: SHT_NOTE + AddressAlign: 1 + Notes: + - Type: 0x1 + +## Check that note entries and their `Desc` fields are aligned according to the +## specified section alignment + +# RUN: yaml2obj --docnum=17 %s -o - | \ +# RUN: llvm-readobj --sections --section-data --notes - | \ +# RUN: FileCheck %s --check-prefix=TEST17 + +# TEST17: Name: .note.foo4 +# TEST17: SectionData ( +# TEST17-NEXT: 0000: 05000000 02000000 01000000 41424344 |............ABCD| +# TEST17-NEXT: 0010: 00000000 01020000 00000000 03000000 |................| +# TEST17-NEXT: 0020: 02000000 03040500 04000000 00000000 |................| +# TEST17-NEXT: 0030: 03000000 474E5500 |....GNU.| +# TEST17-NEXT: ) +# TEST17: Name: .note.foo8 +# TEST17: SectionData ( +# TEST17-NEXT: 0000: 05000000 02000000 01000000 41424344 |............ABCD| +# TEST17-NEXT: 0010: 00000000 00000000 01020000 00000000 |................| +# TEST17-NEXT: 0020: 00000000 03000000 02000000 00000000 |................| +# TEST17-NEXT: 0030: 03040500 00000000 04000000 00000000 |................| +# TEST17-NEXT: 0040: 03000000 474E5500 |....GNU.| +# TEST17-NEXT: ) +# TEST17: NoteSections [ +# TEST17-NEXT: NoteSection { +# TEST17-NEXT: Name: .note.foo4 +# TEST17-NEXT: Offset: +# TEST17-NEXT: Size: +# TEST17-NEXT: Notes [ +# TEST17-NEXT: { +# TEST17-NEXT: Owner: ABCD +# TEST17-NEXT: Data size: 0x2 +# TEST17-NEXT: Type: NT_VERSION (version) +# TEST17-NEXT: Description data ( +# TEST17-NEXT: 0000: 0102 |..| +# TEST17-NEXT: ) +# TEST17-NEXT: } +# TEST17-NEXT: { +# TEST17-NEXT: Owner: +# TEST17-NEXT: Data size: 0x3 +# TEST17-NEXT: Type: NT_ARCH (architecture) +# TEST17-NEXT: Description data ( +# TEST17-NEXT: 0000: 030405 |...| +# TEST17-NEXT: ) +# TEST17-NEXT: } +# TEST17-NEXT: { +# TEST17-NEXT: Owner: GNU +# TEST17-NEXT: Data size: 0x0 +# TEST17-NEXT: Type: NT_GNU_BUILD_ID (unique build ID bitstring) +# TEST17-NEXT: Build ID: +# TEST17-NEXT: } +# TEST17-NEXT: ] +# TEST17-NEXT: } +# TEST17-NEXT: NoteSection { +# TEST17-NEXT: Name: .note.foo8 +# TEST17-NEXT: Offset: +# TEST17-NEXT: Size: +# TEST17-NEXT: Notes [ +# TEST17-NEXT: { +# TEST17-NEXT: Owner: ABCD +# TEST17-NEXT: Data size: 0x2 +# TEST17-NEXT: Type: NT_VERSION (version) +# TEST17-NEXT: Description data ( +# TEST17-NEXT: 0000: 0102 |..| +# TEST17-NEXT: ) +# TEST17-NEXT: } +# TEST17-NEXT: { +# TEST17-NEXT: Owner: +# TEST17-NEXT: Data size: 0x3 +# TEST17-NEXT: Type: NT_ARCH (architecture) +# TEST17-NEXT: Description data ( +# TEST17-NEXT: 0000: 030405 |...| +# TEST17-NEXT: ) +# TEST17-NEXT: } +# TEST17-NEXT: { +# TEST17-NEXT: Owner: GNU +# TEST17-NEXT: Data size: 0x0 +# TEST17-NEXT: Type: NT_GNU_BUILD_ID (unique build ID bitstring) +# TEST17-NEXT: Build ID: +# TEST17-NEXT: } +# 
TEST17-NEXT: ] +# TEST17-NEXT: } +# TEST17-NEXT: ] + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Name: .note.foo4 + Type: SHT_NOTE + AddressAlign: 4 + Notes: + - Name: ABCD + Type: NT_VERSION + Desc: 0102 + - Type: NT_ARCH + Desc: 030405 + - Name: GNU + Type: NT_GNU_BUILD_ID + - Name: .note.foo8 + Type: SHT_NOTE + AddressAlign: 8 + Notes: + - Name: ABCD + Type: NT_VERSION + Desc: 0102 + - Type: NT_ARCH + Desc: 030405 + - Name: GNU + Type: NT_GNU_BUILD_ID From fb140125ac679c33c2c663062c6409e09fc885b9 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Tue, 3 Dec 2024 13:04:46 +1100 Subject: [PATCH 012/191] [ORC] Fix typo in comment. NFC. --- llvm/lib/ExecutionEngine/Orc/Core.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp index 222135bd77688..f226e81cc02a6 100644 --- a/llvm/lib/ExecutionEngine/Orc/Core.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp @@ -3308,7 +3308,7 @@ ExecutionSession::IL_emit(MaterializationResponsibility &MR, continue; } - // If we get here thene Dep is Emitted. We need to look up its defining + // If we get here then Dep is Emitted. We need to look up its defining // EDU and add this EDU to the defining EDU's list of users (this means // creating an EDUInfos entry if the defining EDU doesn't have one // already). From 9ea993f975b045679907a0789d6fd4d7180593a0 Mon Sep 17 00:00:00 2001 From: Younan Zhang Date: Tue, 3 Dec 2024 10:10:17 +0800 Subject: [PATCH 013/191] [Clang] Recover GLTemplateParameterList for generic lambdas in RebuildLambdaScopeInfo (#118176) The NTTP argument appearing inside a trailing return type of a generic lambda would have us check for potential lambda captures, where the function needs GLTemplateParameterList of the current LSI to tell whether the lambda is generic. The lambda scope in this context is rebuilt by the LambdaScopeForCallOperatorInstantiationRAII when substituting the lambda operator during template argument deduction. Thus, I think the template parameter list should be preserved in the rebuilding process, as it seems otherwise innocuous to me. Fixes #115931 --- clang/docs/ReleaseNotes.rst | 2 ++ clang/lib/Sema/SemaDecl.cpp | 21 ++++++++++++++++--- .../SemaCXX/lambda-capture-type-deduction.cpp | 16 ++++++++++++++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 20bd27ad52f57..01c7899e36c93 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -764,6 +764,8 @@ Bug Fixes to C++ Support - Fixed an assertion failure caused by using ``consteval`` in condition in consumed analyses. (#GH117385) - Fix a crash caused by incorrect argument position in merging deduced template arguments. (#GH113659) - Fixed an assertion failure caused by mangled names with invalid identifiers. (#GH112205) +- Fixed an incorrect lambda scope of generic lambdas that caused Clang to crash when computing potential lambda + captures at the end of a full expression. 
(#GH115931) Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index ba574307055c6..c4bb73b2924bc 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -15519,10 +15519,25 @@ LambdaScopeInfo *Sema::RebuildLambdaScopeInfo(CXXMethodDecl *CallOperator) { LSI->CallOperator = CallOperator; LSI->Lambda = LambdaClass; LSI->ReturnType = CallOperator->getReturnType(); - // This function in calls in situation where the context of the call operator - // is not entered, so we set AfterParameterList to false, so that + // When this function is called in situation where the context of the call + // operator is not entered, we set AfterParameterList to false, so that // `tryCaptureVariable` finds explicit captures in the appropriate context. - LSI->AfterParameterList = false; + // There is also at least a situation as in FinishTemplateArgumentDeduction(), + // where we would set the CurContext to the lambda operator before + // substituting into it. In this case the flag needs to be true such that + // tryCaptureVariable can correctly handle potential captures thereof. + LSI->AfterParameterList = CurContext == CallOperator; + + // GLTemplateParameterList is necessary for getCurGenericLambda() which is + // used at the point of dealing with potential captures. + // + // We don't use LambdaClass->isGenericLambda() because this value doesn't + // flip for instantiated generic lambdas, where no FunctionTemplateDecls are + // associated. (Technically, we could recover that list from their + // instantiation patterns, but for now, the GLTemplateParameterList seems + // unnecessary in these cases.) + if (FunctionTemplateDecl *FTD = CallOperator->getDescribedFunctionTemplate()) + LSI->GLTemplateParameterList = FTD->getTemplateParameters(); const LambdaCaptureDefault LCD = LambdaClass->getLambdaCaptureDefault(); if (LCD == LCD_None) diff --git a/clang/test/SemaCXX/lambda-capture-type-deduction.cpp b/clang/test/SemaCXX/lambda-capture-type-deduction.cpp index a86f301898992..234cb6806f041 100644 --- a/clang/test/SemaCXX/lambda-capture-type-deduction.cpp +++ b/clang/test/SemaCXX/lambda-capture-type-deduction.cpp @@ -298,6 +298,22 @@ void __trans_tmp_1() { } +namespace GH115931 { + +struct Range {}; + +template +struct LengthPercentage {}; + +void reflectSum() { + Range resultR; + [&] (auto) -> LengthPercentage { + return {}; + }(0); +} + +} // namespace GH115931 + namespace GH47400 { struct Foo {}; From eb3f1aec6eff08ce1c76259bb0801f6457a55400 Mon Sep 17 00:00:00 2001 From: LiqinWeng Date: Tue, 3 Dec 2024 10:17:52 +0800 Subject: [PATCH 014/191] [TTI][RISCV] Implement cost of some intrinsics with LMUL (#117874) Intrinsics include: sadd_sat/ssub_sat/uadd_sat/usub_sat/fabs/fsqrt/cttz/ctlz/ctpop --- .../Target/RISCV/RISCVTargetTransformInfo.cpp | 55 +++++++- .../CostModel/RISCV/fp-min-max-abs.ll | 66 ++++++---- .../Analysis/CostModel/RISCV/fp-sqrt-pow.ll | 34 ++--- .../Analysis/CostModel/RISCV/int-bit-manip.ll | 108 ++++++++-------- .../Analysis/CostModel/RISCV/int-sat-math.ll | 120 +++++++++--------- 5 files changed, 223 insertions(+), 160 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 9a5a48333904c..aed476db1956f 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1013,20 +1013,65 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, case 
Intrinsic::sadd_sat: case Intrinsic::ssub_sat: case Intrinsic::uadd_sat: - case Intrinsic::usub_sat: + case Intrinsic::usub_sat: { + auto LT = getTypeLegalizationCost(RetTy); + if (ST->hasVInstructions() && LT.second.isVector()) { + unsigned Op; + switch (ICA.getID()) { + case Intrinsic::sadd_sat: + Op = RISCV::VSADD_VV; + break; + case Intrinsic::ssub_sat: + Op = RISCV::VSSUBU_VV; + break; + case Intrinsic::uadd_sat: + Op = RISCV::VSADDU_VV; + break; + case Intrinsic::usub_sat: + Op = RISCV::VSSUBU_VV; + break; + } + return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind); + } + break; + } case Intrinsic::fabs: case Intrinsic::sqrt: { auto LT = getTypeLegalizationCost(RetTy); - if (ST->hasVInstructions() && LT.second.isVector()) - return LT.first; + // TODO: add f16/bf16, bf16 with zvfbfmin && f16 with zvfhmin + if (ST->hasVInstructions() && LT.second.isVector()) { + unsigned Op; + switch (ICA.getID()) { + case Intrinsic::fabs: + Op = RISCV::VFSGNJX_VV; + break; + case Intrinsic::sqrt: + Op = RISCV::VFSQRT_V; + break; + } + return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind); + } break; } case Intrinsic::cttz: case Intrinsic::ctlz: case Intrinsic::ctpop: { auto LT = getTypeLegalizationCost(RetTy); - if (ST->hasVInstructions() && ST->hasStdExtZvbb() && LT.second.isVector()) - return LT.first; + if (ST->hasVInstructions() && ST->hasStdExtZvbb() && LT.second.isVector()) { + unsigned Op; + switch (ICA.getID()) { + case Intrinsic::cttz: + Op = RISCV::VCTZ_V; + break; + case Intrinsic::ctlz: + Op = RISCV::VCLZ_V; + break; + case Intrinsic::ctpop: + Op = RISCV::VCPOP_V; + break; + } + return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind); + } break; } case Intrinsic::abs: { diff --git a/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll b/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll index 0b2c8da4438da..9eb06a07f135f 100644 --- a/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll +++ b/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll @@ -1,30 +1,48 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -passes="print" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: opt < %s -passes="print" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: opt < %s -passes="print" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh,+zvfbfmin | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: opt < %s -passes="print" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin,+zvfbfmin | FileCheck %s --check-prefixes=CHECK,ZVFHMIN define void @fabs() { ; CHECK-LABEL: 'fabs' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call float @llvm.fabs.f32(float undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x float> @llvm.fabs.v2f32(<2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.fabs.nxv1f32( undef) -; CHECK-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %7 = call @llvm.fabs.nxv2f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.fabs.nxv4f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.fabs.nxv8f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call @llvm.fabs.nxv16f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call double @llvm.fabs.f64(double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x double> @llvm.fabs.v16f64(<16 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call @llvm.fabs.nxv1f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call @llvm.fabs.nxv2f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call @llvm.fabs.nxv4f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = call @llvm.fabs.nxv8f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call bfloat @llvm.fabs.bf16(bfloat undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.fabs.v2bf16(<2 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.fabs.v4bf16(<4 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.fabs.v8bf16(<8 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x bfloat> @llvm.fabs.v16bf16(<16 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.fabs.nxv2bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.fabs.nxv4bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call @llvm.fabs.nxv8bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = call @llvm.fabs.nxv16bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call float @llvm.fabs.f32(float undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call <2 x float> @llvm.fabs.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %14 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = call @llvm.fabs.nxv1f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call @llvm.fabs.nxv2f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %17 = call @llvm.fabs.nxv4f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %18 = call @llvm.fabs.nxv8f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %19 = call @llvm.fabs.nxv16f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = call double @llvm.fabs.f64(double undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %24 = call <16 x double> @llvm.fabs.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call @llvm.fabs.nxv1f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = call @llvm.fabs.nxv2f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %27 = call @llvm.fabs.nxv4f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %28 = call @llvm.fabs.nxv8f64( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; + call bfloat @llvm.fabs.bf16(bfloat undef) + call <2 x bfloat> @llvm.fabs.v2bf16(<2 x bfloat> undef) + call <4 x bfloat> @llvm.fabs.v4bf16(<4 x bfloat> undef) + call <8 x bfloat> @llvm.fabs.v8bf16(<8 x bfloat> undef) + call <16 x bfloat> @llvm.fabs.v16f16(<16 x bfloat> undef) + call @llvm.fabs.nxv2bf16( undef) + call @llvm.fabs.nxv4bf16( undef) + call @llvm.fabs.nxv8bf16( undef) + call @llvm.fabs.nxv16f16( undef) call float @llvm.fabs.f32(float undef) call <2 x float> @llvm.fabs.v2f32(<2 x float> undef) call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) @@ -53,11 +71,11 @@ define void @fabs_f16() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.fabs.v2f16(<2 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.fabs.v8f16(<8 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.fabs.nxv2f16( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.fabs.nxv4f16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.fabs.nxv8f16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.fabs.nxv16f16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call @llvm.fabs.nxv8f16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = call @llvm.fabs.nxv16f16( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call half @llvm.fabs.f16(half undef) diff --git 
a/llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll b/llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll index be9c19dc59a85..446627f6bf3c0 100644 --- a/llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll +++ b/llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll @@ -8,30 +8,30 @@ define void @sqrt() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.sqrt.v2bf16(<2 x bfloat> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.sqrt.v4bf16(<4 x bfloat> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.sqrt.nxv2bf16( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.sqrt.nxv4bf16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.sqrt.nxv8bf16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.sqrt.nxv16bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call @llvm.sqrt.nxv8bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = call @llvm.sqrt.nxv16bf16( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call float @llvm.sqrt.f32(float undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call <2 x float> @llvm.sqrt.v2f32(<2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %14 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = call @llvm.sqrt.nxv1f32( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call @llvm.sqrt.nxv2f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call @llvm.sqrt.nxv4f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call @llvm.sqrt.nxv8f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = call @llvm.sqrt.nxv16f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = call @llvm.sqrt.nxv4f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %18 = call @llvm.sqrt.nxv8f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %19 = call @llvm.sqrt.nxv16f32( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = call double @llvm.sqrt.f64(double 
undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = call <16 x double> @llvm.sqrt.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %24 = call <16 x double> @llvm.sqrt.v16f64(<16 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call @llvm.sqrt.nxv1f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call @llvm.sqrt.nxv2f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call @llvm.sqrt.nxv4f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = call @llvm.sqrt.nxv8f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = call @llvm.sqrt.nxv2f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %27 = call @llvm.sqrt.nxv4f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %28 = call @llvm.sqrt.nxv8f64( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call bfloat @llvm.sqrt.bf16(bfloat undef) @@ -71,11 +71,11 @@ define void @sqrt_f16() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.sqrt.v2f16(<2 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.sqrt.v4f16(<4 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.sqrt.nxv2f16( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.sqrt.nxv4f16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.sqrt.nxv8f16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.sqrt.nxv16f16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call @llvm.sqrt.nxv8f16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = call @llvm.sqrt.nxv16f16( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call half @llvm.sqrt.f16(half undef) diff --git a/llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll b/llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll index 55db70ce1e912..5f2728f93d551 100644 --- a/llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll +++ 
b/llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll @@ -209,37 +209,37 @@ define void @ctlz() { ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.ctlz.nxv2i8( undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.ctlz.nxv4i8( undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.ctlz.nxv8i8( undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.ctlz.nxv16i8( undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call @llvm.ctlz.nxv32i8( undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call @llvm.ctlz.nxv64i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call @llvm.ctlz.nxv16i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = call @llvm.ctlz.nxv32i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %11 = call @llvm.ctlz.nxv64i8( undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i16> @llvm.ctlz.v2i16(<2 x i16> undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call @llvm.ctlz.nxv1i16( undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call @llvm.ctlz.nxv2i16( undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call @llvm.ctlz.nxv4i16( undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = call @llvm.ctlz.nxv8i16( undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = call @llvm.ctlz.nxv16i16( undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call @llvm.ctlz.nxv32i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = call @llvm.ctlz.nxv8i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = call @llvm.ctlz.nxv16i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %21 = call @llvm.ctlz.nxv32i16( undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %24 = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %25 = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call @llvm.ctlz.nxv1i32( undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call @llvm.ctlz.nxv2i32( undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = call @llvm.ctlz.nxv4i32( undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call @llvm.ctlz.nxv8i32( undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call @llvm.ctlz.nxv16i32( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call @llvm.ctlz.nxv4i32( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %29 = call @llvm.ctlz.nxv8i32( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %30 = call @llvm.ctlz.nxv16i32( undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %31 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %32 = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = call <16 x i64> @llvm.ctlz.v16i64(<16 x i64> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %32 = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %33 = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %34 = call <16 x i64> @llvm.ctlz.v16i64(<16 x i64> undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = call @llvm.ctlz.nxv1i64( undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = call @llvm.ctlz.nxv2i64( undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = call @llvm.ctlz.nxv4i64( undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = call @llvm.ctlz.nxv8i64( undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %39 = call @llvm.ctlz.nxv16i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %36 = call @llvm.ctlz.nxv2i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %37 = call @llvm.ctlz.nxv4i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %38 = call @llvm.ctlz.nxv8i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %39 = call @llvm.ctlz.nxv16i64( undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> undef, i1 false) @@ -336,37 +336,37 @@ define void @cttz() { ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.cttz.nxv2i8( undef, i1 false) ; ZVBB-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %7 = call @llvm.cttz.nxv4i8( undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.cttz.nxv8i8( undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.cttz.nxv16i8( undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call @llvm.cttz.nxv32i8( undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call @llvm.cttz.nxv64i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call @llvm.cttz.nxv16i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = call @llvm.cttz.nxv32i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %11 = call @llvm.cttz.nxv64i8( undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call @llvm.cttz.nxv1i16( undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call @llvm.cttz.nxv2i16( undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call @llvm.cttz.nxv4i16( undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = call @llvm.cttz.nxv8i16( undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = call @llvm.cttz.nxv16i16( undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call @llvm.cttz.nxv32i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = call @llvm.cttz.nxv8i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = call @llvm.cttz.nxv16i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %21 = call @llvm.cttz.nxv32i16( undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %25 = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %26 = call @llvm.cttz.nxv1i32( undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call @llvm.cttz.nxv2i32( undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = call @llvm.cttz.nxv4i32( undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call @llvm.cttz.nxv8i32( undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call @llvm.cttz.nxv16i32( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call @llvm.cttz.nxv4i32( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %29 = call @llvm.cttz.nxv8i32( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %30 = call @llvm.cttz.nxv16i32( undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %31 = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %32 = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = call <16 x i64> @llvm.cttz.v16i64(<16 x i64> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %32 = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %33 = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %34 = call <16 x i64> @llvm.cttz.v16i64(<16 x i64> undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = call @llvm.cttz.nxv1i64( undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = call @llvm.cttz.nxv2i64( undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = call @llvm.cttz.nxv4i64( undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = call @llvm.cttz.nxv8i64( undef, i1 false) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %39 = call @llvm.cttz.nxv16i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %36 = call @llvm.cttz.nxv2i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %37 = call @llvm.cttz.nxv4i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %38 = call @llvm.cttz.nxv8i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %39 = call @llvm.cttz.nxv16i64( undef, i1 false) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call <2 x i8> @llvm.cttz.v2i8(<2 x i8> undef, i1 false) @@ -465,37 +465,37 @@ define void @ctpop() { ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.ctpop.nxv2i8( undef) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.ctpop.nxv4i8( undef) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.ctpop.nxv8i8( undef) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 
= call @llvm.ctpop.nxv16i8( undef) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call @llvm.ctpop.nxv16i8( undef) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = call i16 @llvm.ctpop.i16(i16 undef) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> undef) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> undef) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> undef) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> undef) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> undef) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call @llvm.ctpop.nxv1i16( undef) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call @llvm.ctpop.nxv2i16( undef) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call @llvm.ctpop.nxv4i16( undef) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = call @llvm.ctpop.nxv8i16( undef) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = call @llvm.ctpop.nxv16i16( undef) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = call @llvm.ctpop.nxv8i16( undef) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = call @llvm.ctpop.nxv16i16( undef) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %21 = call i32 @llvm.ctpop.i32(i32 undef) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> undef) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> undef) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> undef) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> undef) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> undef) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %25 = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> undef) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call @llvm.ctpop.nxv1i32( undef) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call @llvm.ctpop.nxv2i32( undef) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = call @llvm.ctpop.nxv4i32( undef) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call @llvm.ctpop.nxv8i32( undef) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call @llvm.ctpop.nxv16i32( undef) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call @llvm.ctpop.nxv4i32( undef) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %29 = call @llvm.ctpop.nxv8i32( undef) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %30 = call @llvm.ctpop.nxv16i32( undef) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = call i64 
@llvm.ctpop.i64(i64 undef) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %32 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> undef) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> undef) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> undef) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = call <16 x i64> @llvm.ctpop.v16i64(<16 x i64> undef) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> undef) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %34 = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> undef) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %35 = call <16 x i64> @llvm.ctpop.v16i64(<16 x i64> undef) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = call @llvm.ctpop.nxv1i64( undef) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = call @llvm.ctpop.nxv2i64( undef) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = call @llvm.ctpop.nxv4i64( undef) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %39 = call @llvm.ctpop.nxv8i64( undef) -; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %40 = call @llvm.ctpop.nxv16i64( undef) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %37 = call @llvm.ctpop.nxv2i64( undef) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %38 = call @llvm.ctpop.nxv4i64( undef) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %39 = call @llvm.ctpop.nxv8i64( undef) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %40 = call @llvm.ctpop.nxv16i64( undef) ; ZVBB-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call i8 @llvm.ctpop.i8(i8 undef) diff --git a/llvm/test/Analysis/CostModel/RISCV/int-sat-math.ll b/llvm/test/Analysis/CostModel/RISCV/int-sat-math.ll index be6b7c57d2252..0758eb204be48 100644 --- a/llvm/test/Analysis/CostModel/RISCV/int-sat-math.ll +++ b/llvm/test/Analysis/CostModel/RISCV/int-sat-math.ll @@ -11,33 +11,33 @@ define void @sadd.sat() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.sadd.sat.nxv2i8( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.sadd.sat.nxv4i8( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.sadd.sat.nxv8i8( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.sadd.sat.nxv16i8( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call @llvm.sadd.sat.nxv16i8( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; CHECK-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %14 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = call @llvm.sadd.sat.nxv2i16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call @llvm.sadd.sat.nxv4i16( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call @llvm.sadd.sat.nxv8i16( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call @llvm.sadd.sat.nxv16i16( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = call @llvm.sadd.sat.nxv8i16( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %18 = call @llvm.sadd.sat.nxv16i16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %19 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = call @llvm.sadd.sat.nxv2i32( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call @llvm.sadd.sat.nxv4i32( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call @llvm.sadd.sat.nxv8i32( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call @llvm.sadd.sat.nxv16i32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = call @llvm.sadd.sat.nxv4i32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %26 = call @llvm.sadd.sat.nxv8i32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %27 = call @llvm.sadd.sat.nxv16i32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %28 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %31 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %32 = 
call <16 x i64> @llvm.sadd.sat.v16i64(<16 x i64> undef, <16 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = call @llvm.sadd.sat.nxv2i64( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = call @llvm.sadd.sat.nxv4i64( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = call @llvm.sadd.sat.nxv8i64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %32 = call <16 x i64> @llvm.sadd.sat.v16i64(<16 x i64> undef, <16 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = call @llvm.sadd.sat.nxv2i64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %34 = call @llvm.sadd.sat.nxv4i64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %35 = call @llvm.sadd.sat.nxv8i64( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) @@ -88,33 +88,33 @@ define void @uadd.sat() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.uadd.sat.nxv2i8( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.uadd.sat.nxv4i8( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.uadd.sat.nxv8i8( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.uadd.sat.nxv16i8( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call @llvm.uadd.sat.nxv16i8( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %10 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = call @llvm.uadd.sat.nxv2i16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call @llvm.uadd.sat.nxv4i16( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call @llvm.uadd.sat.nxv8i16( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call @llvm.uadd.sat.nxv16i16( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = call @llvm.uadd.sat.nxv8i16( undef, undef) +; CHECK-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %18 = call @llvm.uadd.sat.nxv16i16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %19 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = call @llvm.uadd.sat.nxv2i32( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call @llvm.uadd.sat.nxv4i32( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call @llvm.uadd.sat.nxv8i32( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call @llvm.uadd.sat.nxv16i32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = call @llvm.uadd.sat.nxv4i32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %26 = call @llvm.uadd.sat.nxv8i32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %27 = call @llvm.uadd.sat.nxv16i32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %31 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %32 = call <16 x i64> @llvm.uadd.sat.v16i64(<16 x i64> undef, <16 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = call @llvm.uadd.sat.nxv2i64( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = call @llvm.uadd.sat.nxv4i64( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = call @llvm.uadd.sat.nxv8i64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %32 = call <16 x i64> @llvm.uadd.sat.v16i64(<16 x i64> undef, <16 x i64> undef) +; CHECK-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %33 = call @llvm.uadd.sat.nxv2i64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %34 = call @llvm.uadd.sat.nxv4i64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %35 = call @llvm.uadd.sat.nxv8i64( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) @@ -165,33 +165,33 @@ define void @usub.sat() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.usub.sat.nxv2i8( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.usub.sat.nxv4i8( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.usub.sat.nxv8i8( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.usub.sat.nxv16i8( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call @llvm.usub.sat.nxv16i8( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %10 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = call @llvm.usub.sat.nxv2i16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call @llvm.usub.sat.nxv4i16( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call @llvm.usub.sat.nxv8i16( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call @llvm.usub.sat.nxv16i16( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = call @llvm.usub.sat.nxv8i16( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %18 = call @llvm.usub.sat.nxv16i16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %19 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = call <8 x i32> 
@llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = call @llvm.usub.sat.nxv2i32( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call @llvm.usub.sat.nxv4i32( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call @llvm.usub.sat.nxv8i32( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call @llvm.usub.sat.nxv16i32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = call @llvm.usub.sat.nxv4i32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %26 = call @llvm.usub.sat.nxv8i32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %27 = call @llvm.usub.sat.nxv16i32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %31 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %32 = call <16 x i64> @llvm.usub.sat.v16i64(<16 x i64> undef, <16 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = call @llvm.usub.sat.nxv2i64( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = call @llvm.usub.sat.nxv4i64( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = call @llvm.usub.sat.nxv8i64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %32 = call <16 x i64> @llvm.usub.sat.v16i64(<16 x i64> undef, <16 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = call @llvm.usub.sat.nxv2i64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %34 = call @llvm.usub.sat.nxv4i64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %35 = call @llvm.usub.sat.nxv8i64( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) @@ -242,33 +242,33 @@ define void @ssub.sat() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.ssub.sat.nxv2i8( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.ssub.sat.nxv4i8( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.ssub.sat.nxv8i8( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %9 = call @llvm.ssub.sat.nxv16i8( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call @llvm.ssub.sat.nxv16i8( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = call @llvm.ssub.sat.nxv2i16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call @llvm.ssub.sat.nxv4i16( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call @llvm.ssub.sat.nxv8i16( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call @llvm.ssub.sat.nxv16i16( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = call @llvm.ssub.sat.nxv8i16( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %18 = call @llvm.ssub.sat.nxv16i16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %19 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = call @llvm.ssub.sat.nxv2i32( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call @llvm.ssub.sat.nxv4i32( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call @llvm.ssub.sat.nxv8i32( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call @llvm.ssub.sat.nxv16i32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = call @llvm.ssub.sat.nxv4i32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %26 = call @llvm.ssub.sat.nxv8i32( 
undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %27 = call @llvm.ssub.sat.nxv16i32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %28 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %31 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %32 = call <16 x i64> @llvm.ssub.sat.v16i64(<16 x i64> undef, <16 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = call @llvm.ssub.sat.nxv2i64( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = call @llvm.ssub.sat.nxv4i64( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = call @llvm.ssub.sat.nxv8i64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %32 = call <16 x i64> @llvm.ssub.sat.v16i64(<16 x i64> undef, <16 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = call @llvm.ssub.sat.nxv2i64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %34 = call @llvm.ssub.sat.nxv4i64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %35 = call @llvm.ssub.sat.nxv8i64( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) From 95979717e1122d4bb473efef2447ae92c24c9381 Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Mon, 2 Dec 2024 18:34:27 -0800 Subject: [PATCH 015/191] Revert "[ObjectYAML][ELF] Take alignment into account when generating notes (#118157)" This reverts commit 1724188c19f363c877fcf1bca86d92af3864b338. Some build bots reported a failure in the updated test --- llvm/lib/ObjectYAML/ELFEmitter.cpp | 19 +-- .../test/tools/yaml2obj/ELF/note-section.yaml | 129 ------------------ 2 files changed, 2 insertions(+), 146 deletions(-) diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index 001e701bd0b80..476334024151a 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -1799,20 +1799,6 @@ void ELFState::writeSectionContent(Elf_Shdr &SHeader, if (!Section.Notes) return; - unsigned Align; - switch (SHeader.sh_addralign) { - case 0: - case 4: - Align = 4; - break; - case 8: - Align = 8; - break; - default: - reportError(Section.Name + ": invalid alignment for a note section: 0x" + - Twine::utohexstr(SHeader.sh_addralign)); - } - uint64_t Offset = CBA.tell(); for (const ELFYAML::NoteEntry &NE : *Section.Notes) { // Write name size. 
@@ -1834,15 +1820,14 @@ void ELFState::writeSectionContent(Elf_Shdr &SHeader, if (!NE.Name.empty()) { CBA.write(NE.Name.data(), NE.Name.size()); CBA.write('\0'); + CBA.padToAlignment(4); } // Write description and padding. if (NE.Desc.binary_size() != 0) { - CBA.padToAlignment(Align); CBA.writeAsBinary(NE.Desc); + CBA.padToAlignment(4); } - - CBA.padToAlignment(Align); } SHeader.sh_size = CBA.tell() - Offset; diff --git a/llvm/test/tools/yaml2obj/ELF/note-section.yaml b/llvm/test/tools/yaml2obj/ELF/note-section.yaml index 26b95e1c2379b..80359c4ec0183 100644 --- a/llvm/test/tools/yaml2obj/ELF/note-section.yaml +++ b/llvm/test/tools/yaml2obj/ELF/note-section.yaml @@ -333,132 +333,3 @@ Sections: - Name: ABC Desc: '123456' Type: NT_VERSION - -## Check that an incorrect alignment is reported - -# RUN: not yaml2obj --docnum=16 %s 2>&1 | FileCheck %s --check-prefix=ERR_ALIGN1 -# ERR_ALIGN1: error: .note.foo: invalid alignment for a note section: 0x1 - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_EXEC -Sections: - - Name: .note.foo - Type: SHT_NOTE - AddressAlign: 1 - Notes: - - Type: 0x1 - -## Check that note entries and their `Desc` fields are aligned according to the -## specified section alignment - -# RUN: yaml2obj --docnum=17 %s -o - | \ -# RUN: llvm-readobj --sections --section-data --notes - | \ -# RUN: FileCheck %s --check-prefix=TEST17 - -# TEST17: Name: .note.foo4 -# TEST17: SectionData ( -# TEST17-NEXT: 0000: 05000000 02000000 01000000 41424344 |............ABCD| -# TEST17-NEXT: 0010: 00000000 01020000 00000000 03000000 |................| -# TEST17-NEXT: 0020: 02000000 03040500 04000000 00000000 |................| -# TEST17-NEXT: 0030: 03000000 474E5500 |....GNU.| -# TEST17-NEXT: ) -# TEST17: Name: .note.foo8 -# TEST17: SectionData ( -# TEST17-NEXT: 0000: 05000000 02000000 01000000 41424344 |............ABCD| -# TEST17-NEXT: 0010: 00000000 00000000 01020000 00000000 |................| -# TEST17-NEXT: 0020: 00000000 03000000 02000000 00000000 |................| -# TEST17-NEXT: 0030: 03040500 00000000 04000000 00000000 |................| -# TEST17-NEXT: 0040: 03000000 474E5500 |....GNU.| -# TEST17-NEXT: ) -# TEST17: NoteSections [ -# TEST17-NEXT: NoteSection { -# TEST17-NEXT: Name: .note.foo4 -# TEST17-NEXT: Offset: -# TEST17-NEXT: Size: -# TEST17-NEXT: Notes [ -# TEST17-NEXT: { -# TEST17-NEXT: Owner: ABCD -# TEST17-NEXT: Data size: 0x2 -# TEST17-NEXT: Type: NT_VERSION (version) -# TEST17-NEXT: Description data ( -# TEST17-NEXT: 0000: 0102 |..| -# TEST17-NEXT: ) -# TEST17-NEXT: } -# TEST17-NEXT: { -# TEST17-NEXT: Owner: -# TEST17-NEXT: Data size: 0x3 -# TEST17-NEXT: Type: NT_ARCH (architecture) -# TEST17-NEXT: Description data ( -# TEST17-NEXT: 0000: 030405 |...| -# TEST17-NEXT: ) -# TEST17-NEXT: } -# TEST17-NEXT: { -# TEST17-NEXT: Owner: GNU -# TEST17-NEXT: Data size: 0x0 -# TEST17-NEXT: Type: NT_GNU_BUILD_ID (unique build ID bitstring) -# TEST17-NEXT: Build ID: -# TEST17-NEXT: } -# TEST17-NEXT: ] -# TEST17-NEXT: } -# TEST17-NEXT: NoteSection { -# TEST17-NEXT: Name: .note.foo8 -# TEST17-NEXT: Offset: -# TEST17-NEXT: Size: -# TEST17-NEXT: Notes [ -# TEST17-NEXT: { -# TEST17-NEXT: Owner: ABCD -# TEST17-NEXT: Data size: 0x2 -# TEST17-NEXT: Type: NT_VERSION (version) -# TEST17-NEXT: Description data ( -# TEST17-NEXT: 0000: 0102 |..| -# TEST17-NEXT: ) -# TEST17-NEXT: } -# TEST17-NEXT: { -# TEST17-NEXT: Owner: -# TEST17-NEXT: Data size: 0x3 -# TEST17-NEXT: Type: NT_ARCH (architecture) -# TEST17-NEXT: Description data ( -# TEST17-NEXT: 0000: 030405 |...| -# 
TEST17-NEXT: ) -# TEST17-NEXT: } -# TEST17-NEXT: { -# TEST17-NEXT: Owner: GNU -# TEST17-NEXT: Data size: 0x0 -# TEST17-NEXT: Type: NT_GNU_BUILD_ID (unique build ID bitstring) -# TEST17-NEXT: Build ID: -# TEST17-NEXT: } -# TEST17-NEXT: ] -# TEST17-NEXT: } -# TEST17-NEXT: ] - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_EXEC -Sections: - - Name: .note.foo4 - Type: SHT_NOTE - AddressAlign: 4 - Notes: - - Name: ABCD - Type: NT_VERSION - Desc: 0102 - - Type: NT_ARCH - Desc: 030405 - - Name: GNU - Type: NT_GNU_BUILD_ID - - Name: .note.foo8 - Type: SHT_NOTE - AddressAlign: 8 - Notes: - - Name: ABCD - Type: NT_VERSION - Desc: 0102 - - Type: NT_ARCH - Desc: 030405 - - Name: GNU - Type: NT_GNU_BUILD_ID From fc9052ee258e35c5aaba3dc2c1419870975f3a7a Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Tue, 3 Dec 2024 03:36:57 +0100 Subject: [PATCH 016/191] [clang][bytecode] Check __builtin_memcpy for null pointers (#118313) --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 8 ++++++++ clang/test/AST/ByteCode/builtin-functions.cpp | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index db3703a60db69..d7e32c491b03b 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -1796,6 +1796,14 @@ static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC, return true; } + if (SrcPtr.isZero() || DestPtr.isZero()) { + Pointer DiagPtr = (SrcPtr.isZero() ? SrcPtr : DestPtr); + S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_memcpy_null) + << /*IsMove=*/false << /*IsWchar=*/false << !SrcPtr.isZero() + << DiagPtr.toDiagnosticString(S.getASTContext()); + return false; + } + if (!DoBitCastPtr(S, OpPC, SrcPtr, DestPtr)) return false; diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp index 9a6a31b5ec4c0..dfee35d6399a6 100644 --- a/clang/test/AST/ByteCode/builtin-functions.cpp +++ b/clang/test/AST/ByteCode/builtin-functions.cpp @@ -1138,4 +1138,12 @@ namespace BuiltinMemcpy { return b; } static_assert(simple() == 12); + + + extern struct Incomplete incomplete; + constexpr struct Incomplete *null_incomplete = 0; + static_assert(__builtin_memcpy(null_incomplete, null_incomplete, sizeof(wchar_t))); // both-error {{not an integral constant expression}} \ + // both-note {{source of 'memcpy' is nullptr}} + + } From 2af2634c64b1c6f34c4905b24863b930effe3b9c Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Mon, 2 Dec 2024 18:37:32 -0800 Subject: [PATCH 017/191] [RISCV] Use vcompress in deinterleave2 intrinsic lowering (#118325) This is analogous to febbf91 which added shuffle lowering using vcompress; we can do the same thing in the deinterleave2 lowering path which is used for scalable vectors. Note that we can further improve this for high lmul usage by adjusting how we materialize the mask (whose result is at most m1 with a known bit pattern). I am deliberately staging the work so that the changes to reduce register pressure are more easily evaluated on their own merit. 
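To make the difference concrete, the nxv2i64 (m4) deinterleave in
vector-deinterleave.ll now lowers to the sequence below. The instructions and
register assignments are copied from the updated test; the trailing comments
are added here purely as illustration and are not part of the generated
output:

  vsetvli a0, zero, e64, m4, ta, ma
  vid.v v12                   # v12 = {0, 1, 2, 3, ...}
  vand.vi v12, v12, 1         # keep the low bit: {0, 1, 0, 1, ...}
  vmseq.vi v16, v12, 0        # mask of the even lanes
  vcompress.vm v12, v8, v16   # pack the even elements of the m4 source
  vmnot.m v14, v16            # complement mask selects the odd lanes
  vcompress.vm v16, v8, v14   # pack the odd elements
  vmv2r.v v8, v12             # first result half
  vmv2r.v v10, v16            # second result half

Compared to the previous vid.v/vadd.vv/vrgather.vv sequence, the index vector
is now only needed to build the mask (at most an m1 value), which is what the
follow-up work on reducing register pressure mentioned above can exploit.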
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 37 +++-- .../RISCV/rvv/vector-deinterleave-load.ll | 76 +++------ .../CodeGen/RISCV/rvv/vector-deinterleave.ll | 153 +++++++----------- 3 files changed, 101 insertions(+), 165 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index d423fd421873f..b2e96b63a8095 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -10736,10 +10736,6 @@ SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op, SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, Op.getOperand(0), Op.getOperand(1)); - // We want to operate on all lanes, so get the mask and VL and mask for it - auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget); - SDValue Passthru = DAG.getUNDEF(ConcatVT); - // We can deinterleave through vnsrl.wi if the element type is smaller than // ELEN if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) { @@ -10749,19 +10745,28 @@ SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op, } // For the indices, use the same SEW to avoid an extra vsetvli + // TODO: If container type is larger than m1, we can consider using a splat + // of a constant instead of the following sequence + + // Create a vector of even indices {0, 1, 2, ...} MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger(); - // Create a vector of even indices {0, 2, 4, ...} - SDValue EvenIdx = - DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2)); - // Create a vector of odd indices {1, 3, 5, ... } - SDValue OddIdx = - DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT)); - - // Gather the even and odd elements into two separate vectors - SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT, - Concat, EvenIdx, Passthru, Mask, VL); - SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT, - Concat, OddIdx, Passthru, Mask, VL); + SDValue StepVec = DAG.getStepVector(DL, IdxVT); + // 0, 1, 0, 1, 0, 1 + SDValue ZeroOnes = + DAG.getNode(ISD::AND, DL, IdxVT, StepVec, DAG.getConstant(1, DL, IdxVT)); + MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1); + SDValue EvenMask = + DAG.getSetCC(DL, MaskVT, ZeroOnes, DAG.getConstant(0, DL, IdxVT), + ISD::CondCode::SETEQ); + // Have the latter be the not of the former to minimize the live range of + // the index vector since that might be large. 
+ SDValue OddMask = DAG.getLogicalNOT(DL, EvenMask, MaskVT); + + // vcompress the even and odd elements into two separate vectors + SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat, + EvenMask, DAG.getUNDEF(ConcatVT)); + SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat, + OddMask, DAG.getUNDEF(ConcatVT)); // Extract the result half of the gather for even and odd SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide, diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll index 39a1bfcda3d83..4338d1f61af72 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll @@ -106,95 +106,55 @@ define {, } @vector_deinterleave_load_nxv8i6 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 40 +; CHECK-NEXT: li a2, 24 ; CHECK-NEXT: mul a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: vl8re64.v v16, (a0) ; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; CHECK-NEXT: vid.v v8 ; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: vadd.vv v24, v8, v8 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: vmseq.vi v24, v8, 0 ; CHECK-NEXT: vl8re64.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vadd.vi v8, v24, 1 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vrgather.vv v8, v16, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vrgather.vv v24, v16, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmnot.m v6, v24 +; CHECK-NEXT: vcompress.vm v8, v16, v24 +; CHECK-NEXT: vmv1r.v v13, v24 +; CHECK-NEXT: vcompress.vm v24, v16, v6 +; CHECK-NEXT: vmv1r.v v12, v6 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vrgather.vv v24, v16, v0 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vcompress.vm v0, v16, v13 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a0, vlenb -; 
CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vrgather.vv v16, v24, v0 +; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vcompress.vm v0, v16, v12 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmv4r.v v12, v16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmv4r.v v28, v16 ; CHECK-NEXT: vmv8r.v v16, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 40 +; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll index f20a90a422313..99743066c79a8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll @@ -73,12 +73,13 @@ define {, } @vector_deinterleave_nxv2i64_nxv ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vid.v v12 -; CHECK-NEXT: vadd.vv v16, v12, v12 -; CHECK-NEXT: vrgather.vv v12, v8, v16 -; CHECK-NEXT: vadd.vi v16, v16, 1 -; CHECK-NEXT: vrgather.vv v20, v8, v16 +; CHECK-NEXT: vand.vi v12, v12, 1 +; CHECK-NEXT: vmseq.vi v16, v12, 0 +; CHECK-NEXT: vcompress.vm v12, v8, v16 +; CHECK-NEXT: vmnot.m v14, v16 +; CHECK-NEXT: vcompress.vm v16, v8, v14 ; CHECK-NEXT: vmv2r.v v8, v12 -; CHECK-NEXT: vmv2r.v v10, v20 +; CHECK-NEXT: vmv2r.v v10, v16 ; CHECK-NEXT: ret %retval = call {, } @llvm.vector.deinterleave2.nxv4i64( %vec) ret {, } %retval @@ -89,12 +90,13 @@ define {, } @vector_deinterleave_nxv4i64_nxv ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vadd.vv v24, v16, v16 -; CHECK-NEXT: vrgather.vv v16, v8, v24 -; CHECK-NEXT: vadd.vi v24, v24, 1 -; CHECK-NEXT: vrgather.vv v0, v8, v24 +; CHECK-NEXT: vand.vi v16, v16, 1 +; CHECK-NEXT: vmseq.vi v24, v16, 0 +; CHECK-NEXT: vcompress.vm v16, v8, v24 +; CHECK-NEXT: vmnot.m v20, v24 +; CHECK-NEXT: vcompress.vm v24, v8, v20 ; CHECK-NEXT: vmv4r.v v8, v16 -; CHECK-NEXT: vmv4r.v v12, v0 +; CHECK-NEXT: vmv4r.v v12, v24 ; CHECK-NEXT: ret %retval = call {, } @llvm.vector.deinterleave2.nxv8i64( %vec) ret {, } %retval @@ -180,66 +182,50 @@ define {, } @vector_deinterleave_nxv8i64_nxv ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; CHECK-NEXT: 
csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv8r.v v24, v8 -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: vadd.vv v0, v8, v8 -; CHECK-NEXT: vrgather.vv v8, v24, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vrgather.vv v16, v8, v0 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vadd.vi v8, v0, 1 -; CHECK-NEXT: vrgather.vv v0, v24, v8 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vrgather.vv v16, v24, v8 +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vand.vi v24, v16, 1 +; CHECK-NEXT: vmseq.vi v16, v24, 0 +; CHECK-NEXT: vcompress.vm v24, v8, v16 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmnot.m v17, v16 +; CHECK-NEXT: vcompress.vm v0, v8, v17 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vcompress.vm v24, v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vcompress.vm v24, v8, v17 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmv4r.v v20, v8 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v4, v8 +; CHECK-NEXT: vmv4r.v v4, v24 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: vmv8r.v v16, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 @@ -366,12 +352,13 @@ define {, } @vector_deinterleave_nxv2f ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vid.v v12 -; CHECK-NEXT: vadd.vv v16, v12, v12 -; CHECK-NEXT: vrgather.vv v12, v8, v16 -; CHECK-NEXT: vadd.vi v16, v16, 1 -; CHECK-NEXT: vrgather.vv v20, v8, v16 +; CHECK-NEXT: vand.vi v12, v12, 1 +; CHECK-NEXT: vmseq.vi v16, v12, 0 +; CHECK-NEXT: vcompress.vm v12, v8, v16 +; CHECK-NEXT: vmnot.m v14, v16 +; CHECK-NEXT: vcompress.vm v16, v8, v14 ; CHECK-NEXT: vmv2r.v v8, v12 -; CHECK-NEXT: 
vmv2r.v v10, v20 +; CHECK-NEXT: vmv2r.v v10, v16 ; CHECK-NEXT: ret %retval = call {, } @llvm.vector.deinterleave2.nxv4f64( %vec) ret {, } %retval @@ -436,66 +423,50 @@ define {, } @vector_deinterleave_nxv8f ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv8r.v v24, v8 -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: vadd.vv v0, v8, v8 -; CHECK-NEXT: vrgather.vv v8, v24, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vrgather.vv v16, v8, v0 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vadd.vi v8, v0, 1 -; CHECK-NEXT: vrgather.vv v0, v24, v8 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vrgather.vv v16, v24, v8 +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vand.vi v24, v16, 1 +; CHECK-NEXT: vmseq.vi v16, v24, 0 +; CHECK-NEXT: vcompress.vm v24, v8, v16 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmnot.m v17, v16 +; CHECK-NEXT: vcompress.vm v0, v8, v17 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vcompress.vm v24, v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vcompress.vm v24, v8, v17 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmv4r.v v20, v8 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v4, v8 +; CHECK-NEXT: vmv4r.v v4, v24 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: vmv8r.v v16, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 From 
2f5bfb41e2e0c4178627a3492beed35f8143ce2f Mon Sep 17 00:00:00 2001 From: hstk30-hw Date: Tue, 3 Dec 2024 11:18:17 +0800 Subject: [PATCH 018/191] [llvm-objdump] Default to --mattr=+all for AArch64be and AArch64_32 (#118311) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GNU objdump disassembles all unknown instructions by default. Complement for [D128030](https://reviews.llvm.org/D128030)。 --- llvm/test/tools/llvm-objdump/ELF/AArch64/mattr.s | 4 ++++ llvm/tools/llvm-objdump/llvm-objdump.cpp | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/llvm/test/tools/llvm-objdump/ELF/AArch64/mattr.s b/llvm/test/tools/llvm-objdump/ELF/AArch64/mattr.s index e236660770648..5fd77b579984a 100644 --- a/llvm/test/tools/llvm-objdump/ELF/AArch64/mattr.s +++ b/llvm/test/tools/llvm-objdump/ELF/AArch64/mattr.s @@ -1,6 +1,10 @@ ## When --mattr and --mcpu are both empty, disassemble all known instructions. # RUN: llvm-mc -filetype=obj -triple=aarch64 -mattr=+all %s -o %t # RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s --check-prefixes=CHECK,ALL +# RUN: llvm-mc -filetype=obj -triple=aarch64_be -mattr=+all %s -o %t +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s --check-prefixes=CHECK,ALL +# RUN: llvm-mc -filetype=obj -triple=aarch64_32 -mattr=+all %s -o %t +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s --check-prefixes=CHECK,ALL ## If --mattr or --mcpu is specified, don't default to --mattr=+all. # RUN: llvm-objdump -d --no-show-raw-insn --mattr=+v8a %t | FileCheck %s --check-prefixes=CHECK,UNKNOWN diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 86ba9193dff2d..246d5cfa05818 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -2556,7 +2556,7 @@ static void disassembleObject(ObjectFile *Obj, bool InlineRelocs) { if (!MAttrs.empty()) { for (unsigned I = 0; I != MAttrs.size(); ++I) Features.AddFeature(MAttrs[I]); - } else if (MCPU.empty() && Obj->getArch() == llvm::Triple::aarch64) { + } else if (MCPU.empty() && Obj->makeTriple().isAArch64()) { Features.AddFeature("+all"); } From 877b934831d491e4388b82794cded417b1b11862 Mon Sep 17 00:00:00 2001 From: Cyndy Ishida Date: Mon, 2 Dec 2024 19:34:25 -0800 Subject: [PATCH 019/191] [CompilerRT] Remove sanitizer support for i386 watchsim (#117013) This patch removes remaining support for i386 simulators, watch was the only one left though. resolves: rdar://102741146 --- compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake b/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake index f3c8fbe2c2fec..74a5d4edcd859 100644 --- a/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake +++ b/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake @@ -136,14 +136,13 @@ function(darwin_test_archs os valid_archs) # The simple program will build for x86_64h on the simulator because it is # compatible with x86_64 libraries (mostly), but since x86_64h isn't actually - # a valid or useful architecture for the iOS simulator we should drop it. + # a valid or useful architecture for the simulators. We should drop it. 
if(${os} MATCHES "^(iossim|tvossim|watchossim)$") list(REMOVE_ITEM archs "x86_64h") - endif() - - if(${os} MATCHES "iossim") - message(STATUS "Disabling i386 slice for iossim") - list(REMOVE_ITEM archs "i386") + if ("i386" IN_LIST archs) + list(REMOVE_ITEM archs "i386") + message(STATUS "Disabling i386 slice for simulator") + endif() endif() if(${os} MATCHES "^ios$") From 295d6b18f77fc67c186c031204a82ff82cf59daa Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Tue, 3 Dec 2024 12:04:04 +0800 Subject: [PATCH 020/191] [InstCombine] Fold `(X * (Y << K)) u>> K -> X * Y` when highbits are not demanded (#111151) Alive2: https://alive2.llvm.org/ce/z/Z7QgjH --- .../InstCombineSimplifyDemanded.cpp | 9 +++ llvm/test/Transforms/InstCombine/lshr.ll | 62 +++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 392c5c78345c2..943598a30f040 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -776,6 +776,15 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I, return InsertNewInstWith(Shl, I->getIterator()); } } + + const APInt *Factor; + if (match(I->getOperand(0), + m_OneUse(m_Mul(m_Value(X), m_APInt(Factor)))) && + Factor->countr_zero() >= ShiftAmt) { + BinaryOperator *Mul = BinaryOperator::CreateMul( + X, ConstantInt::get(X->getType(), Factor->lshr(ShiftAmt))); + return InsertNewInstWith(Mul, I->getIterator()); + } } // Unsigned shift right. diff --git a/llvm/test/Transforms/InstCombine/lshr.ll b/llvm/test/Transforms/InstCombine/lshr.ll index 00626015d2ed7..ff358c6bc772a 100644 --- a/llvm/test/Transforms/InstCombine/lshr.ll +++ b/llvm/test/Transforms/InstCombine/lshr.ll @@ -1523,3 +1523,65 @@ define <2 x i8> @bool_add_lshr_vec_wrong_shift_amt(<2 x i1> %a, <2 x i1> %b) { %lshr = lshr <2 x i8> %add, ret <2 x i8> %lshr } + +define i32 @lowbits_of_lshr_mul(i64 %x) { +; CHECK-LABEL: @lowbits_of_lshr_mul( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[X:%.*]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = mul i32 [[TMP0]], 15 +; CHECK-NEXT: ret i32 [[CONV]] +; +entry: + %mul = mul i64 %x, 64424509440 + %shift = lshr i64 %mul, 32 + %conv = trunc i64 %shift to i32 + ret i32 %conv +} + +define i32 @lowbits_of_lshr_mul_mask(i32 %x) { +; CHECK-LABEL: @lowbits_of_lshr_mul_mask( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[X:%.*]], 1600 +; CHECK-NEXT: [[CONV:%.*]] = and i32 [[TMP0]], 32704 +; CHECK-NEXT: ret i32 [[CONV]] +; +entry: + %mul = mul i32 %x, 104857600 + %shift = lshr i32 %mul, 16 + %conv = and i32 %shift, 32767 + ret i32 %conv +} + +; Negative tests + +define i32 @lowbits_of_lshr_mul_mask_multiuse(i32 %x) { +; CHECK-LABEL: @lowbits_of_lshr_mul_mask_multiuse( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[X:%.*]], 104857600 +; CHECK-NEXT: call void @use(i32 [[MUL]]) +; CHECK-NEXT: [[SHIFT:%.*]] = lshr exact i32 [[MUL]], 16 +; CHECK-NEXT: [[CONV:%.*]] = and i32 [[SHIFT]], 32704 +; CHECK-NEXT: ret i32 [[CONV]] +; +entry: + %mul = mul i32 %x, 104857600 + call void @use(i32 %mul) + %shift = lshr i32 %mul, 16 + %conv = and i32 %shift, 32767 + ret i32 %conv +} + +define i32 @lowbits_of_lshr_mul_mask_indivisible(i32 %x) { +; CHECK-LABEL: @lowbits_of_lshr_mul_mask_indivisible( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[X:%.*]], 25600 +; CHECK-NEXT: [[SHIFT:%.*]] = lshr i32 [[MUL]], 16 +; CHECK-NEXT: 
[[CONV:%.*]] = and i32 [[SHIFT]], 32767 +; CHECK-NEXT: ret i32 [[CONV]] +; +entry: + %mul = mul i32 %x, 25600 + %shift = lshr i32 %mul, 16 + %conv = and i32 %shift, 32767 + ret i32 %conv +} From 6ef4990daa1da215b25b1802f5d03cf1044f72bf Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Tue, 3 Dec 2024 14:34:18 +1100 Subject: [PATCH 021/191] Re-apply "[ORC] Track all dependencies on symbols that aren't..." with fixes. This reapplies 427fb5cc5ac, which was reverted in 08c1a6b3e18 due to bot failures. The fix was to remove an incorrect assertion: In IL_emit, during the initial worklist loop, an EDU can have all of its dependencies removed without becoming ready (because it may still have implicit dependencies that will be added back during the subsequent propagateExtraEmitDeps operation). The EDU will be marked Ready at the end of IL_emit if its Dependencies set is empty at that point. Prior to that we can only assert that it's either Emitted or Ready (which is already covered by other assertions). --- llvm/lib/ExecutionEngine/Orc/Core.cpp | 15 +++-- .../ExecutionEngine/Orc/CoreAPIsTest.cpp | 65 +++++++++++++++++++ 2 files changed, 73 insertions(+), 7 deletions(-) diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp index f226e81cc02a6..8502287016413 100644 --- a/llvm/lib/ExecutionEngine/Orc/Core.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp @@ -938,7 +938,6 @@ Error JITDylib::resolve(MaterializationResponsibility &MR, auto &MI = MII->second; for (auto &Q : MI.takeQueriesMeeting(SymbolState::Resolved)) { Q->notifySymbolMetRequiredState(Name, ResolvedSym); - Q->removeQueryDependence(*this, Name); if (Q->isComplete()) CompletedQueries.insert(std::move(Q)); } @@ -1207,9 +1206,8 @@ void JITDylib::MaterializingInfo::removeQuery( PendingQueries, [&Q](const std::shared_ptr &V) { return V.get() == &Q; }); - assert(I != PendingQueries.end() && - "Query is not attached to this MaterializingInfo"); - PendingQueries.erase(I); + if (I != PendingQueries.end()) + PendingQueries.erase(I); } JITDylib::AsynchronousSymbolQueryList @@ -2615,6 +2613,12 @@ void ExecutionSession::OL_completeLookup( LLVM_DEBUG(dbgs() << "matched, symbol already in required state\n"); Q->notifySymbolMetRequiredState(Name, SymI->second.getSymbol()); + + // If this symbol is in anything other than the Ready state then + // we need to track the dependence. 
+ if (SymI->second.getState() != SymbolState::Ready) + Q->addQueryDependence(JD, Name); + return true; } @@ -3165,7 +3169,6 @@ void ExecutionSession::IL_makeEDUEmitted( Q->notifySymbolMetRequiredState(SymbolStringPtr(Sym), Entry.getSymbol()); if (Q->isComplete()) Queries.insert(Q); - Q->removeQueryDependence(JD, SymbolStringPtr(Sym)); } } @@ -3317,8 +3320,6 @@ ExecutionSession::IL_emit(MaterializationResponsibility &MR, auto &DepMI = DepJD->MaterializingInfos[SymbolStringPtr(Dep)]; assert(DepMI.DefiningEDU && "Emitted symbol does not have a defining EDU"); - assert(!DepMI.DefiningEDU->Dependencies.empty() && - "Emitted symbol has empty dependencies (should be ready)"); assert(DepMI.DependantEDUs.empty() && "Already-emitted symbol has dependant EDUs?"); auto &DepEDUInfo = EDUInfos[DepMI.DefiningEDU.get()]; diff --git a/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp b/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp index a907dfcf2cec5..8ae05c4ddc59a 100644 --- a/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp @@ -518,6 +518,71 @@ TEST_F(CoreAPIsStandardTest, TestTrivialCircularDependency) { << "Self-dependency prevented symbol from being marked ready"; } +TEST_F(CoreAPIsStandardTest, TestBasicQueryDependenciesReporting) { + // Test that dependencies are reported as expected. + + bool DependenciesCallbackRan = false; + + std::unique_ptr FooR; + std::unique_ptr BarR; + + cantFail(JD.define(std::make_unique( + SymbolFlagsMap({{Foo, FooSym.getFlags()}}), + [&](std::unique_ptr R) { + FooR = std::move(R); + }))); + + cantFail(JD.define(std::make_unique( + SymbolFlagsMap({{Bar, BarSym.getFlags()}}), + [&](std::unique_ptr R) { + BarR = std::move(R); + }))); + + cantFail(JD.define(std::make_unique( + SymbolFlagsMap({{Baz, BazSym.getFlags()}}), + [&](std::unique_ptr R) { + cantFail(R->notifyResolved({{Baz, BazSym}})); + cantFail(R->notifyEmitted({})); + }))); + + // First issue a lookup for Foo and Bar so that we can put them + // into the required states for the test lookup below. 
+ ES.lookup( + LookupKind::Static, makeJITDylibSearchOrder(&JD), + SymbolLookupSet({Foo, Bar}), SymbolState::Resolved, + [](Expected Result) { + EXPECT_THAT_EXPECTED(std::move(Result), Succeeded()); + }, + NoDependenciesToRegister); + + cantFail(FooR->notifyResolved({{Foo, FooSym}})); + cantFail(FooR->notifyEmitted({})); + + cantFail(BarR->notifyResolved({{Bar, BarSym}})); + + ES.lookup( + LookupKind::Static, makeJITDylibSearchOrder(&JD), + SymbolLookupSet({Foo, Bar, Baz}), SymbolState::Resolved, + [](Expected Result) { + EXPECT_THAT_EXPECTED(std::move(Result), Succeeded()); + }, + [&](const SymbolDependenceMap &Dependencies) { + EXPECT_EQ(Dependencies.size(), 1U) + << "Expect dependencies on only one JITDylib"; + EXPECT_TRUE(Dependencies.count(&JD)) + << "Expect dependencies on JD only"; + auto &Deps = Dependencies.begin()->second; + EXPECT_EQ(Deps.size(), 2U); + EXPECT_TRUE(Deps.count(Bar)); + EXPECT_TRUE(Deps.count(Baz)); + DependenciesCallbackRan = true; + }); + + cantFail(BarR->notifyEmitted({})); + + EXPECT_TRUE(DependenciesCallbackRan); +} + TEST_F(CoreAPIsStandardTest, TestCircularDependenceInOneJITDylib) { // Test that a circular symbol dependency between three symbols in a JITDylib // does not prevent any symbol from becoming 'ready' once all symbols are From aba6bb0820b247d4caf4b5e00810909214a58053 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Tue, 3 Dec 2024 15:30:24 +1100 Subject: [PATCH 022/191] [ORC][JITLink] Add jitlink::Scope::SideEffectsOnly, use it in ORC Platforms. SideEffectsOnly is a new jitlink::Scope value that corresponds to the JITSymbolFlags::MaterializationSideEffectsOnly flag: Symbols with this scope can be looked up (and form part of the initial interface of a LinkGraph) but never actually resolve to an address (so can only be looked up with a WeaklyReferencedSymbol lookup). Previously ObjectLinkingLayer implicitly treated JITLink symbols as having this scope, regardless of a Symbol's actual scope, if the MaterializationSideEffectsOnly flag was set on the corresponding symbol in the MaterializationResponsibility object. Using an explicit scope in JITLink for this (1) allows JITLink plugins to identify and correctly handle side-effects-only symbols, and (2) allows raw LinkGraphs to define side-effects-only symbols without clients having to manually modify their `MaterializationUnit::Interface`. --- llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h | 3 +++ llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp | 2 +- llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp | 2 +- llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp | 6 +++--- llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp | 12 ++++++------ 5 files changed, 14 insertions(+), 11 deletions(-) diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h index 9844214c537a0..2831ebb3be798 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h @@ -393,10 +393,13 @@ const char *getLinkageName(Linkage L); /// Defines the scope in which this symbol should be visible: /// Default -- Visible in the public interface of the linkage unit. /// Hidden -- Visible within the linkage unit, but not exported from it. +/// SideEffectsOnly -- Like hidden, but symbol can only be looked up once +/// to trigger materialization of the containing graph. /// Local -- Visible only within the LinkGraph. 
enum class Scope : uint8_t { Default, Hidden, + SideEffectsOnly, Local }; diff --git a/llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp index 007e18e307399..9d655c2e1e8be 100644 --- a/llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp +++ b/llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp @@ -76,7 +76,7 @@ class COFFHeaderMaterializationUnit : public MaterializationUnit { // Init symbol is __ImageBase symbol. auto &ImageBaseSymbol = G->addDefinedSymbol( HeaderBlock, 0, *R->getInitializerSymbol(), HeaderBlock.getSize(), - jitlink::Linkage::Strong, jitlink::Scope::Default, false, true); + jitlink::Linkage::Strong, jitlink::Scope::SideEffectsOnly, false, true); addImageBaseRelocationEdge(HeaderBlock, ImageBaseSymbol); diff --git a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp index 431c64996b2c5..44d282fc6ff67 100644 --- a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp @@ -197,7 +197,7 @@ class DSOHandleMaterializationUnit : public MaterializationUnit { 8, 0); auto &DSOHandleSymbol = G->addDefinedSymbol( DSOHandleBlock, 0, *R->getInitializerSymbol(), DSOHandleBlock.getSize(), - jitlink::Linkage::Strong, jitlink::Scope::Default, false, true); + jitlink::Linkage::Strong, jitlink::Scope::SideEffectsOnly, false, true); DSOHandleBlock.addEdge(EdgeKind, 0, DSOHandleSymbol, 0); ENP.getObjectLinkingLayer().emit(std::move(R), std::move(G)); diff --git a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp index 3e02beb0baa86..1b18a4d0596c1 100644 --- a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp @@ -1001,9 +1001,9 @@ Error MachOPlatform::MachOPlatformPlugin::preserveImportantSections( // to the first block. if (!InitSym) { auto &B = **InitSection->blocks().begin(); - InitSym = &G.addDefinedSymbol(B, 0, *InitSymName, B.getSize(), - jitlink::Linkage::Strong, - jitlink::Scope::Default, false, true); + InitSym = &G.addDefinedSymbol( + B, 0, *InitSymName, B.getSize(), jitlink::Linkage::Strong, + jitlink::Scope::SideEffectsOnly, false, true); } // Add keep-alive edges to anonymous symbols in all other init blocks. 
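The ObjectLinkingLayer hunk below swaps the `!= Scope::Local` checks for `< Scope::SideEffectsOnly`, which is only correct because of the declaration order of the enum. A minimal sketch of that ordering assumption (the enum mirrors the JITLink.h hunk above; the helper name is invented for illustration and this is not the actual ORC code):

```cpp
// Sketch only: Scope enumerators are declared
// Default < Hidden < SideEffectsOnly < Local, so "scope < SideEffectsOnly"
// selects exactly the symbols that should receive real addresses in the
// materialization result.
#include <cstdint>

enum class Scope : uint8_t { Default, Hidden, SideEffectsOnly, Local };

// Default and Hidden symbols resolve to addresses; SideEffectsOnly and Local
// symbols are skipped when building the resolved-symbol map.
inline bool contributesToResult(Scope S) {
  return S < Scope::SideEffectsOnly;
}
```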
diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp index c1c55408c7858..c5342c4f4deb3 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp @@ -65,6 +65,8 @@ JITSymbolFlags getJITSymbolFlagsForSymbol(Symbol &Sym) { if (Sym.getScope() == Scope::Default) Flags |= JITSymbolFlags::Exported; + else if (Sym.getScope() == Scope::SideEffectsOnly) + Flags |= JITSymbolFlags::MaterializationSideEffectsOnly; if (Sym.isCallable()) Flags |= JITSymbolFlags::Callable; @@ -236,7 +238,7 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { SymbolMap InternedResult; for (auto *Sym : G.defined_symbols()) - if (Sym->getScope() != Scope::Local) { + if (Sym->getScope() < Scope::SideEffectsOnly) { auto InternedName = ES.intern(Sym->getName()); auto Ptr = getJITSymbolPtrForSymbol(*Sym, G.getTargetTriple()); auto Flags = getJITSymbolFlagsForSymbol(*Sym); @@ -249,7 +251,7 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { } for (auto *Sym : G.absolute_symbols()) - if (Sym->getScope() != Scope::Local) { + if (Sym->getScope() < Scope::SideEffectsOnly) { auto InternedName = ES.intern(Sym->getName()); auto Ptr = getJITSymbolPtrForSymbol(*Sym, G.getTargetTriple()); auto Flags = getJITSymbolFlagsForSymbol(*Sym); @@ -281,11 +283,9 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { // If this is a materialization-side-effects only symbol then bump // the counter and remove in from the result, otherwise make sure that // it's defined. - if (Flags.hasMaterializationSideEffectsOnly()) { + if (Flags.hasMaterializationSideEffectsOnly()) ++NumMaterializationSideEffectsOnlySymbols; - InternedResult.erase(Sym); - continue; - } else if (I == InternedResult.end()) + else if (I == InternedResult.end()) MissingSymbols.push_back(Sym); else if (Layer.OverrideObjectFlags) I->second.setFlags(Flags); From 28e2a8912102ac442166a593f2efb15c5bd30346 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Tue, 3 Dec 2024 16:56:23 +1100 Subject: [PATCH 023/191] Revert "[ORC][JITLink] Add jitlink::Scope::SideEffectsOnly, use it in ORC Platforms." This reverts commit aba6bb0820b247d4caf4b5e00810909214a58053 while I investigate bot failures (e.g. https://lab.llvm.org/buildbot/#/builders/143/builds/3848) --- llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h | 3 --- llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp | 2 +- llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp | 2 +- llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp | 6 +++--- llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp | 12 ++++++------ 5 files changed, 11 insertions(+), 14 deletions(-) diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h index 2831ebb3be798..9844214c537a0 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h @@ -393,13 +393,10 @@ const char *getLinkageName(Linkage L); /// Defines the scope in which this symbol should be visible: /// Default -- Visible in the public interface of the linkage unit. /// Hidden -- Visible within the linkage unit, but not exported from it. -/// SideEffectsOnly -- Like hidden, but symbol can only be looked up once -/// to trigger materialization of the containing graph. /// Local -- Visible only within the LinkGraph. 
enum class Scope : uint8_t { Default, Hidden, - SideEffectsOnly, Local }; diff --git a/llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp index 9d655c2e1e8be..007e18e307399 100644 --- a/llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp +++ b/llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp @@ -76,7 +76,7 @@ class COFFHeaderMaterializationUnit : public MaterializationUnit { // Init symbol is __ImageBase symbol. auto &ImageBaseSymbol = G->addDefinedSymbol( HeaderBlock, 0, *R->getInitializerSymbol(), HeaderBlock.getSize(), - jitlink::Linkage::Strong, jitlink::Scope::SideEffectsOnly, false, true); + jitlink::Linkage::Strong, jitlink::Scope::Default, false, true); addImageBaseRelocationEdge(HeaderBlock, ImageBaseSymbol); diff --git a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp index 44d282fc6ff67..431c64996b2c5 100644 --- a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp @@ -197,7 +197,7 @@ class DSOHandleMaterializationUnit : public MaterializationUnit { 8, 0); auto &DSOHandleSymbol = G->addDefinedSymbol( DSOHandleBlock, 0, *R->getInitializerSymbol(), DSOHandleBlock.getSize(), - jitlink::Linkage::Strong, jitlink::Scope::SideEffectsOnly, false, true); + jitlink::Linkage::Strong, jitlink::Scope::Default, false, true); DSOHandleBlock.addEdge(EdgeKind, 0, DSOHandleSymbol, 0); ENP.getObjectLinkingLayer().emit(std::move(R), std::move(G)); diff --git a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp index 1b18a4d0596c1..3e02beb0baa86 100644 --- a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp @@ -1001,9 +1001,9 @@ Error MachOPlatform::MachOPlatformPlugin::preserveImportantSections( // to the first block. if (!InitSym) { auto &B = **InitSection->blocks().begin(); - InitSym = &G.addDefinedSymbol( - B, 0, *InitSymName, B.getSize(), jitlink::Linkage::Strong, - jitlink::Scope::SideEffectsOnly, false, true); + InitSym = &G.addDefinedSymbol(B, 0, *InitSymName, B.getSize(), + jitlink::Linkage::Strong, + jitlink::Scope::Default, false, true); } // Add keep-alive edges to anonymous symbols in all other init blocks. 
diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp index c5342c4f4deb3..c1c55408c7858 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp @@ -65,8 +65,6 @@ JITSymbolFlags getJITSymbolFlagsForSymbol(Symbol &Sym) { if (Sym.getScope() == Scope::Default) Flags |= JITSymbolFlags::Exported; - else if (Sym.getScope() == Scope::SideEffectsOnly) - Flags |= JITSymbolFlags::MaterializationSideEffectsOnly; if (Sym.isCallable()) Flags |= JITSymbolFlags::Callable; @@ -238,7 +236,7 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { SymbolMap InternedResult; for (auto *Sym : G.defined_symbols()) - if (Sym->getScope() < Scope::SideEffectsOnly) { + if (Sym->getScope() != Scope::Local) { auto InternedName = ES.intern(Sym->getName()); auto Ptr = getJITSymbolPtrForSymbol(*Sym, G.getTargetTriple()); auto Flags = getJITSymbolFlagsForSymbol(*Sym); @@ -251,7 +249,7 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { } for (auto *Sym : G.absolute_symbols()) - if (Sym->getScope() < Scope::SideEffectsOnly) { + if (Sym->getScope() != Scope::Local) { auto InternedName = ES.intern(Sym->getName()); auto Ptr = getJITSymbolPtrForSymbol(*Sym, G.getTargetTriple()); auto Flags = getJITSymbolFlagsForSymbol(*Sym); @@ -283,9 +281,11 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { // If this is a materialization-side-effects only symbol then bump // the counter and remove in from the result, otherwise make sure that // it's defined. - if (Flags.hasMaterializationSideEffectsOnly()) + if (Flags.hasMaterializationSideEffectsOnly()) { ++NumMaterializationSideEffectsOnlySymbols; - else if (I == InternedResult.end()) + InternedResult.erase(Sym); + continue; + } else if (I == InternedResult.end()) MissingSymbols.push_back(Sym); else if (Layer.OverrideObjectFlags) I->second.setFlags(Flags); From dac9736d05f51b47633b51d599a07ff8d1d65df3 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Tue, 3 Dec 2024 06:59:56 +0100 Subject: [PATCH 024/191] [clang][bytecode][NFC] Diagnose no-constexpr memcpy/strlen versions (#118429) --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index d7e32c491b03b..aab380c0925d2 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -222,11 +222,25 @@ static bool interp__builtin_strcmp(InterpState &S, CodePtr OpPC, return true; } +static void diagnoseNonConstexprBuiltin(InterpState &S, CodePtr OpPC, + unsigned ID) { + auto Loc = S.Current->getSource(OpPC); + if (S.getLangOpts().CPlusPlus11) + S.CCEDiag(Loc, diag::note_constexpr_invalid_function) + << /*isConstexpr=*/0 << /*isConstructor=*/0 + << ("'" + S.getASTContext().BuiltinInfo.getName(ID) + "'").str(); + else + S.CCEDiag(Loc, diag::note_invalid_subexpr_in_const_expr); +} static bool interp__builtin_strlen(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, - const CallExpr *Call) { + const Function *Func, const CallExpr *Call) { + unsigned ID = Func->getBuiltinID(); const Pointer &StrPtr = getParam(Frame, 0); + if (ID == Builtin::BIstrlen) + diagnoseNonConstexprBuiltin(S, OpPC, ID); + if (!CheckArray(S, OpPC, StrPtr)) return false; @@ -1781,12 +1795,16 @@ static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC, const InterpFrame 
*Frame, const Function *Func, const CallExpr *Call) { assert(Call->getNumArgs() == 3); + unsigned ID = Func->getBuiltinID(); Pointer DestPtr = getParam(Frame, 0); const Pointer &SrcPtr = getParam(Frame, 1); const APSInt &Size = peekToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(2))); assert(!Size.isSigned() && "memcpy and friends take an unsigned size"); + if (ID == Builtin::BImemcpy) + diagnoseNonConstexprBuiltin(S, OpPC, ID); + if (DestPtr.isDummy() || SrcPtr.isDummy()) return false; @@ -1830,7 +1848,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F, return false; break; case Builtin::BI__builtin_strlen: - if (!interp__builtin_strlen(S, OpPC, Frame, Call)) + case Builtin::BIstrlen: + if (!interp__builtin_strlen(S, OpPC, Frame, F, Call)) return false; break; case Builtin::BI__builtin_nan: @@ -2271,6 +2290,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F, break; case Builtin::BI__builtin_memcpy: + case Builtin::BImemcpy: if (!interp__builtin_memcpy(S, OpPC, Frame, F, Call)) return false; break; From de415fbb450d0e15c535f0ccc135e2368a15bf6f Mon Sep 17 00:00:00 2001 From: Rajat Bajpai Date: Tue, 3 Dec 2024 11:31:36 +0530 Subject: [PATCH 025/191] [InstCombine][FP] Fix nnan preservation for transform fcmp + sel => fmax/fmin (#117977) Preserve `nnan` constraint only if present on both `fcmp` and `select`. Alive2: https://alive2.llvm.org/ce/z/ZNDjzt --- .../InstCombine/InstCombineSelect.cpp | 22 ++++++--- .../InstCombine/fcmp-fadd-select.ll | 46 +++++++++---------- .../Transforms/InstCombine/fcmp-select.ll | 4 +- llvm/test/Transforms/InstCombine/fneg.ll | 2 +- llvm/test/Transforms/InstCombine/minmax-fp.ll | 8 ++-- .../InstCombine/unordered-fcmp-select.ll | 2 +- 6 files changed, 47 insertions(+), 37 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index dde35fe3f69dd..c7a0c35d099cc 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -3897,17 +3897,27 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { if (SIFPOp) { // TODO: Try to forward-propagate FMF from select arms to the select. + auto *FCmp = dyn_cast(CondVal); + // Canonicalize select of FP values where NaN and -0.0 are not valid as // minnum/maxnum intrinsics. 
if (SIFPOp->hasNoNaNs() && SIFPOp->hasNoSignedZeros()) { Value *X, *Y; - if (match(&SI, m_OrdOrUnordFMax(m_Value(X), m_Value(Y)))) - return replaceInstUsesWith( - SI, Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, X, Y, &SI)); + if (match(&SI, m_OrdOrUnordFMax(m_Value(X), m_Value(Y)))) { + Value *BinIntr = + Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, X, Y, &SI); + if (auto *BinIntrInst = dyn_cast(BinIntr)) + BinIntrInst->setHasNoNaNs(FCmp->hasNoNaNs()); + return replaceInstUsesWith(SI, BinIntr); + } - if (match(&SI, m_OrdOrUnordFMin(m_Value(X), m_Value(Y)))) - return replaceInstUsesWith( - SI, Builder.CreateBinaryIntrinsic(Intrinsic::minnum, X, Y, &SI)); + if (match(&SI, m_OrdOrUnordFMin(m_Value(X), m_Value(Y)))) { + Value *BinIntr = + Builder.CreateBinaryIntrinsic(Intrinsic::minnum, X, Y, &SI); + if (auto *BinIntrInst = dyn_cast(BinIntr)) + BinIntrInst->setHasNoNaNs(FCmp->hasNoNaNs()); + return replaceInstUsesWith(SI, BinIntr); + } } } diff --git a/llvm/test/Transforms/InstCombine/fcmp-fadd-select.ll b/llvm/test/Transforms/InstCombine/fcmp-fadd-select.ll index c49aea3e82b56..0d0af91608e7a 100644 --- a/llvm/test/Transforms/InstCombine/fcmp-fadd-select.ll +++ b/llvm/test/Transforms/InstCombine/fcmp-fadd-select.ll @@ -6,7 +6,7 @@ define float @test_fcmp_ogt_fadd_select_constant(float %in) { ; CHECK-LABEL: define float @test_fcmp_ogt_fadd_select_constant( ; CHECK-SAME: float [[IN:%.*]]) { -; CHECK-NEXT: [[SEL_NEW:%.*]] = call nnan nsz float @llvm.maxnum.f32(float [[IN]], float 0.000000e+00) +; CHECK-NEXT: [[SEL_NEW:%.*]] = call nsz float @llvm.maxnum.f32(float [[IN]], float 0.000000e+00) ; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd nnan nsz float [[SEL_NEW]], 1.000000e+00 ; CHECK-NEXT: ret float [[ADD_NEW]] ; @@ -19,7 +19,7 @@ define float @test_fcmp_ogt_fadd_select_constant(float %in) { define float @test_fcmp_ogt_fadd_select_constant_swapped(float %in) { ; CHECK-LABEL: define float @test_fcmp_ogt_fadd_select_constant_swapped( ; CHECK-SAME: float [[IN:%.*]]) { -; CHECK-NEXT: [[SEL_NEW:%.*]] = call nnan nsz float @llvm.maxnum.f32(float [[IN]], float 0.000000e+00) +; CHECK-NEXT: [[SEL_NEW:%.*]] = call nsz float @llvm.maxnum.f32(float [[IN]], float 0.000000e+00) ; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd nnan nsz float [[SEL_NEW]], 1.000000e+00 ; CHECK-NEXT: ret float [[ADD_NEW]] ; @@ -32,7 +32,7 @@ define float @test_fcmp_ogt_fadd_select_constant_swapped(float %in) { define float @test_fcmp_ogt_fadd_select_neg_constant(float %in) { ; CHECK-LABEL: define float @test_fcmp_ogt_fadd_select_neg_constant( ; CHECK-SAME: float [[IN:%.*]]) { -; CHECK-NEXT: [[SEL_NEW:%.*]] = call nnan nsz float @llvm.maxnum.f32(float [[IN]], float 0.000000e+00) +; CHECK-NEXT: [[SEL_NEW:%.*]] = call nsz float @llvm.maxnum.f32(float [[IN]], float 0.000000e+00) ; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd nnan nsz float [[SEL_NEW]], 1.000000e+00 ; CHECK-NEXT: ret float [[ADD_NEW]] ; @@ -45,7 +45,7 @@ define float @test_fcmp_ogt_fadd_select_neg_constant(float %in) { define float @test_fcmp_ogt_fadd_select_fastmath_preserve(float %in) { ; CHECK-LABEL: define float @test_fcmp_ogt_fadd_select_fastmath_preserve( ; CHECK-SAME: float [[IN:%.*]]) { -; CHECK-NEXT: [[SEL_NEW:%.*]] = call nnan nsz float @llvm.maxnum.f32(float [[IN]], float 0.000000e+00) +; CHECK-NEXT: [[SEL_NEW:%.*]] = call nsz float @llvm.maxnum.f32(float [[IN]], float 0.000000e+00) ; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd nnan nsz float [[SEL_NEW]], 1.000000e+00 ; CHECK-NEXT: ret float [[ADD_NEW]] ; @@ -58,7 +58,7 @@ define float @test_fcmp_ogt_fadd_select_fastmath_preserve(float 
%in) { define <2 x float> @test_fcmp_ogt_fadd_select_constant_vectors(<2 x float> %in) { ; CHECK-LABEL: define <2 x float> @test_fcmp_ogt_fadd_select_constant_vectors( ; CHECK-SAME: <2 x float> [[IN:%.*]]) { -; CHECK-NEXT: [[SEL_NEW:%.*]] = call nnan nsz <2 x float> @llvm.maxnum.v2f32(<2 x float> [[IN]], <2 x float> zeroinitializer) +; CHECK-NEXT: [[SEL_NEW:%.*]] = call nsz <2 x float> @llvm.maxnum.v2f32(<2 x float> [[IN]], <2 x float> zeroinitializer) ; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd nnan nsz <2 x float> [[SEL_NEW]], splat (float 1.000000e+00) ; CHECK-NEXT: ret <2 x float> [[ADD_NEW]] ; @@ -74,7 +74,7 @@ define <2 x float> @test_fcmp_ogt_fadd_select_constant_vectors(<2 x float> %in) define float @test_fcmp_olt_fadd_select_constant(float %in) { ; CHECK-LABEL: define float @test_fcmp_olt_fadd_select_constant( ; CHECK-SAME: float [[IN:%.*]]) { -; CHECK-NEXT: [[SEL_NEW:%.*]] = call nnan nsz float @llvm.minnum.f32(float [[IN]], float 0.000000e+00) +; CHECK-NEXT: [[SEL_NEW:%.*]] = call nsz float @llvm.minnum.f32(float [[IN]], float 0.000000e+00) ; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd nnan nsz float [[SEL_NEW]], 1.000000e+00 ; CHECK-NEXT: ret float [[ADD_NEW]] ; @@ -87,7 +87,7 @@ define float @test_fcmp_olt_fadd_select_constant(float %in) { define float @test_fcmp_olt_fadd_select_constant_swapped(float %in) { ; CHECK-LABEL: define float @test_fcmp_olt_fadd_select_constant_swapped( ; CHECK-SAME: float [[IN:%.*]]) { -; CHECK-NEXT: [[SEL_NEW:%.*]] = call nnan nsz float @llvm.minnum.f32(float [[IN]], float 0.000000e+00) +; CHECK-NEXT: [[SEL_NEW:%.*]] = call nsz float @llvm.minnum.f32(float [[IN]], float 0.000000e+00) ; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd nnan nsz float [[SEL_NEW]], 1.000000e+00 ; CHECK-NEXT: ret float [[ADD_NEW]] ; @@ -100,7 +100,7 @@ define float @test_fcmp_olt_fadd_select_constant_swapped(float %in) { define float @test_fcmp_olt_fadd_select_neg_constant(float %in) { ; CHECK-LABEL: define float @test_fcmp_olt_fadd_select_neg_constant( ; CHECK-SAME: float [[IN:%.*]]) { -; CHECK-NEXT: [[SEL_NEW:%.*]] = call nnan nsz float @llvm.minnum.f32(float [[IN]], float 0.000000e+00) +; CHECK-NEXT: [[SEL_NEW:%.*]] = call nsz float @llvm.minnum.f32(float [[IN]], float 0.000000e+00) ; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd nnan nsz float [[SEL_NEW]], 1.000000e+00 ; CHECK-NEXT: ret float [[ADD_NEW]] ; @@ -113,7 +113,7 @@ define float @test_fcmp_olt_fadd_select_neg_constant(float %in) { define float @test_fcmp_olt_fadd_select_fastmath_preserve(float %in) { ; CHECK-LABEL: define float @test_fcmp_olt_fadd_select_fastmath_preserve( ; CHECK-SAME: float [[IN:%.*]]) { -; CHECK-NEXT: [[SEL_NEW:%.*]] = call nnan nsz float @llvm.minnum.f32(float [[IN]], float 0.000000e+00) +; CHECK-NEXT: [[SEL_NEW:%.*]] = call nsz float @llvm.minnum.f32(float [[IN]], float 0.000000e+00) ; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd nnan nsz float [[SEL_NEW]], 1.000000e+00 ; CHECK-NEXT: ret float [[ADD_NEW]] ; @@ -126,7 +126,7 @@ define float @test_fcmp_olt_fadd_select_fastmath_preserve(float %in) { define <2 x float> @test_fcmp_olt_fadd_select_constant_vectors(<2 x float> %in) { ; CHECK-LABEL: define <2 x float> @test_fcmp_olt_fadd_select_constant_vectors( ; CHECK-SAME: <2 x float> [[IN:%.*]]) { -; CHECK-NEXT: [[SEL_NEW:%.*]] = call nnan nsz <2 x float> @llvm.minnum.v2f32(<2 x float> [[IN]], <2 x float> zeroinitializer) +; CHECK-NEXT: [[SEL_NEW:%.*]] = call nsz <2 x float> @llvm.minnum.v2f32(<2 x float> [[IN]], <2 x float> zeroinitializer) ; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd nnan nsz <2 x float> [[SEL_NEW]], splat (float 1.000000e+00) 
; CHECK-NEXT: ret <2 x float> [[ADD_NEW]] ; @@ -142,7 +142,7 @@ define <2 x float> @test_fcmp_olt_fadd_select_constant_vectors(<2 x float> %in) define float @test_fcmp_oge_fadd_select_constant(float %in) { ; CHECK-LABEL: define float @test_fcmp_oge_fadd_select_constant( ; CHECK-SAME: float [[IN:%.*]]) { -; CHECK-NEXT: [[SEL_NEW:%.*]] = call nnan nsz float @llvm.maxnum.f32(float [[IN]], float 0.000000e+00) +; CHECK-NEXT: [[SEL_NEW:%.*]] = call nsz float @llvm.maxnum.f32(float [[IN]], float 0.000000e+00) ; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd nnan nsz float [[SEL_NEW]], 1.000000e+00 ; CHECK-NEXT: ret float [[ADD_NEW]] ; @@ -155,7 +155,7 @@ define float @test_fcmp_oge_fadd_select_constant(float %in) { define float @test_fcmp_oge_fadd_select_constant_swapped(float %in) { ; CHECK-LABEL: define float @test_fcmp_oge_fadd_select_constant_swapped( ; CHECK-SAME: float [[IN:%.*]]) { -; CHECK-NEXT: [[SEL_NEW:%.*]] = call nnan nsz float @llvm.maxnum.f32(float [[IN]], float 0.000000e+00) +; CHECK-NEXT: [[SEL_NEW:%.*]] = call nsz float @llvm.maxnum.f32(float [[IN]], float 0.000000e+00) ; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd nnan nsz float [[SEL_NEW]], 1.000000e+00 ; CHECK-NEXT: ret float [[ADD_NEW]] ; @@ -168,7 +168,7 @@ define float @test_fcmp_oge_fadd_select_constant_swapped(float %in) { define float @test_fcmp_oge_fadd_select_neg_constant(float %in) { ; CHECK-LABEL: define float @test_fcmp_oge_fadd_select_neg_constant( ; CHECK-SAME: float [[IN:%.*]]) { -; CHECK-NEXT: [[SEL_NEW:%.*]] = call nnan nsz float @llvm.maxnum.f32(float [[IN]], float 0.000000e+00) +; CHECK-NEXT: [[SEL_NEW:%.*]] = call nsz float @llvm.maxnum.f32(float [[IN]], float 0.000000e+00) ; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd nnan nsz float [[SEL_NEW]], 1.000000e+00 ; CHECK-NEXT: ret float [[ADD_NEW]] ; @@ -181,7 +181,7 @@ define float @test_fcmp_oge_fadd_select_neg_constant(float %in) { define float @test_fcmp_oge_fadd_select_fastmath_preserve(float %in) { ; CHECK-LABEL: define float @test_fcmp_oge_fadd_select_fastmath_preserve( ; CHECK-SAME: float [[IN:%.*]]) { -; CHECK-NEXT: [[SEL_NEW:%.*]] = call nnan nsz float @llvm.maxnum.f32(float [[IN]], float 0.000000e+00) +; CHECK-NEXT: [[SEL_NEW:%.*]] = call nsz float @llvm.maxnum.f32(float [[IN]], float 0.000000e+00) ; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd nnan nsz float [[SEL_NEW]], 1.000000e+00 ; CHECK-NEXT: ret float [[ADD_NEW]] ; @@ -194,7 +194,7 @@ define float @test_fcmp_oge_fadd_select_fastmath_preserve(float %in) { define <2 x float> @test_fcmp_oge_fadd_select_constant_vectors(<2 x float> %in) { ; CHECK-LABEL: define <2 x float> @test_fcmp_oge_fadd_select_constant_vectors( ; CHECK-SAME: <2 x float> [[IN:%.*]]) { -; CHECK-NEXT: [[SEL_NEW:%.*]] = call nnan nsz <2 x float> @llvm.maxnum.v2f32(<2 x float> [[IN]], <2 x float> zeroinitializer) +; CHECK-NEXT: [[SEL_NEW:%.*]] = call nsz <2 x float> @llvm.maxnum.v2f32(<2 x float> [[IN]], <2 x float> zeroinitializer) ; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd nnan nsz <2 x float> [[SEL_NEW]], splat (float 1.000000e+00) ; CHECK-NEXT: ret <2 x float> [[ADD_NEW]] ; @@ -210,7 +210,7 @@ define <2 x float> @test_fcmp_oge_fadd_select_constant_vectors(<2 x float> %in) define float @test_fcmp_ole_fadd_select_constant(float %in) { ; CHECK-LABEL: define float @test_fcmp_ole_fadd_select_constant( ; CHECK-SAME: float [[IN:%.*]]) { -; CHECK-NEXT: [[SEL_NEW:%.*]] = call nnan nsz float @llvm.minnum.f32(float [[IN]], float 0.000000e+00) +; CHECK-NEXT: [[SEL_NEW:%.*]] = call nsz float @llvm.minnum.f32(float [[IN]], float 0.000000e+00) ; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd nnan 
nsz float [[SEL_NEW]], 1.000000e+00 ; CHECK-NEXT: ret float [[ADD_NEW]] ; @@ -223,7 +223,7 @@ define float @test_fcmp_ole_fadd_select_constant(float %in) { define float @test_fcmp_ole_fadd_select_constant_swapped(float %in) { ; CHECK-LABEL: define float @test_fcmp_ole_fadd_select_constant_swapped( ; CHECK-SAME: float [[IN:%.*]]) { -; CHECK-NEXT: [[SEL_NEW:%.*]] = call nnan nsz float @llvm.minnum.f32(float [[IN]], float 0.000000e+00) +; CHECK-NEXT: [[SEL_NEW:%.*]] = call nsz float @llvm.minnum.f32(float [[IN]], float 0.000000e+00) ; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd nnan nsz float [[SEL_NEW]], 1.000000e+00 ; CHECK-NEXT: ret float [[ADD_NEW]] ; @@ -236,7 +236,7 @@ define float @test_fcmp_ole_fadd_select_constant_swapped(float %in) { define float @test_fcmp_ole_fadd_select_neg_constant(float %in) { ; CHECK-LABEL: define float @test_fcmp_ole_fadd_select_neg_constant( ; CHECK-SAME: float [[IN:%.*]]) { -; CHECK-NEXT: [[SEL_NEW:%.*]] = call nnan nsz float @llvm.minnum.f32(float [[IN]], float 0.000000e+00) +; CHECK-NEXT: [[SEL_NEW:%.*]] = call nsz float @llvm.minnum.f32(float [[IN]], float 0.000000e+00) ; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd nnan nsz float [[SEL_NEW]], 1.000000e+00 ; CHECK-NEXT: ret float [[ADD_NEW]] ; @@ -249,7 +249,7 @@ define float @test_fcmp_ole_fadd_select_neg_constant(float %in) { define float @test_fcmp_ole_fadd_select_fastmath_preserve(float %in) { ; CHECK-LABEL: define float @test_fcmp_ole_fadd_select_fastmath_preserve( ; CHECK-SAME: float [[IN:%.*]]) { -; CHECK-NEXT: [[SEL_NEW:%.*]] = call nnan nsz float @llvm.minnum.f32(float [[IN]], float 0.000000e+00) +; CHECK-NEXT: [[SEL_NEW:%.*]] = call nsz float @llvm.minnum.f32(float [[IN]], float 0.000000e+00) ; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd nnan nsz float [[SEL_NEW]], 1.000000e+00 ; CHECK-NEXT: ret float [[ADD_NEW]] ; @@ -262,7 +262,7 @@ define float @test_fcmp_ole_fadd_select_fastmath_preserve(float %in) { define <2 x float> @test_fcmp_ole_fadd_select_constant_vectors(<2 x float> %in) { ; CHECK-LABEL: define <2 x float> @test_fcmp_ole_fadd_select_constant_vectors( ; CHECK-SAME: <2 x float> [[IN:%.*]]) { -; CHECK-NEXT: [[SEL_NEW:%.*]] = call nnan nsz <2 x float> @llvm.minnum.v2f32(<2 x float> [[IN]], <2 x float> zeroinitializer) +; CHECK-NEXT: [[SEL_NEW:%.*]] = call nsz <2 x float> @llvm.minnum.v2f32(<2 x float> [[IN]], <2 x float> zeroinitializer) ; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd nnan nsz <2 x float> [[SEL_NEW]], splat (float 1.000000e+00) ; CHECK-NEXT: ret <2 x float> [[ADD_NEW]] ; @@ -637,7 +637,7 @@ define float @test_fcmp_multiple_uses(float %in) { define float @test_fcmp_ogt_fadd_select_rewrite_flags1(float %in) { ; CHECK-LABEL: define float @test_fcmp_ogt_fadd_select_rewrite_flags1( ; CHECK-SAME: float [[IN:%.*]]) { -; CHECK-NEXT: [[SEL_NEW:%.*]] = call reassoc nnan nsz arcp contract afn float @llvm.maxnum.f32(float [[IN]], float 0.000000e+00) +; CHECK-NEXT: [[SEL_NEW:%.*]] = call reassoc nsz arcp contract afn float @llvm.maxnum.f32(float [[IN]], float 0.000000e+00) ; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd reassoc nnan nsz arcp contract afn float [[SEL_NEW]], 1.000000e+00 ; CHECK-NEXT: ret float [[ADD_NEW]] ; @@ -650,7 +650,7 @@ define float @test_fcmp_ogt_fadd_select_rewrite_flags1(float %in) { define float @test_fcmp_ogt_fadd_select_rewrite_flags2(float %in) { ; CHECK-LABEL: define float @test_fcmp_ogt_fadd_select_rewrite_flags2( ; CHECK-SAME: float [[IN:%.*]]) { -; CHECK-NEXT: [[SEL_NEW:%.*]] = call nnan nsz float @llvm.maxnum.f32(float [[IN]], float 0.000000e+00) +; CHECK-NEXT: [[SEL_NEW:%.*]] = call nsz float 
@llvm.maxnum.f32(float [[IN]], float 0.000000e+00) ; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd nnan nsz float [[SEL_NEW]], 1.000000e+00 ; CHECK-NEXT: ret float [[ADD_NEW]] ; @@ -667,7 +667,7 @@ define float @test_fcmp_ogt_fadd_select_rewrite_and_fastmath(float %in) { ; CHECK-NEXT: [[ADD_NEW:%.*]] = fadd fast float [[SEL_NEW]], 1.000000e+00 ; CHECK-NEXT: ret float [[ADD_NEW]] ; - %cmp1 = fcmp ogt float %in, 0.000000e+00 + %cmp1 = fcmp nnan ogt float %in, 0.000000e+00 %add = fadd fast reassoc float %in, 1.000000e+00 %sel = select fast i1 %cmp1, float %add, float 1.000000e+00 ret float %sel diff --git a/llvm/test/Transforms/InstCombine/fcmp-select.ll b/llvm/test/Transforms/InstCombine/fcmp-select.ll index 028de1ff8a99f..408bc1cdc268f 100644 --- a/llvm/test/Transforms/InstCombine/fcmp-select.ll +++ b/llvm/test/Transforms/InstCombine/fcmp-select.ll @@ -219,8 +219,8 @@ define double @test_fcmp_select_clamp(double %x) { define double @test_fcmp_select_maxnum(double %x) { ; CHECK-LABEL: @test_fcmp_select_maxnum( -; CHECK-NEXT: [[SEL1:%.*]] = call nnan nsz double @llvm.maxnum.f64(double [[X:%.*]], double 1.000000e+00) -; CHECK-NEXT: [[SEL2:%.*]] = call nnan nsz double @llvm.minnum.f64(double [[SEL1]], double 2.550000e+02) +; CHECK-NEXT: [[SEL1:%.*]] = call nsz double @llvm.maxnum.f64(double [[X:%.*]], double 1.000000e+00) +; CHECK-NEXT: [[SEL2:%.*]] = call nsz double @llvm.minnum.f64(double [[SEL1]], double 2.550000e+02) ; CHECK-NEXT: ret double [[SEL2]] ; %cmp1 = fcmp ogt double %x, 1.0 diff --git a/llvm/test/Transforms/InstCombine/fneg.ll b/llvm/test/Transforms/InstCombine/fneg.ll index 9692005edf2b6..549291f2c4f0d 100644 --- a/llvm/test/Transforms/InstCombine/fneg.ll +++ b/llvm/test/Transforms/InstCombine/fneg.ll @@ -1099,7 +1099,7 @@ define float @test_fneg_select_constant_var_multiuse(i1 %cond, float %x) { define float @test_fneg_select_maxnum(float %x) { ; CHECK-LABEL: @test_fneg_select_maxnum( -; CHECK-NEXT: [[SEL1:%.*]] = call nnan nsz float @llvm.maxnum.f32(float [[X:%.*]], float 1.000000e+00) +; CHECK-NEXT: [[SEL1:%.*]] = call nsz float @llvm.maxnum.f32(float [[X:%.*]], float 1.000000e+00) ; CHECK-NEXT: [[NEG:%.*]] = fneg float [[SEL1]] ; CHECK-NEXT: ret float [[NEG]] ; diff --git a/llvm/test/Transforms/InstCombine/minmax-fp.ll b/llvm/test/Transforms/InstCombine/minmax-fp.ll index 1276b7b3e3867..4fe8cf374344e 100644 --- a/llvm/test/Transforms/InstCombine/minmax-fp.ll +++ b/llvm/test/Transforms/InstCombine/minmax-fp.ll @@ -321,7 +321,7 @@ define double @fneg_fmin(double %x, double %y) { define float @maxnum_ogt_fmf_on_select(float %a, float %b) { ; CHECK-LABEL: @maxnum_ogt_fmf_on_select( -; CHECK-NEXT: [[F:%.*]] = call nnan nsz float @llvm.maxnum.f32(float [[A:%.*]], float [[B:%.*]]) +; CHECK-NEXT: [[F:%.*]] = call nsz float @llvm.maxnum.f32(float [[A:%.*]], float [[B:%.*]]) ; CHECK-NEXT: ret float [[F]] ; %cond = fcmp ogt float %a, %b @@ -331,7 +331,7 @@ define float @maxnum_ogt_fmf_on_select(float %a, float %b) { define <2 x float> @maxnum_oge_fmf_on_select(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: @maxnum_oge_fmf_on_select( -; CHECK-NEXT: [[F:%.*]] = call nnan ninf nsz <2 x float> @llvm.maxnum.v2f32(<2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) +; CHECK-NEXT: [[F:%.*]] = call ninf nsz <2 x float> @llvm.maxnum.v2f32(<2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) ; CHECK-NEXT: ret <2 x float> [[F]] ; %cond = fcmp oge <2 x float> %a, %b @@ -385,7 +385,7 @@ define float @maxnum_no_nnan(float %a, float %b) { define float @minnum_olt_fmf_on_select(float %a, float %b) { ; CHECK-LABEL: 
@minnum_olt_fmf_on_select( -; CHECK-NEXT: [[F:%.*]] = call nnan nsz float @llvm.minnum.f32(float [[A:%.*]], float [[B:%.*]]) +; CHECK-NEXT: [[F:%.*]] = call nsz float @llvm.minnum.f32(float [[A:%.*]], float [[B:%.*]]) ; CHECK-NEXT: ret float [[F]] ; %cond = fcmp olt float %a, %b @@ -395,7 +395,7 @@ define float @minnum_olt_fmf_on_select(float %a, float %b) { define <2 x float> @minnum_ole_fmf_on_select(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: @minnum_ole_fmf_on_select( -; CHECK-NEXT: [[F:%.*]] = call nnan ninf nsz <2 x float> @llvm.minnum.v2f32(<2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) +; CHECK-NEXT: [[F:%.*]] = call ninf nsz <2 x float> @llvm.minnum.v2f32(<2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) ; CHECK-NEXT: ret <2 x float> [[F]] ; %cond = fcmp ole <2 x float> %a, %b diff --git a/llvm/test/Transforms/InstCombine/unordered-fcmp-select.ll b/llvm/test/Transforms/InstCombine/unordered-fcmp-select.ll index b164dd983a892..178795f9f9a83 100644 --- a/llvm/test/Transforms/InstCombine/unordered-fcmp-select.ll +++ b/llvm/test/Transforms/InstCombine/unordered-fcmp-select.ll @@ -115,7 +115,7 @@ define float @select_max_ugt_2_use_cmp(float %a, float %b) { ; CHECK-LABEL: @select_max_ugt_2_use_cmp( ; CHECK-NEXT: [[CMP:%.*]] = fcmp reassoc ugt float [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: call void @foo(i1 [[CMP]]) -; CHECK-NEXT: [[SEL:%.*]] = call fast float @llvm.maxnum.f32(float [[A]], float [[B]]) +; CHECK-NEXT: [[SEL:%.*]] = call reassoc ninf nsz arcp contract afn float @llvm.maxnum.f32(float [[A]], float [[B]]) ; CHECK-NEXT: ret float [[SEL]] ; %cmp = fcmp reassoc ugt float %a, %b From 45162635bf657eb0a6cdebc6398fada974981c64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Tue, 3 Dec 2024 07:14:49 +0100 Subject: [PATCH 026/191] [GlobalISel] Combine [s,z]ext of undef into 0 (#117439) Alternative for https://github.com/llvm/llvm-project/pull/113764 It builds on a minimalistic approach with the legality check in match and a blind apply. The precise patterns are used for better compile-time and modularity. It also moves the pattern check into combiner. While unary_undef_to_zero and propagate_undef_any_op rely on custom C++ code for pattern matching. Is there a limit on the number of patterns? G_ANYEXT of undef -> undef G_SEXT of undef -> 0 G_ZEXT of undef -> 0 The combine is not a member of the post legalizer combiner for AArch64. Test: llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir --- .../include/llvm/Target/GlobalISel/Combine.td | 27 +++++++++- .../AArch64/GlobalISel/combine-cast.mir | 52 +++++++++++++++++++ .../CodeGen/AArch64/extract-vector-elt.ll | 15 ++---- .../combine-amdgpu-cvt-f32-ubyte.mir | 6 +-- .../CodeGen/AMDGPU/shrink-add-sub-constant.ll | 7 +-- 5 files changed, 84 insertions(+), 23 deletions(-) diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index b0c63fc7c7b80..dbdc007d9c6fe 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -428,7 +428,7 @@ def unary_undef_to_zero: GICombineRule< // replaced with undef. 
def propagate_undef_any_op: GICombineRule< (defs root:$root), - (match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR, G_TRUNC, G_BITCAST, G_ANYEXT):$root, + (match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR, G_TRUNC, G_BITCAST):$root, [{ return Helper.matchAnyExplicitUseIsUndef(*${root}); }]), (apply [{ Helper.replaceInstWithUndef(*${root}); }])>; @@ -1857,6 +1857,26 @@ class integer_of_opcode : GICombineRule < def integer_of_truncate : integer_of_opcode; +def anyext_undef: GICombineRule< + (defs root:$root), + (match (G_IMPLICIT_DEF $undef), + (G_ANYEXT $root, $undef):$Aext), + (apply [{ Helper.replaceInstWithUndef(*${Aext}); }])>; + +def zext_undef: GICombineRule< + (defs root:$root), + (match (G_IMPLICIT_DEF $undef), + (G_ZEXT $root, $undef):$Zext, + [{ return Helper.isConstantLegalOrBeforeLegalizer(MRI.getType(${Zext}->getOperand(0).getReg())); }]), + (apply [{ Helper.replaceInstWithConstant(*${Zext}, 0); }])>; + +def sext_undef: GICombineRule< + (defs root:$root), + (match (G_IMPLICIT_DEF $undef), + (G_SEXT $root, $undef):$Sext, + [{ return Helper.isConstantLegalOrBeforeLegalizer(MRI.getType(${Sext}->getOperand(0).getReg())); }]), + (apply [{ Helper.replaceInstWithConstant(*${Sext}, 0); }])>; + def cast_of_cast_combines: GICombineGroup<[ truncate_of_zext, truncate_of_sext, @@ -1882,7 +1902,10 @@ def cast_combines: GICombineGroup<[ narrow_binop_and, narrow_binop_or, narrow_binop_xor, - integer_of_truncate + integer_of_truncate, + anyext_undef, + sext_undef, + zext_undef ]>; def canonicalize_icmp : GICombineRule< diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir index b045deebc56e0..25161652dafac 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir @@ -217,3 +217,55 @@ body: | %large:_(<2 x s64>) = G_ANYEXT %bv(<2 x s32>) $q0 = COPY %large(<2 x s64>) $d0 = COPY %bv(<2 x s32>) +... +--- +name: test_combine_anyext_undef +legalized: true +body: | + bb.1: + ; CHECK-PRE-LABEL: name: test_combine_anyext_undef + ; CHECK-PRE: %aext:_(s64) = G_IMPLICIT_DEF + ; CHECK-PRE-NEXT: $x0 = COPY %aext(s64) + ; + ; CHECK-POST-LABEL: name: test_combine_anyext_undef + ; CHECK-POST: %undef:_(s32) = G_IMPLICIT_DEF + ; CHECK-POST-NEXT: %aext:_(s64) = G_ANYEXT %undef(s32) + ; CHECK-POST-NEXT: $x0 = COPY %aext(s64) + %undef:_(s32) = G_IMPLICIT_DEF + %aext:_(s64) = G_ANYEXT %undef(s32) + $x0 = COPY %aext(s64) +... +--- +name: test_combine_sext_undef +legalized: true +body: | + bb.1: + ; CHECK-PRE-LABEL: name: test_combine_sext_undef + ; CHECK-PRE: %sext:_(s64) = G_CONSTANT i64 0 + ; CHECK-PRE-NEXT: $x0 = COPY %sext(s64) + ; + ; CHECK-POST-LABEL: name: test_combine_sext_undef + ; CHECK-POST: %undef:_(s32) = G_IMPLICIT_DEF + ; CHECK-POST-NEXT: %sext:_(s64) = G_SEXT %undef(s32) + ; CHECK-POST-NEXT: $x0 = COPY %sext(s64) + %undef:_(s32) = G_IMPLICIT_DEF + %sext:_(s64) = G_SEXT %undef(s32) + $x0 = COPY %sext(s64) +... +--- +name: test_combine_zext_undef +legalized: true +body: | + bb.1: + ; CHECK-PRE-LABEL: name: test_combine_zext_undef + ; CHECK-PRE: %zext:_(s64) = G_CONSTANT i64 0 + ; CHECK-PRE-NEXT: $x0 = COPY %zext(s64) + ; + ; CHECK-POST-LABEL: name: test_combine_zext_undef + ; CHECK-POST: %undef:_(s32) = G_IMPLICIT_DEF + ; CHECK-POST-NEXT: %zext:_(s64) = G_ZEXT %undef(s32) + ; CHECK-POST-NEXT: $x0 = COPY %zext(s64) + %undef:_(s32) = G_IMPLICIT_DEF + %zext:_(s64) = G_ZEXT %undef(s32) + $x0 = COPY %zext(s64) +... 
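As a rough illustration of what the three new patterns above fold (a hedged sketch; the enum and function below are invented for exposition and are not the generated combiner code):

```cpp
// Extending an undef source:
//  * G_ANYEXT places no constraint on the high bits, so undef stays undef.
//  * G_SEXT / G_ZEXT require the high bits to replicate the sign bit or be
//    zero, which a fresh undef cannot guarantee, so the result folds to 0
//    (subject to a constant of the destination type being legal, matching
//    the isConstantLegalOrBeforeLegalizer guard in the patterns above).
enum class ExtOpcode { AnyExt, SExt, ZExt };
enum class FoldResult { Undef, Zero };

constexpr FoldResult foldExtOfUndef(ExtOpcode Opc) {
  return Opc == ExtOpcode::AnyExt ? FoldResult::Undef : FoldResult::Zero;
}
```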
diff --git a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll index 5e5fdd6d31705..e89e1516fb1f5 100644 --- a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll +++ b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll @@ -8,17 +8,10 @@ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for extract_v4i32_vector_extract_const define i64 @extract_v2i64_undef_index(<2 x i64> %a, i32 %c) { -; CHECK-SD-LABEL: extract_v2i64_undef_index: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fmov x0, d0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: extract_v2i64_undef_index: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: str q0, [sp, #-16]! -; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 -; CHECK-GI-NEXT: ldr x0, [sp], #16 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: extract_v2i64_undef_index: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret entry: %d = extractelement <2 x i64> %a, i32 undef ret i64 %d diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir index 7893bfa1d38f0..9b39afd32ac37 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir @@ -261,8 +261,7 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte0_zext_lshr_16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %shift:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %shift(s16) + ; CHECK-NEXT: %zext:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %zext ; CHECK-NEXT: $vgpr0 = COPY %result(s32) %arg:_(s32) = COPY $vgpr0 @@ -284,8 +283,7 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte0_zext_lshr_24 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %shift:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %shift(s16) + ; CHECK-NEXT: %zext:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %zext ; CHECK-NEXT: $vgpr0 = COPY %result(s32) %arg:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll index a1a466fb04440..384a2c63122b8 100644 --- a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll +++ b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll @@ -4074,14 +4074,12 @@ define amdgpu_kernel void @v_test_v2i16_x_add_undef_neg32(ptr addrspace(1) %out, ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 ; VI-GISEL-NEXT: v_not_b32_e32 v2, 31 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 -; VI-GISEL-NEXT: s_and_b32 s0, 0xffff, s0 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) ; VI-GISEL-NEXT: v_add_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-GISEL-NEXT: v_or_b32_e32 v2, s0, v2 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 ; VI-GISEL-NEXT: s_endpgm ; @@ -4191,15 +4189,12 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg32_undef(ptr addrspace(1) %out, ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] -; VI-GISEL-NEXT: s_and_b32 s2, 0xffff, s0 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 ; VI-GISEL-NEXT: v_add_u32_e32 v0, 
vcc, v0, v2 -; VI-GISEL-NEXT: s_lshl_b32 s0, s2, 16 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) ; VI-GISEL-NEXT: v_add_u16_e32 v2, 0xffe0, v3 -; VI-GISEL-NEXT: v_or_b32_e32 v2, s0, v2 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 ; VI-GISEL-NEXT: s_endpgm ; From 789551362e20c1adf1f292a256b7276c2045d4e1 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Tue, 3 Dec 2024 07:56:06 +0100 Subject: [PATCH 027/191] [clang][bytecode] Handle memmove like memcpy (#118431) This is the same thing for us, except for diagnostic differences. --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 8 ++++++-- clang/test/AST/ByteCode/builtin-functions.cpp | 7 +++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index aab380c0925d2..f3024dc3e26fe 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -1802,9 +1802,11 @@ static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC, peekToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(2))); assert(!Size.isSigned() && "memcpy and friends take an unsigned size"); - if (ID == Builtin::BImemcpy) + if (ID == Builtin::BImemcpy || ID == Builtin::BImemmove) diagnoseNonConstexprBuiltin(S, OpPC, ID); + bool Move = (ID == Builtin::BI__builtin_memmove || ID == Builtin::BImemmove); + if (DestPtr.isDummy() || SrcPtr.isDummy()) return false; @@ -1817,7 +1819,7 @@ static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC, if (SrcPtr.isZero() || DestPtr.isZero()) { Pointer DiagPtr = (SrcPtr.isZero() ? SrcPtr : DestPtr); S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_memcpy_null) - << /*IsMove=*/false << /*IsWchar=*/false << !SrcPtr.isZero() + << /*IsMove=*/Move << /*IsWchar=*/false << !SrcPtr.isZero() << DiagPtr.toDiagnosticString(S.getASTContext()); return false; } @@ -2291,6 +2293,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F, case Builtin::BI__builtin_memcpy: case Builtin::BImemcpy: + case Builtin::BI__builtin_memmove: + case Builtin::BImemmove: if (!interp__builtin_memcpy(S, OpPC, Frame, F, Call)) return false; break; diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp index dfee35d6399a6..b94adfa3ab36b 100644 --- a/clang/test/AST/ByteCode/builtin-functions.cpp +++ b/clang/test/AST/ByteCode/builtin-functions.cpp @@ -1146,4 +1146,11 @@ namespace BuiltinMemcpy { // both-note {{source of 'memcpy' is nullptr}} + constexpr int simpleMove() { + int a = 12; + int b = 0; + __builtin_memmove(&b, &a, sizeof(a)); + return b; + } + static_assert(simpleMove() == 12); } From e776484a02194986028424ef5a5a782de0a681c2 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 2 Dec 2024 23:22:33 -0800 Subject: [PATCH 028/191] [InstructionCost] Optimize operator== `!(*this < RHS) && !(RHS < *this)` is difficult for the optimizer to reason about. 
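For illustration, here is a minimal standalone sketch of the pattern (the Cost type, its field types, and the lexicographic operator< are stand-ins assumed for this note, not the actual InstructionCost definition):

  struct Cost {
    int State;  // stand-in for the validity state
    long Value; // stand-in for the cost value
    bool operator<(const Cost &RHS) const {
      // Assumed lexicographic ordering: compare State, then Value.
      return State != RHS.State ? State < RHS.State : Value < RHS.Value;
    }
    // Old form: equality expressed through two operator< calls, which the
    // optimizer has to fold back into plain comparisons.
    bool eqViaLess(const Cost &RHS) const {
      return !(*this < RHS) && !(RHS < *this);
    }
    // New form: direct member-wise comparison, trivially two integer
    // compares and consistent with the ordering above.
    bool operator==(const Cost &RHS) const {
      return State == RHS.State && Value == RHS.Value;
    }
  };

For a lexicographic ordering the two forms agree on every input; writing the member-wise comparison directly just makes that obvious to the compiler.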
--- llvm/include/llvm/Support/InstructionCost.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/include/llvm/Support/InstructionCost.h b/llvm/include/llvm/Support/InstructionCost.h index ada0b8962881d..b5af0e0401ef2 100644 --- a/llvm/include/llvm/Support/InstructionCost.h +++ b/llvm/include/llvm/Support/InstructionCost.h @@ -198,10 +198,8 @@ class InstructionCost { return Value < RHS.Value; } - // Implement in terms of operator< to ensure that the two comparisons stay in - // sync bool operator==(const InstructionCost &RHS) const { - return !(*this < RHS) && !(RHS < *this); + return State == RHS.State && Value == RHS.Value; } bool operator!=(const InstructionCost &RHS) const { return !(*this == RHS); } From 83cbb170449b17aa3e1ada514c94d9e9b4f0fca6 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Tue, 3 Dec 2024 07:40:17 +0000 Subject: [PATCH 029/191] [AMDGPU] Refine AMDGPUAtomicOptimizerImpl class. NFC. (#118302) Use references instead of pointers for most state and common up some of the initialization between the legacy and new pass manager paths. --- .../Target/AMDGPU/AMDGPUAtomicOptimizer.cpp | 86 +++++++++---------- 1 file changed, 39 insertions(+), 47 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp index e4ca1ae0499b9..c09c71c83fead 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp @@ -66,11 +66,12 @@ class AMDGPUAtomicOptimizer : public FunctionPass { class AMDGPUAtomicOptimizerImpl : public InstVisitor { private: + Function &F; SmallVector ToReplace; - const UniformityInfo *UA; - const DataLayout *DL; + const UniformityInfo &UA; + const DataLayout &DL; DomTreeUpdater &DTU; - const GCNSubtarget *ST; + const GCNSubtarget &ST; bool IsPixelShader; ScanOptions ScanImpl; @@ -91,13 +92,14 @@ class AMDGPUAtomicOptimizerImpl public: AMDGPUAtomicOptimizerImpl() = delete; - AMDGPUAtomicOptimizerImpl(const UniformityInfo *UA, const DataLayout *DL, - DomTreeUpdater &DTU, const GCNSubtarget *ST, - bool IsPixelShader, ScanOptions ScanImpl) - : UA(UA), DL(DL), DTU(DTU), ST(ST), IsPixelShader(IsPixelShader), + AMDGPUAtomicOptimizerImpl(Function &F, const UniformityInfo &UA, + DomTreeUpdater &DTU, const GCNSubtarget &ST, + ScanOptions ScanImpl) + : F(F), UA(UA), DL(F.getDataLayout()), DTU(DTU), ST(ST), + IsPixelShader(F.getCallingConv() == CallingConv::AMDGPU_PS), ScanImpl(ScanImpl) {} - bool run(Function &F); + bool run(); void visitAtomicRMWInst(AtomicRMWInst &I); void visitIntrinsicInst(IntrinsicInst &I); @@ -114,40 +116,30 @@ bool AMDGPUAtomicOptimizer::runOnFunction(Function &F) { return false; } - const UniformityInfo *UA = - &getAnalysis().getUniformityInfo(); - const DataLayout *DL = &F.getDataLayout(); + const UniformityInfo &UA = + getAnalysis().getUniformityInfo(); - DominatorTreeWrapperPass *const DTW = + DominatorTreeWrapperPass *DTW = getAnalysisIfAvailable(); DomTreeUpdater DTU(DTW ? 
&DTW->getDomTree() : nullptr, DomTreeUpdater::UpdateStrategy::Lazy); const TargetPassConfig &TPC = getAnalysis(); const TargetMachine &TM = TPC.getTM(); - const GCNSubtarget *ST = &TM.getSubtarget(F); + const GCNSubtarget &ST = TM.getSubtarget(F); - bool IsPixelShader = F.getCallingConv() == CallingConv::AMDGPU_PS; - - return AMDGPUAtomicOptimizerImpl(UA, DL, DTU, ST, IsPixelShader, ScanImpl) - .run(F); + return AMDGPUAtomicOptimizerImpl(F, UA, DTU, ST, ScanImpl).run(); } PreservedAnalyses AMDGPUAtomicOptimizerPass::run(Function &F, FunctionAnalysisManager &AM) { - - const auto *UA = &AM.getResult(F); - const DataLayout *DL = &F.getDataLayout(); + const auto &UA = AM.getResult(F); DomTreeUpdater DTU(&AM.getResult(F), DomTreeUpdater::UpdateStrategy::Lazy); - const GCNSubtarget *ST = &TM.getSubtarget(F); - - bool IsPixelShader = F.getCallingConv() == CallingConv::AMDGPU_PS; + const GCNSubtarget &ST = TM.getSubtarget(F); - bool IsChanged = - AMDGPUAtomicOptimizerImpl(UA, DL, DTU, ST, IsPixelShader, ScanImpl) - .run(F); + bool IsChanged = AMDGPUAtomicOptimizerImpl(F, UA, DTU, ST, ScanImpl).run(); if (!IsChanged) { return PreservedAnalyses::all(); @@ -158,7 +150,7 @@ PreservedAnalyses AMDGPUAtomicOptimizerPass::run(Function &F, return PA; } -bool AMDGPUAtomicOptimizerImpl::run(Function &F) { +bool AMDGPUAtomicOptimizerImpl::run() { // Scan option None disables the Pass if (ScanImpl == ScanOptions::None) { @@ -234,18 +226,18 @@ void AMDGPUAtomicOptimizerImpl::visitAtomicRMWInst(AtomicRMWInst &I) { // If the pointer operand is divergent, then each lane is doing an atomic // operation on a different address, and we cannot optimize that. - if (UA->isDivergentUse(I.getOperandUse(PtrIdx))) { + if (UA.isDivergentUse(I.getOperandUse(PtrIdx))) { return; } - bool ValDivergent = UA->isDivergentUse(I.getOperandUse(ValIdx)); + bool ValDivergent = UA.isDivergentUse(I.getOperandUse(ValIdx)); // If the value operand is divergent, each lane is contributing a different // value to the atomic calculation. We can only optimize divergent values if // we have DPP available on our subtarget (for DPP strategy), and the atomic // operation is 32 or 64 bits. if (ValDivergent) { - if (ScanImpl == ScanOptions::DPP && !ST->hasDPP()) + if (ScanImpl == ScanOptions::DPP && !ST.hasDPP()) return; if (!isLegalCrossLaneType(I.getType())) @@ -324,14 +316,14 @@ void AMDGPUAtomicOptimizerImpl::visitIntrinsicInst(IntrinsicInst &I) { const unsigned ValIdx = 0; - const bool ValDivergent = UA->isDivergentUse(I.getOperandUse(ValIdx)); + const bool ValDivergent = UA.isDivergentUse(I.getOperandUse(ValIdx)); // If the value operand is divergent, each lane is contributing a different // value to the atomic calculation. We can only optimize divergent values if // we have DPP available on our subtarget (for DPP strategy), and the atomic // operation is 32 or 64 bits. if (ValDivergent) { - if (ScanImpl == ScanOptions::DPP && !ST->hasDPP()) + if (ScanImpl == ScanOptions::DPP && !ST.hasDPP()) return; if (!isLegalCrossLaneType(I.getType())) @@ -341,7 +333,7 @@ void AMDGPUAtomicOptimizerImpl::visitIntrinsicInst(IntrinsicInst &I) { // If any of the other arguments to the intrinsic are divergent, we can't // optimize the operation. for (unsigned Idx = 1; Idx < I.getNumOperands(); Idx++) { - if (UA->isDivergentUse(I.getOperandUse(Idx))) { + if (UA.isDivergentUse(I.getOperandUse(Idx))) { return; } } @@ -418,17 +410,17 @@ Value *AMDGPUAtomicOptimizerImpl::buildReduction(IRBuilder<> &B, } // Reduce within each pair of rows (i.e. 32 lanes). 
- assert(ST->hasPermLaneX16()); + assert(ST.hasPermLaneX16()); Value *Permlanex16Call = B.CreateIntrinsic(AtomicTy, Intrinsic::amdgcn_permlanex16, {PoisonValue::get(AtomicTy), V, B.getInt32(0), B.getInt32(0), B.getFalse(), B.getFalse()}); V = buildNonAtomicBinOp(B, Op, V, Permlanex16Call); - if (ST->isWave32()) { + if (ST.isWave32()) { return V; } - if (ST->hasPermLane64()) { + if (ST.hasPermLane64()) { // Reduce across the upper and lower 32 lanes. Value *Permlane64Call = B.CreateIntrinsic(AtomicTy, Intrinsic::amdgcn_permlane64, V); @@ -461,7 +453,7 @@ Value *AMDGPUAtomicOptimizerImpl::buildScan(IRBuilder<> &B, {Identity, V, B.getInt32(DPP::ROW_SHR0 | 1 << Idx), B.getInt32(0xf), B.getInt32(0xf), B.getFalse()})); } - if (ST->hasDPPBroadcasts()) { + if (ST.hasDPPBroadcasts()) { // GFX9 has DPP row broadcast operations. V = buildNonAtomicBinOp( B, Op, V, @@ -479,7 +471,7 @@ Value *AMDGPUAtomicOptimizerImpl::buildScan(IRBuilder<> &B, // Combine lane 15 into lanes 16..31 (and, for wave 64, lane 47 into lanes // 48..63). - assert(ST->hasPermLaneX16()); + assert(ST.hasPermLaneX16()); Value *PermX = B.CreateIntrinsic(AtomicTy, Intrinsic::amdgcn_permlanex16, {PoisonValue::get(AtomicTy), V, B.getInt32(-1), @@ -490,7 +482,7 @@ Value *AMDGPUAtomicOptimizerImpl::buildScan(IRBuilder<> &B, B.getInt32(0xa), B.getInt32(0xf), B.getFalse()}); V = buildNonAtomicBinOp(B, Op, V, UpdateDPPCall); - if (!ST->isWave32()) { + if (!ST.isWave32()) { // Combine lane 31 into lanes 32..63. Value *const Lane31 = B.CreateIntrinsic( AtomicTy, Intrinsic::amdgcn_readlane, {V, B.getInt32(31)}); @@ -513,7 +505,7 @@ Value *AMDGPUAtomicOptimizerImpl::buildShiftRight(IRBuilder<> &B, Value *V, Module *M = B.GetInsertBlock()->getModule(); Function *UpdateDPP = Intrinsic::getOrInsertDeclaration( M, Intrinsic::amdgcn_update_dpp, AtomicTy); - if (ST->hasDPPWavefrontShifts()) { + if (ST.hasDPPWavefrontShifts()) { // GFX9 has DPP wavefront shift operations. V = B.CreateCall(UpdateDPP, {Identity, V, B.getInt32(DPP::WAVE_SHR1), B.getInt32(0xf), @@ -535,7 +527,7 @@ Value *AMDGPUAtomicOptimizerImpl::buildShiftRight(IRBuilder<> &B, Value *V, V = B.CreateCall(WriteLane, {B.CreateCall(ReadLane, {Old, B.getInt32(15)}), B.getInt32(16), V}); - if (!ST->isWave32()) { + if (!ST.isWave32()) { // Copy the old lane 31 to the new lane 32. V = B.CreateCall( WriteLane, @@ -560,7 +552,7 @@ std::pair AMDGPUAtomicOptimizerImpl::buildScanIteratively( IRBuilder<> &B, AtomicRMWInst::BinOp Op, Value *const Identity, Value *V, Instruction &I, BasicBlock *ComputeLoop, BasicBlock *ComputeEnd) const { auto *Ty = I.getType(); - auto *WaveTy = B.getIntNTy(ST->getWavefrontSize()); + auto *WaveTy = B.getIntNTy(ST.getWavefrontSize()); auto *EntryBB = I.getParent(); auto NeedResult = !I.use_empty(); @@ -698,7 +690,7 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I, Type *const Ty = I.getType(); Type *Int32Ty = B.getInt32Ty(); bool isAtomicFloatingPointTy = Ty->isFloatingPointTy(); - [[maybe_unused]] const unsigned TyBitWidth = DL->getTypeSizeInBits(Ty); + [[maybe_unused]] const unsigned TyBitWidth = DL.getTypeSizeInBits(Ty); // This is the value in the atomic operation we need to combine in order to // reduce the number of atomic operations. @@ -706,7 +698,7 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I, // We need to know how many lanes are active within the wavefront, and we do // this by doing a ballot of active lanes. 
- Type *const WaveTy = B.getIntNTy(ST->getWavefrontSize()); + Type *const WaveTy = B.getIntNTy(ST.getWavefrontSize()); CallInst *const Ballot = B.CreateIntrinsic(Intrinsic::amdgcn_ballot, WaveTy, B.getTrue()); @@ -715,7 +707,7 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I, // below us only if its associated index was less than ours. We do this by // using the mbcnt intrinsic. Value *Mbcnt; - if (ST->isWave32()) { + if (ST.isWave32()) { Mbcnt = B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_lo, {}, {Ballot, B.getInt32(0)}); } else { @@ -755,7 +747,7 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I, // that they can correctly contribute to the final result. NewV = B.CreateIntrinsic(Intrinsic::amdgcn_set_inactive, Ty, {V, Identity}); - if (!NeedResult && ST->hasPermLaneX16()) { + if (!NeedResult && ST.hasPermLaneX16()) { // On GFX10 the permlanex16 instruction helps us build a reduction // without too many readlanes and writelanes, which are generally bad // for performance. @@ -767,7 +759,7 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I, // Read the value from the last lane, which has accumulated the values // of each active lane in the wavefront. This will be our new value // which we will provide to the atomic operation. - Value *const LastLaneIdx = B.getInt32(ST->getWavefrontSize() - 1); + Value *const LastLaneIdx = B.getInt32(ST.getWavefrontSize() - 1); NewV = B.CreateIntrinsic(Ty, Intrinsic::amdgcn_readlane, {NewV, LastLaneIdx}); } From 9692242f51eda6a8c33860d2879ffa291a27e3ca Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 2 Dec 2024 23:29:39 -0800 Subject: [PATCH 030/191] [RISCV][GISel] Support f64->f32 fptrunc and f32->f64 fpext without D extension. Add RUN lines to float-convert.ll and double-convert.ll without F extension. 
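As a point of reference, a small C++ sketch of the kind of source now covered (the function names are invented for this note; the libcall names match the new RV32I/RV64I check lines below):

  // Built for rv32i/rv64i (no F or D extension) with GlobalISel, these
  // conversions are legalized to compiler-rt libcalls.
  float narrow(double a) { return static_cast<float>(a); } // __truncdfsf2
  double widen(float a) { return a; }                       // __extendsfdf2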
--- .../Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 6 +- .../RISCV/GlobalISel/double-convert.ll | 526 ++++++++++++++++++ .../CodeGen/RISCV/GlobalISel/float-convert.ll | 489 ++++++++++++++++ 3 files changed, 1019 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index a8052839b5c6a..911827da06197 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -520,11 +520,13 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) getActionDefinitionsBuilder(G_FPTRUNC) .legalFor(ST.hasStdExtD(), {{s32, s64}}) .legalFor(ST.hasStdExtZfh(), {{s16, s32}}) - .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}}); + .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}}) + .libcallFor({{s32, s64}}); getActionDefinitionsBuilder(G_FPEXT) .legalFor(ST.hasStdExtD(), {{s64, s32}}) .legalFor(ST.hasStdExtZfh(), {{s32, s16}}) - .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s64, s16}}); + .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s64, s16}}) + .libcallFor({{s64, s32}}); getActionDefinitionsBuilder(G_FCMP) .legalFor(ST.hasStdExtF(), {{sXLen, s32}}) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll b/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll index 7133d5c100e75..b0711d7fbc772 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll @@ -3,12 +3,34 @@ ; RUN: -target-abi=ilp32d | FileCheck -check-prefixes=CHECKIFD,RV32IFD %s ; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d -verify-machineinstrs < %s \ ; RUN: -target-abi=lp64d | FileCheck -check-prefixes=CHECKIFD,RV64IFD %s +; RUN: llc -mtriple=riscv32 -global-isel -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I %s define float @fcvt_s_d(double %a) nounwind { ; CHECKIFD-LABEL: fcvt_s_d: ; CHECKIFD: # %bb.0: ; CHECKIFD-NEXT: fcvt.s.d fa0, fa0 ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fcvt_s_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __truncdfsf2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __truncdfsf2 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = fptrunc double %a to float ret float %1 } @@ -18,6 +40,24 @@ define double @fcvt_d_s(float %a) nounwind { ; CHECKIFD: # %bb.0: ; CHECKIFD-NEXT: fcvt.d.s fa0, fa0 ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __extendsfdf2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __extendsfdf2 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = fpext float %a to double ret double %1 } @@ -27,6 +67,24 @@ define i32 @fcvt_w_d(double %a) nounwind { ; CHECKIFD: # %bb.0: ; CHECKIFD-NEXT: fcvt.w.d a0, 
fa0, rtz ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fcvt_w_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixdfsi +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_w_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixdfsi +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = fptosi double %a to i32 ret i32 %1 } @@ -36,6 +94,24 @@ define i32 @fcvt_wu_d(double %a) nounwind { ; CHECKIFD: # %bb.0: ; CHECKIFD-NEXT: fcvt.wu.d a0, fa0, rtz ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fcvt_wu_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixunsdfsi +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_wu_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixunsdfsi +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = fptoui double %a to i32 ret i32 %1 } @@ -60,6 +136,34 @@ define i32 @fcvt_wu_d_multiple_use(double %x, ptr %y) nounwind { ; RV64IFD-NEXT: li a0, 1 ; RV64IFD-NEXT: .LBB4_2: ; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_wu_d_multiple_use: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixunsdfsi +; RV32I-NEXT: bnez a0, .LBB4_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: li a0, 1 +; RV32I-NEXT: .LBB4_2: +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_wu_d_multiple_use: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixunsdfsi +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: bnez a1, .LBB4_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: li a0, 1 +; RV64I-NEXT: .LBB4_2: +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %a = fptoui double %x to i32 %b = icmp eq i32 %a, 0 %c = select i1 %b, i32 1, i32 %a @@ -71,6 +175,25 @@ define double @fcvt_d_w(i32 %a) nounwind { ; CHECKIFD: # %bb.0: ; CHECKIFD-NEXT: fcvt.d.w fa0, a0 ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_w: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatsidf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_w: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: call __floatdidf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = sitofp i32 %a to double ret double %1 } @@ -81,6 +204,26 @@ define double @fcvt_d_w_load(ptr %p) nounwind { ; CHECKIFD-NEXT: lw a0, 0(a0) ; CHECKIFD-NEXT: fcvt.d.w fa0, a0 ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_w_load: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: call __floatsidf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 
16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_w_load: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 0(a0) +; RV64I-NEXT: call __floatdidf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %a = load i32, ptr %p %1 = sitofp i32 %a to double ret double %1 @@ -91,6 +234,26 @@ define double @fcvt_d_wu(i32 %a) nounwind { ; CHECKIFD: # %bb.0: ; CHECKIFD-NEXT: fcvt.d.wu fa0, a0 ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_wu: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatunsidf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_wu: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: call __floatundidf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = uitofp i32 %a to double ret double %1 } @@ -107,6 +270,26 @@ define double @fcvt_d_wu_load(ptr %p) nounwind { ; RV64IFD-NEXT: lwu a0, 0(a0) ; RV64IFD-NEXT: fcvt.d.wu fa0, a0 ; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_wu_load: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: call __floatunsidf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_wu_load: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lwu a0, 0(a0) +; RV64I-NEXT: call __floatundidf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %a = load i32, ptr %p %1 = uitofp i32 %a to double ret double %1 @@ -126,6 +309,24 @@ define i64 @fcvt_l_d(double %a) nounwind { ; RV64IFD: # %bb.0: ; RV64IFD-NEXT: fcvt.l.d a0, fa0, rtz ; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_l_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixdfdi +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_l_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixdfdi +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = fptosi double %a to i64 ret i64 %1 } @@ -144,6 +345,24 @@ define i64 @fcvt_lu_d(double %a) nounwind { ; RV64IFD: # %bb.0: ; RV64IFD-NEXT: fcvt.lu.d a0, fa0, rtz ; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_lu_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixunsdfdi +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_lu_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixunsdfdi +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = fptoui double %a to i64 ret i64 %1 } @@ -164,6 +383,24 @@ define i64 @fmv_x_d(double %a, double %b) nounwind { ; RV64IFD-NEXT: fadd.d fa5, fa0, fa1 ; RV64IFD-NEXT: fmv.x.d a0, fa5 
; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fmv_x_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmv_x_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = fadd double %a, %b %2 = bitcast double %1 to i64 ret i64 %2 @@ -183,6 +420,24 @@ define double @fcvt_d_l(i64 %a) nounwind { ; RV64IFD: # %bb.0: ; RV64IFD-NEXT: fcvt.d.l fa0, a0 ; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_l: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatdidf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_l: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatdidf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = sitofp i64 %a to double ret double %1 } @@ -201,6 +456,24 @@ define double @fcvt_d_lu(i64 %a) nounwind { ; RV64IFD: # %bb.0: ; RV64IFD-NEXT: fcvt.d.lu fa0, a0 ; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_lu: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatundidf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_lu: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatundidf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = uitofp i64 %a to double ret double %1 } @@ -225,6 +498,24 @@ define double @fmv_d_x(i64 %a, i64 %b) nounwind { ; RV64IFD-NEXT: fmv.d.x fa4, a1 ; RV64IFD-NEXT: fadd.d fa0, fa5, fa4 ; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fmv_d_x: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmv_d_x: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = bitcast i64 %a to double %2 = bitcast i64 %b to double %3 = fadd double %1, %2 @@ -236,6 +527,24 @@ define double @fcvt_d_w_i8(i8 signext %a) nounwind { ; CHECKIFD: # %bb.0: ; CHECKIFD-NEXT: fcvt.d.w fa0, a0 ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_w_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatsidf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_w_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatdidf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = sitofp i8 %a to double ret double %1 } @@ -245,6 +554,24 @@ define 
double @fcvt_d_wu_i8(i8 zeroext %a) nounwind { ; CHECKIFD: # %bb.0: ; CHECKIFD-NEXT: fcvt.d.wu fa0, a0 ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_wu_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatunsidf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_wu_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatundidf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = uitofp i8 %a to double ret double %1 } @@ -254,6 +581,24 @@ define double @fcvt_d_w_i16(i16 signext %a) nounwind { ; CHECKIFD: # %bb.0: ; CHECKIFD-NEXT: fcvt.d.w fa0, a0 ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_w_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatsidf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_w_i16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatdidf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = sitofp i16 %a to double ret double %1 } @@ -263,6 +608,24 @@ define double @fcvt_d_wu_i16(i16 zeroext %a) nounwind { ; CHECKIFD: # %bb.0: ; CHECKIFD-NEXT: fcvt.d.wu fa0, a0 ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_wu_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatunsidf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_wu_i16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatundidf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = uitofp i16 %a to double ret double %1 } @@ -281,6 +644,43 @@ define signext i32 @fcvt_d_w_demanded_bits(i32 signext %0, ptr %1) nounwind { ; RV64IFD-NEXT: fcvt.d.w fa5, a0 ; RV64IFD-NEXT: fsd fa5, 0(a1) ; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_w_demanded_bits: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: addi s1, a0, 1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __floatsidf +; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: sw a1, 4(s0) +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_w_demanded_bits: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: addiw s1, a0, 1 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __floatdidf +; RV64I-NEXT: sd a0, 0(s0) +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 
8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret %3 = add i32 %0, 1 %4 = sitofp i32 %3 to double store double %4, ptr %1, align 8 @@ -301,6 +701,44 @@ define signext i32 @fcvt_d_wu_demanded_bits(i32 signext %0, ptr %1) nounwind { ; RV64IFD-NEXT: fcvt.d.wu fa5, a0 ; RV64IFD-NEXT: fsd fa5, 0(a1) ; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_wu_demanded_bits: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: addi s1, a0, 1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __floatunsidf +; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: sw a1, 4(s0) +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_wu_demanded_bits: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: addiw s1, a0, 1 +; RV64I-NEXT: slli a0, s1, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: call __floatundidf +; RV64I-NEXT: sd a0, 0(s0) +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret %3 = add i32 %0, 1 %4 = uitofp i32 %3 to double store double %4, ptr %1, align 8 @@ -321,6 +759,28 @@ define signext i16 @fcvt_w_s_i16(double %a) nounwind { ; RV64IFD-NEXT: slli a0, a0, 48 ; RV64IFD-NEXT: srai a0, a0, 48 ; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_w_s_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixdfsi +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srai a0, a0, 16 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_w_s_i16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixdfsi +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srai a0, a0, 48 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = fptosi double %a to i16 ret i16 %1 } @@ -341,6 +801,30 @@ define zeroext i16 @fcvt_wu_s_i16(double %a) nounwind { ; RV64IFD-NEXT: addiw a1, a1, -1 ; RV64IFD-NEXT: and a0, a0, a1 ; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_wu_s_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixunsdfsi +; RV32I-NEXT: lui a1, 16 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_wu_s_i16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixunsdfsi +; RV64I-NEXT: lui a1, 16 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = fptoui double %a to i16 ret i16 %1 } @@ -359,6 +843,28 @@ define signext i8 @fcvt_w_s_i8(double 
%a) nounwind { ; RV64IFD-NEXT: slli a0, a0, 56 ; RV64IFD-NEXT: srai a0, a0, 56 ; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_w_s_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixdfsi +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: srai a0, a0, 24 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_w_s_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixdfsi +; RV64I-NEXT: slli a0, a0, 56 +; RV64I-NEXT: srai a0, a0, 56 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = fptosi double %a to i8 ret i8 %1 } @@ -369,6 +875,26 @@ define zeroext i8 @fcvt_wu_s_i8(double %a) nounwind { ; CHECKIFD-NEXT: fcvt.wu.d a0, fa0, rtz ; CHECKIFD-NEXT: andi a0, a0, 255 ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fcvt_wu_s_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixunsdfsi +; RV32I-NEXT: andi a0, a0, 255 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_wu_s_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixunsdfsi +; RV64I-NEXT: andi a0, a0, 255 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = fptoui double %a to i8 ret i8 %1 } diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll b/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll index e6df28f5f28d1..a14c06726ef5f 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll @@ -3,12 +3,34 @@ ; RUN: -target-abi=ilp32f | FileCheck -check-prefixes=CHECKIF,RV32IF %s ; RUN: llc -mtriple=riscv64 -global-isel -mattr=+f -verify-machineinstrs < %s \ ; RUN: -target-abi=lp64f | FileCheck -check-prefixes=CHECKIF,RV64IF %s +; RUN: llc -mtriple=riscv32 -global-isel -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I %s define i32 @fcvt_w_s(float %a) nounwind { ; CHECKIF-LABEL: fcvt_w_s: ; CHECKIF: # %bb.0: ; CHECKIF-NEXT: fcvt.w.s a0, fa0, rtz ; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fcvt_w_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixsfsi +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_w_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixsfsi +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = fptosi float %a to i32 ret i32 %1 } @@ -18,6 +40,24 @@ define i32 @fcvt_wu_s(float %a) nounwind { ; CHECKIF: # %bb.0: ; CHECKIF-NEXT: fcvt.wu.s a0, fa0, rtz ; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fcvt_wu_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixunssfsi +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_wu_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; 
RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixunssfsi +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = fptoui float %a to i32 ret i32 %1 } @@ -44,6 +84,34 @@ define i32 @fcvt_wu_s_multiple_use(float %x, ptr %y) nounwind { ; RV64IF-NEXT: li a0, 1 ; RV64IF-NEXT: .LBB2_2: ; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_wu_s_multiple_use: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixunssfsi +; RV32I-NEXT: bnez a0, .LBB2_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: li a0, 1 +; RV32I-NEXT: .LBB2_2: +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_wu_s_multiple_use: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixunssfsi +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: bnez a1, .LBB2_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: li a0, 1 +; RV64I-NEXT: .LBB2_2: +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %a = fptoui float %x to i32 %b = icmp eq i32 %a, 0 %c = select i1 %b, i32 1, i32 %a @@ -63,6 +131,25 @@ define signext i32 @fmv_x_w(float %a, float %b) nounwind { ; RV64IF-NEXT: fmv.x.w a0, fa5 ; RV64IF-NEXT: sext.w a0, a0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: fmv_x_w: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmv_x_w: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret ; Ensure fmv.x.w is generated even for a soft float calling convention %1 = fadd float %a, %b %2 = bitcast float %1 to i32 @@ -74,6 +161,25 @@ define float @fcvt_s_w(i32 %a) nounwind { ; CHECKIF: # %bb.0: ; CHECKIF-NEXT: fcvt.s.w fa0, a0 ; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_w: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatsisf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_w: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: call __floatdisf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = sitofp i32 %a to float ret float %1 } @@ -84,6 +190,26 @@ define float @fcvt_s_w_load(ptr %p) nounwind { ; CHECKIF-NEXT: lw a0, 0(a0) ; CHECKIF-NEXT: fcvt.s.w fa0, a0 ; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_w_load: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: call __floatsisf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_w_load: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 0(a0) +; RV64I-NEXT: call __floatdisf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; 
RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %a = load i32, ptr %p %1 = sitofp i32 %a to float ret float %1 @@ -94,6 +220,26 @@ define float @fcvt_s_wu(i32 %a) nounwind { ; CHECKIF: # %bb.0: ; CHECKIF-NEXT: fcvt.s.wu fa0, a0 ; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_wu: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatunsisf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_wu: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: call __floatundisf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = uitofp i32 %a to float ret float %1 } @@ -110,6 +256,26 @@ define float @fcvt_s_wu_load(ptr %p) nounwind { ; RV64IF-NEXT: lwu a0, 0(a0) ; RV64IF-NEXT: fcvt.s.wu fa0, a0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_wu_load: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: call __floatunsisf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_wu_load: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lwu a0, 0(a0) +; RV64I-NEXT: call __floatundisf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %a = load i32, ptr %p %1 = uitofp i32 %a to float ret float %1 @@ -122,6 +288,24 @@ define float @fmv_w_x(i32 %a, i32 %b) nounwind { ; CHECKIF-NEXT: fmv.w.x fa4, a1 ; CHECKIF-NEXT: fadd.s fa0, fa5, fa4 ; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fmv_w_x: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmv_w_x: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret ; Ensure fmv.w.x is generated even for a soft float calling convention %1 = bitcast i32 %a to float %2 = bitcast i32 %b to float @@ -143,6 +327,24 @@ define i64 @fcvt_l_s(float %a) nounwind { ; RV64IF: # %bb.0: ; RV64IF-NEXT: fcvt.l.s a0, fa0, rtz ; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_l_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixsfdi +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_l_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixsfdi +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = fptosi float %a to i64 ret i64 %1 } @@ -161,6 +363,24 @@ define i64 @fcvt_lu_s(float %a) nounwind { ; RV64IF: # %bb.0: ; RV64IF-NEXT: fcvt.lu.s a0, fa0, rtz ; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_lu_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixunssfdi +; RV32I-NEXT: lw ra, 12(sp) # 4-byte 
Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_lu_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixunssfdi +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = fptoui float %a to i64 ret i64 %1 } @@ -179,6 +399,24 @@ define float @fcvt_s_l(i64 %a) nounwind { ; RV64IF: # %bb.0: ; RV64IF-NEXT: fcvt.s.l fa0, a0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_l: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatdisf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_l: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatdisf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = sitofp i64 %a to float ret float %1 } @@ -197,6 +435,24 @@ define float @fcvt_s_lu(i64 %a) nounwind { ; RV64IF: # %bb.0: ; RV64IF-NEXT: fcvt.s.lu fa0, a0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_lu: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatundisf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_lu: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatundisf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = uitofp i64 %a to float ret float %1 } @@ -206,6 +462,24 @@ define float @fcvt_s_w_i8(i8 signext %a) nounwind { ; CHECKIF: # %bb.0: ; CHECKIF-NEXT: fcvt.s.w fa0, a0 ; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_w_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatsisf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_w_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatdisf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = sitofp i8 %a to float ret float %1 } @@ -215,6 +489,24 @@ define float @fcvt_s_wu_i8(i8 zeroext %a) nounwind { ; CHECKIF: # %bb.0: ; CHECKIF-NEXT: fcvt.s.wu fa0, a0 ; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_wu_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatunsisf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_wu_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatundisf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = uitofp i8 %a to float ret float %1 } @@ -224,6 +516,24 @@ define float @fcvt_s_w_i16(i16 signext %a) nounwind { ; CHECKIF: # %bb.0: ; CHECKIF-NEXT: fcvt.s.w fa0, a0 ; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_w_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatsisf 
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_w_i16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatdisf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = sitofp i16 %a to float ret float %1 } @@ -233,6 +543,24 @@ define float @fcvt_s_wu_i16(i16 zeroext %a) nounwind { ; CHECKIF: # %bb.0: ; CHECKIF-NEXT: fcvt.s.wu fa0, a0 ; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_wu_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatunsisf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_wu_i16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatundisf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = uitofp i16 %a to float ret float %1 } @@ -252,6 +580,42 @@ define signext i32 @fcvt_s_w_demanded_bits(i32 signext %0, ptr %1) nounwind { ; RV64IF-NEXT: fcvt.s.w fa5, a0 ; RV64IF-NEXT: fsw fa5, 0(a1) ; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_w_demanded_bits: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: addi s1, a0, 1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __floatsisf +; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_w_demanded_bits: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: addiw s1, a0, 1 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __floatdisf +; RV64I-NEXT: sw a0, 0(s0) +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret %3 = add i32 %0, 1 %4 = sitofp i32 %3 to float store float %4, ptr %1, align 4 @@ -273,6 +637,43 @@ define signext i32 @fcvt_s_wu_demanded_bits(i32 signext %0, ptr %1) nounwind { ; RV64IF-NEXT: fcvt.s.wu fa5, a0 ; RV64IF-NEXT: fsw fa5, 0(a1) ; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_wu_demanded_bits: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: addi s1, a0, 1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __floatunsisf +; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_wu_demanded_bits: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 
24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: addiw s1, a0, 1 +; RV64I-NEXT: slli a0, s1, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: call __floatundisf +; RV64I-NEXT: sw a0, 0(s0) +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret %3 = add i32 %0, 1 %4 = uitofp i32 %3 to float store float %4, ptr %1, align 4 @@ -293,6 +694,28 @@ define signext i16 @fcvt_w_s_i16(float %a) nounwind { ; RV64IF-NEXT: slli a0, a0, 48 ; RV64IF-NEXT: srai a0, a0, 48 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_w_s_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixsfsi +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srai a0, a0, 16 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_w_s_i16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixsfsi +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srai a0, a0, 48 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = fptosi float %a to i16 ret i16 %1 } @@ -313,6 +736,30 @@ define zeroext i16 @fcvt_wu_s_i16(float %a) nounwind { ; RV64IF-NEXT: addiw a1, a1, -1 ; RV64IF-NEXT: and a0, a0, a1 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_wu_s_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixunssfsi +; RV32I-NEXT: lui a1, 16 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_wu_s_i16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixunssfsi +; RV64I-NEXT: lui a1, 16 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = fptoui float %a to i16 ret i16 %1 } @@ -331,6 +778,28 @@ define signext i8 @fcvt_w_s_i8(float %a) nounwind { ; RV64IF-NEXT: slli a0, a0, 56 ; RV64IF-NEXT: srai a0, a0, 56 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_w_s_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixsfsi +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: srai a0, a0, 24 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_w_s_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixsfsi +; RV64I-NEXT: slli a0, a0, 56 +; RV64I-NEXT: srai a0, a0, 56 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = fptosi float %a to i8 ret i8 %1 } @@ -341,6 +810,26 @@ define zeroext i8 @fcvt_wu_s_i8(float %a) nounwind { ; CHECKIF-NEXT: fcvt.wu.s a0, fa0, rtz ; CHECKIF-NEXT: andi a0, a0, 255 ; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fcvt_wu_s_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call 
__fixunssfsi +; RV32I-NEXT: andi a0, a0, 255 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_wu_s_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixunssfsi +; RV64I-NEXT: andi a0, a0, 255 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = fptoui float %a to i8 ret i8 %1 } From d09707070c7460d0887eae8f7022e816510d5eb1 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Tue, 3 Dec 2024 07:12:34 +0000 Subject: [PATCH 031/191] Re-apply "[ORC][JITLink] Add jitlink::Scope::SideEffectsOnly" with fixes. This reapplies aba6bb0820b, which was reverted in 28e2a891210 due to bot failures. It contains fixes to silence warnings for uncovered switches, and for incorrect initializer-symbol handling on ELF and COFF. --- llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h | 3 +++ llvm/lib/ExecutionEngine/JITLink/JITLink.cpp | 2 ++ llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp | 4 +++- llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp | 6 +++--- llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp | 6 +++--- llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp | 6 +++--- llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp | 12 ++++++------ 7 files changed, 23 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h index 9844214c537a0..2831ebb3be798 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h @@ -393,10 +393,13 @@ const char *getLinkageName(Linkage L); /// Defines the scope in which this symbol should be visible: /// Default -- Visible in the public interface of the linkage unit. /// Hidden -- Visible within the linkage unit, but not exported from it. +/// SideEffectsOnly -- Like hidden, but symbol can only be looked up once +/// to trigger materialization of the containing graph. /// Local -- Visible only within the LinkGraph. 
enum class Scope : uint8_t { Default, Hidden, + SideEffectsOnly, Local }; diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp index 9041dc3a52dcf..2da3750b4ed25 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp @@ -85,6 +85,8 @@ const char *getScopeName(Scope S) { return "default"; case Scope::Hidden: return "hidden"; + case Scope::SideEffectsOnly: + return "side-effects-only"; case Scope::Local: return "local"; } diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp index 254c04b198612..e5dbb7ee0510a 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp @@ -253,7 +253,9 @@ void JITLinkerBase::applyLookupResult(AsyncLookupResult Result) { } switch (Sym->getScope()) { case Scope::Local: - llvm_unreachable("External symbol should not have local linkage"); + case Scope::SideEffectsOnly: + llvm_unreachable("External symbol should not have local or " + "side-effects-only linkage"); case Scope::Hidden: break; case Scope::Default: diff --git a/llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp index 007e18e307399..a6c1d1ac632a0 100644 --- a/llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp +++ b/llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp @@ -862,9 +862,9 @@ Error COFFPlatform::COFFPlatformPlugin::preserveInitializerSections( // to the first block. if (!InitSym) { auto &B = **InitSection.blocks().begin(); - InitSym = &G.addDefinedSymbol(B, 0, *InitSymName, B.getSize(), - jitlink::Linkage::Strong, - jitlink::Scope::Default, false, true); + InitSym = &G.addDefinedSymbol( + B, 0, *InitSymName, B.getSize(), jitlink::Linkage::Strong, + jitlink::Scope::SideEffectsOnly, false, true); } // Add keep-alive edges to anonymous symbols in all other init blocks. diff --git a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp index 431c64996b2c5..c3a217a802cb7 100644 --- a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp @@ -894,9 +894,9 @@ Error ELFNixPlatform::ELFNixPlatformPlugin::preserveInitSections( // to the first block. if (!InitSym) { auto &B = **InitSection.blocks().begin(); - InitSym = &G.addDefinedSymbol(B, 0, *InitSymName, B.getSize(), - jitlink::Linkage::Strong, - jitlink::Scope::Default, false, true); + InitSym = &G.addDefinedSymbol( + B, 0, *InitSymName, B.getSize(), jitlink::Linkage::Strong, + jitlink::Scope::SideEffectsOnly, false, true); } // Add keep-alive edges to anonymous symbols in all other init blocks. diff --git a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp index 3e02beb0baa86..1b18a4d0596c1 100644 --- a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp @@ -1001,9 +1001,9 @@ Error MachOPlatform::MachOPlatformPlugin::preserveImportantSections( // to the first block. if (!InitSym) { auto &B = **InitSection->blocks().begin(); - InitSym = &G.addDefinedSymbol(B, 0, *InitSymName, B.getSize(), - jitlink::Linkage::Strong, - jitlink::Scope::Default, false, true); + InitSym = &G.addDefinedSymbol( + B, 0, *InitSymName, B.getSize(), jitlink::Linkage::Strong, + jitlink::Scope::SideEffectsOnly, false, true); } // Add keep-alive edges to anonymous symbols in all other init blocks. 
diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp index c1c55408c7858..c5342c4f4deb3 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp @@ -65,6 +65,8 @@ JITSymbolFlags getJITSymbolFlagsForSymbol(Symbol &Sym) { if (Sym.getScope() == Scope::Default) Flags |= JITSymbolFlags::Exported; + else if (Sym.getScope() == Scope::SideEffectsOnly) + Flags |= JITSymbolFlags::MaterializationSideEffectsOnly; if (Sym.isCallable()) Flags |= JITSymbolFlags::Callable; @@ -236,7 +238,7 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { SymbolMap InternedResult; for (auto *Sym : G.defined_symbols()) - if (Sym->getScope() != Scope::Local) { + if (Sym->getScope() < Scope::SideEffectsOnly) { auto InternedName = ES.intern(Sym->getName()); auto Ptr = getJITSymbolPtrForSymbol(*Sym, G.getTargetTriple()); auto Flags = getJITSymbolFlagsForSymbol(*Sym); @@ -249,7 +251,7 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { } for (auto *Sym : G.absolute_symbols()) - if (Sym->getScope() != Scope::Local) { + if (Sym->getScope() < Scope::SideEffectsOnly) { auto InternedName = ES.intern(Sym->getName()); auto Ptr = getJITSymbolPtrForSymbol(*Sym, G.getTargetTriple()); auto Flags = getJITSymbolFlagsForSymbol(*Sym); @@ -281,11 +283,9 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { // If this is a materialization-side-effects only symbol then bump // the counter and remove in from the result, otherwise make sure that // it's defined. - if (Flags.hasMaterializationSideEffectsOnly()) { + if (Flags.hasMaterializationSideEffectsOnly()) ++NumMaterializationSideEffectsOnlySymbols; - InternedResult.erase(Sym); - continue; - } else if (I == InternedResult.end()) + else if (I == InternedResult.end()) MissingSymbols.push_back(Sym); else if (Layer.OverrideObjectFlags) I->second.setFlags(Flags); From c1ad064dd3b9a1bf503bb8cd6d0d0418a05d10e8 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Tue, 3 Dec 2024 16:19:12 +0800 Subject: [PATCH 032/191] [InstCombine] Fold `icmp spred (and X, highmask), C1` into `icmp spred X, C2` (#118197) Alive2: https://alive2.llvm.org/ce/z/Ffg64g Closes https://github.com/llvm/llvm-project/issues/104772. 
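For illustration, a minimal sketch of the fold (distilled from the regression
tests added below; the function names are made up for this example). A signed
compare of a value masked with a negated power of two (a "high mask") can
instead compare the unmasked value against an adjusted bound:

define i1 @fold_sgt_example(i32 %x) {
  %and = and i32 %x, -8          ; clear the low three bits (high mask)
  %cmp = icmp sgt i32 %and, 0    ; folds to: icmp sgt i32 %x, 7
  ret i1 %cmp
}

define i1 @fold_slt_example(i32 %x) {
  %and = and i32 %x, -8
  %cmp = icmp slt i32 %and, 1    ; folds to: icmp slt i32 %x, 8
  ret i1 %cmp
}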
--- .../InstCombine/InstCombineCompares.cpp | 11 ++ .../test/Transforms/InstCombine/icmp-binop.ll | 107 ++++++++++++++++++ llvm/test/Transforms/InstCombine/icmp.ll | 3 +- llvm/test/Transforms/InstCombine/pr17827.ll | 18 +-- 4 files changed, 124 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index fed21db393ed2..5871973776683 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1760,6 +1760,17 @@ Instruction *InstCombinerImpl::foldICmpAndConstConst(ICmpInst &Cmp, if (!match(And, m_And(m_Value(X), m_APInt(C2)))) return nullptr; + // (and X, highmask) s> [0, ~highmask] --> X s> ~highmask + if (Cmp.getPredicate() == ICmpInst::ICMP_SGT && C1.ule(~*C2) && + C2->isNegatedPowerOf2()) + return new ICmpInst(ICmpInst::ICMP_SGT, X, + ConstantInt::get(X->getType(), ~*C2)); + // (and X, highmask) s< [1, -highmask] --> X s< -highmask + if (Cmp.getPredicate() == ICmpInst::ICMP_SLT && !C1.isSignMask() && + (C1 - 1).ule(~*C2) && C2->isNegatedPowerOf2() && !C2->isSignMask()) + return new ICmpInst(ICmpInst::ICMP_SLT, X, + ConstantInt::get(X->getType(), -*C2)); + // Don't perform the following transforms if the AND has multiple uses if (!And->hasOneUse()) return nullptr; diff --git a/llvm/test/Transforms/InstCombine/icmp-binop.ll b/llvm/test/Transforms/InstCombine/icmp-binop.ll index 878f39bb7c9a5..356489716fff9 100644 --- a/llvm/test/Transforms/InstCombine/icmp-binop.ll +++ b/llvm/test/Transforms/InstCombine/icmp-binop.ll @@ -252,3 +252,110 @@ false: call void @use64(i64 %v) ret i1 false } + +define i1 @test_icmp_sgt_and_negpow2_zero(i32 %add) { +; CHECK-LABEL: @test_icmp_sgt_and_negpow2_zero( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[ADD:%.*]], 7 +; CHECK-NEXT: ret i1 [[CMP]] +; + %and = and i32 %add, -8 + %cmp = icmp sgt i32 %and, 0 + ret i1 %cmp +} + +define i1 @test_icmp_slt_and_negpow2_one(i32 %add) { +; CHECK-LABEL: @test_icmp_slt_and_negpow2_one( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[ADD:%.*]], 8 +; CHECK-NEXT: ret i1 [[CMP]] +; + %and = and i32 %add, -8 + %cmp = icmp slt i32 %and, 1 + ret i1 %cmp +} + +define i1 @test_icmp_sgt_and_negpow2_nonzero(i32 %add) { +; CHECK-LABEL: @test_icmp_sgt_and_negpow2_nonzero( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[ADD:%.*]], -8 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[AND]], -2 +; CHECK-NEXT: ret i1 [[CMP]] +; + %and = and i32 %add, -8 + %cmp = icmp sgt i32 %and, -2 + ret i1 %cmp +} + +define i1 @test_icmp_sgt_and_nonnegpow2_zero(i32 %add) { +; CHECK-LABEL: @test_icmp_sgt_and_nonnegpow2_zero( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[ADD:%.*]], 8 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %and = and i32 %add, 8 + %cmp = icmp sgt i32 %and, 0 + ret i1 %cmp +} + +define i1 @test_icmp_ult_and_negpow2_one(i32 %add) { +; CHECK-LABEL: @test_icmp_ult_and_negpow2_one( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD:%.*]], 8 +; CHECK-NEXT: ret i1 [[CMP]] +; + %and = and i32 %add, -8 + %cmp = icmp ult i32 %and, 1 + ret i1 %cmp +} + +define i1 @test_imply_dom_condition(i32 %add) { +; CHECK-LABEL: @test_imply_dom_condition( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[ADD:%.*]], 7 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i1 false +; + %and = and i32 %add, -8 + %cmp = icmp sgt i32 %and, 0 + tail call void @llvm.assume(i1 %cmp) + %min.iters.check = icmp ult i32 %and, 8 + ret i1 %min.iters.check +} + +define i1 
@test_icmp_slt_and_negpow2_c(i32 %add) { +; CHECK-LABEL: @test_icmp_slt_and_negpow2_c( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[ADD:%.*]], 32 +; CHECK-NEXT: ret i1 [[CMP]] +; + %and = and i32 %add, -32 + %cmp = icmp slt i32 %and, 16 + ret i1 %cmp +} + +define i1 @test_icmp_slt_and_negpow2_invalid_c(i32 %add) { +; CHECK-LABEL: @test_icmp_slt_and_negpow2_invalid_c( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[ADD:%.*]], -32 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[AND]], 48 +; CHECK-NEXT: ret i1 [[CMP]] +; + %and = and i32 %add, -32 + %cmp = icmp slt i32 %and, 48 + ret i1 %cmp +} + +define i1 @test_icmp_sgt_and_negpow2_c(i32 %add) { +; CHECK-LABEL: @test_icmp_sgt_and_negpow2_c( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[ADD:%.*]], 31 +; CHECK-NEXT: ret i1 [[CMP]] +; + %and = and i32 %add, -32 + %cmp = icmp sgt i32 %and, 16 + ret i1 %cmp +} + +define i1 @test_icmp_sgt_and_negpow2_invalid_c(i32 %add) { +; CHECK-LABEL: @test_icmp_sgt_and_negpow2_invalid_c( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[ADD:%.*]], -32 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[AND]], 48 +; CHECK-NEXT: ret i1 [[CMP]] +; + %and = and i32 %add, -32 + %cmp = icmp sgt i32 %and, 48 + ret i1 %cmp +} diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll index c1b9752607c3d..b266d3e77c434 100644 --- a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -2197,8 +2197,7 @@ define i1 @icmp_ashr_and_overshift(i8 %X) { define i1 @icmp_and_ashr_neg_and_legal(i8 %x) { ; CHECK-LABEL: @icmp_and_ashr_neg_and_legal( -; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], -32 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 16 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 32 ; CHECK-NEXT: ret i1 [[CMP]] ; %ashr = ashr i8 %x, 4 diff --git a/llvm/test/Transforms/InstCombine/pr17827.ll b/llvm/test/Transforms/InstCombine/pr17827.ll index 2f10bb5c7f25f..58b77ec60620e 100644 --- a/llvm/test/Transforms/InstCombine/pr17827.ll +++ b/llvm/test/Transforms/InstCombine/pr17827.ll @@ -5,8 +5,7 @@ define i1 @test_shift_and_cmp_not_changed1(i8 %p) { ; CHECK-LABEL: @test_shift_and_cmp_not_changed1( ; CHECK-NEXT: [[SHLP:%.*]] = shl i8 [[P:%.*]], 5 -; CHECK-NEXT: [[ANDP:%.*]] = and i8 [[SHLP]], -64 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[ANDP]], 32 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[SHLP]], 64 ; CHECK-NEXT: ret i1 [[CMP]] ; %shlp = shl i8 %p, 5 @@ -18,10 +17,7 @@ define i1 @test_shift_and_cmp_not_changed1(i8 %p) { ; With arithmetic right shift, the comparison should not be modified. 
define i1 @test_shift_and_cmp_not_changed2(i8 %p) { ; CHECK-LABEL: @test_shift_and_cmp_not_changed2( -; CHECK-NEXT: [[SHLP:%.*]] = ashr i8 [[P:%.*]], 5 -; CHECK-NEXT: [[ANDP:%.*]] = and i8 [[SHLP]], -64 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[ANDP]], 32 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %shlp = ashr i8 %p, 5 %andp = and i8 %shlp, -64 @@ -34,8 +30,7 @@ define i1 @test_shift_and_cmp_not_changed2(i8 %p) { define i1 @test_shift_and_cmp_changed1(i8 %p, i8 %q) { ; CHECK-LABEL: @test_shift_and_cmp_changed1( ; CHECK-NEXT: [[ANDP:%.*]] = shl i8 [[P:%.*]], 5 -; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[ANDP]], -64 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 32 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[ANDP]], 33 ; CHECK-NEXT: ret i1 [[CMP]] ; %andp = and i8 %p, 6 @@ -50,8 +45,7 @@ define i1 @test_shift_and_cmp_changed1(i8 %p, i8 %q) { define <2 x i1> @test_shift_and_cmp_changed1_vec(<2 x i8> %p, <2 x i8> %q) { ; CHECK-LABEL: @test_shift_and_cmp_changed1_vec( ; CHECK-NEXT: [[ANDP:%.*]] = shl <2 x i8> [[P:%.*]], splat (i8 5) -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[ANDP]], splat (i8 -64) -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[TMP1]], splat (i8 32) +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[ANDP]], splat (i8 33) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %andp = and <2 x i8> %p, @@ -91,9 +85,7 @@ define <2 x i1> @test_shift_and_cmp_changed2_vec(<2 x i8> %p) { ; nsw on the shift should not affect the comparison. define i1 @test_shift_and_cmp_changed3(i8 %p) { ; CHECK-LABEL: @test_shift_and_cmp_changed3( -; CHECK-NEXT: [[SHLP:%.*]] = shl nsw i8 [[P:%.*]], 5 -; CHECK-NEXT: [[ANDP:%.*]] = and i8 [[SHLP]], -64 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[ANDP]], 32 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[P:%.*]], 2 ; CHECK-NEXT: ret i1 [[CMP]] ; %shlp = shl nsw i8 %p, 5 From 46446bb2d31a7e3b2f857613b190150d41734696 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Tue, 3 Dec 2024 09:49:26 +0100 Subject: [PATCH 033/191] [clang][bytecode][NFC] Diagnose non-constexpr builtin strcmp calls (#118442) --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 30 ++++++++++++++---------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index f3024dc3e26fe..8ff0fad0aa5a7 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -148,6 +148,17 @@ static bool retPrimValue(InterpState &S, CodePtr OpPC, #undef RET_CASE } +static void diagnoseNonConstexprBuiltin(InterpState &S, CodePtr OpPC, + unsigned ID) { + auto Loc = S.Current->getSource(OpPC); + if (S.getLangOpts().CPlusPlus11) + S.CCEDiag(Loc, diag::note_constexpr_invalid_function) + << /*isConstexpr=*/0 << /*isConstructor=*/0 + << ("'" + S.getASTContext().BuiltinInfo.getName(ID) + "'").str(); + else + S.CCEDiag(Loc, diag::note_invalid_subexpr_in_const_expr); +} + static bool interp__builtin_is_constant_evaluated(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, const CallExpr *Call) { @@ -181,10 +192,14 @@ static bool interp__builtin_is_constant_evaluated(InterpState &S, CodePtr OpPC, static bool interp__builtin_strcmp(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, - const CallExpr *Call) { + const Function *Func, const CallExpr *Call) { + unsigned ID = Func->getBuiltinID(); const Pointer &A = getParam(Frame, 0); const Pointer &B = getParam(Frame, 1); + if (ID == Builtin::BIstrcmp) + diagnoseNonConstexprBuiltin(S, OpPC, ID); + if (!CheckLive(S, OpPC, A, AK_Read) || 
!CheckLive(S, OpPC, B, AK_Read)) return false; @@ -222,16 +237,6 @@ static bool interp__builtin_strcmp(InterpState &S, CodePtr OpPC, return true; } -static void diagnoseNonConstexprBuiltin(InterpState &S, CodePtr OpPC, - unsigned ID) { - auto Loc = S.Current->getSource(OpPC); - if (S.getLangOpts().CPlusPlus11) - S.CCEDiag(Loc, diag::note_constexpr_invalid_function) - << /*isConstexpr=*/0 << /*isConstructor=*/0 - << ("'" + S.getASTContext().BuiltinInfo.getName(ID) + "'").str(); - else - S.CCEDiag(Loc, diag::note_invalid_subexpr_in_const_expr); -} static bool interp__builtin_strlen(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, const Function *Func, const CallExpr *Call) { @@ -1846,7 +1851,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F, case Builtin::BI__assume: break; case Builtin::BI__builtin_strcmp: - if (!interp__builtin_strcmp(S, OpPC, Frame, Call)) + case Builtin::BIstrcmp: + if (!interp__builtin_strcmp(S, OpPC, Frame, F, Call)) return false; break; case Builtin::BI__builtin_strlen: From e30d304d72ac9a70671268c50ee850c2f0c42ba3 Mon Sep 17 00:00:00 2001 From: Antonio Frighetto Date: Tue, 3 Dec 2024 09:43:34 +0100 Subject: [PATCH 034/191] [MemCpyOpt] Introduce test for PR101930 (NFC) --- .../memset-memmove-redundant-memmove.ll | 183 ++++++++++++++++++ 1 file changed, 183 insertions(+) create mode 100644 llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll b/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll new file mode 100644 index 0000000000000..4b09a2057b4c3 --- /dev/null +++ b/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll @@ -0,0 +1,183 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=memcpyopt -S %s -verify-memoryssa | FileCheck %s + +; Redundant memmove. +define i32 @redundant_memmove() { +; CHECK-LABEL: @redundant_memmove( +; CHECK-NEXT: [[ARRAY:%.*]] = alloca [26 x i32], align 16 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ARRAY]], i8 0, i64 104, i1 false) +; CHECK-NEXT: [[ARRAY_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 4 +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY]], ptr align 4 [[ARRAY_IDX]], i64 100, i1 false) +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAY]], align 16 +; CHECK-NEXT: ret i32 [[VAL]] +; + %array = alloca [26 x i32], align 16 + call void @llvm.memset.p0.i64(ptr align 16 %array, i8 0, i64 104, i1 false) + %array.idx = getelementptr inbounds i8, ptr %array, i64 4 + call void @llvm.memmove.p0.p0.i64(ptr align 16 %array, ptr align 4 %array.idx, i64 100, i1 false) + %val = load i32, ptr %array, align 16 + ret i32 %val +} + +; Used memmove, buffer is reset to zero. 
+define i32 @used_memmove_1() { +; CHECK-LABEL: @used_memmove_1( +; CHECK-NEXT: [[ARRAY:%.*]] = alloca [26 x i32], align 16 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ARRAY]], i8 0, i64 104, i1 false) +; CHECK-NEXT: [[ARRAY_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 4 +; CHECK-NEXT: store i32 1, ptr [[ARRAY_IDX]], align 4 +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY]], ptr align 4 [[ARRAY_IDX]], i64 100, i1 false) +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAY_IDX]], align 4 +; CHECK-NEXT: ret i32 [[VAL]] +; + %array = alloca [26 x i32], align 16 + call void @llvm.memset.p0.i64(ptr align 16 %array, i8 0, i64 104, i1 false) + %array.idx = getelementptr inbounds i8, ptr %array, i64 4 + store i32 1, ptr %array.idx + call void @llvm.memmove.p0.p0.i64(ptr align 16 %array, ptr align 4 %array.idx, i64 100, i1 false) + %val = load i32, ptr %array.idx, align 4 + ret i32 %val +} + +; Used memmove. +define i32 @used_memmove_2() { +; CHECK-LABEL: @used_memmove_2( +; CHECK-NEXT: [[ARRAY:%.*]] = alloca [26 x i32], align 16 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ARRAY]], i8 0, i64 104, i1 false) +; CHECK-NEXT: [[ARRAY_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 4 +; CHECK-NEXT: store i32 1, ptr [[ARRAY]], align 4 +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY]], ptr align 4 [[ARRAY_IDX]], i64 100, i1 false) +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAY_IDX]], align 4 +; CHECK-NEXT: ret i32 [[VAL]] +; + %array = alloca [26 x i32], align 16 + call void @llvm.memset.p0.i64(ptr align 16 %array, i8 0, i64 104, i1 false) + %array.idx = getelementptr inbounds i8, ptr %array, i64 4 + store i32 1, ptr %array + call void @llvm.memmove.p0.p0.i64(ptr align 16 %array, ptr align 4 %array.idx, i64 100, i1 false) + %val = load i32, ptr %array.idx, align 4 + ret i32 %val +} + +; Used memmove, buffer clobbered by opaque. +define i32 @used_memmove_3() { +; CHECK-LABEL: @used_memmove_3( +; CHECK-NEXT: [[ARRAY:%.*]] = alloca [25 x i32], align 16 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ARRAY]], i8 0, i64 100, i1 false) +; CHECK-NEXT: call void @opaque(ptr [[ARRAY]]) +; CHECK-NEXT: [[ARRAY_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 4 +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY]], ptr align 4 [[ARRAY_IDX]], i64 96, i1 false) +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAY]], align 16 +; CHECK-NEXT: ret i32 [[VAL]] +; + %array = alloca [25 x i32], align 16 + call void @llvm.memset.p0.i64(ptr align 16 %array, i8 0, i64 100, i1 false) + call void @opaque(ptr %array) + %array.idx = getelementptr inbounds i8, ptr %array, i64 4 + call void @llvm.memmove.p0.p0.i64(ptr align 16 %array, ptr align 4 %array.idx, i64 96, i1 false) + %val = load i32, ptr %array, align 16 + ret i32 %val +} + +; Redundant memmove, not within the same basic block. 
+define i32 @redundant_memmove_different_bbs() { +; CHECK-LABEL: @redundant_memmove_different_bbs( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAY:%.*]] = alloca [26 x i32], align 16 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ARRAY]], i8 0, i64 104, i1 false) +; CHECK-NEXT: [[ARRAY_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 4 +; CHECK-NEXT: br label [[USE:%.*]] +; CHECK: use: +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY]], ptr align 4 [[ARRAY_IDX]], i64 100, i1 false) +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAY]], align 16 +; CHECK-NEXT: ret i32 [[VAL]] +; +entry: + %array = alloca [26 x i32], align 16 + call void @llvm.memset.p0.i64(ptr align 16 %array, i8 0, i64 104, i1 false) + %array.idx = getelementptr inbounds i8, ptr %array, i64 4 + br label %use + +use: ; preds = %entry + call void @llvm.memmove.p0.p0.i64(ptr align 16 %array, ptr align 4 %array.idx, i64 100, i1 false) + %val = load i32, ptr %array, align 16 + ret i32 %val +} + +@g_var = global [26 x i32] zeroinitializer, align 16 + +; Redundant memmove on a global variable. +define ptr @redundant_memmove_memset_global_variable() { +; CHECK-LABEL: @redundant_memmove_memset_global_variable( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 @g_var, i8 0, i64 104, i1 false) +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 16 @g_var, ptr align 4 getelementptr inbounds nuw (i8, ptr @g_var, i64 4), i64 100, i1 false) +; CHECK-NEXT: ret ptr @g_var +; + call void @llvm.memset.p0.i64(ptr align 16 @g_var, i8 0, i64 104, i1 false) + call void @llvm.memmove.p0.p0.i64(ptr align 16 @g_var, ptr align 4 getelementptr inbounds nuw (i8, ptr @g_var, i64 4), i64 100, i1 false) + ret ptr @g_var +} + +; Memset only partial. +define i32 @partial_memset() { +; CHECK-LABEL: @partial_memset( +; CHECK-NEXT: [[ARRAY:%.*]] = alloca [26 x i32], align 16 +; CHECK-NEXT: [[ARRAY_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 92 +; CHECK-NEXT: store i32 1, ptr [[ARRAY_IDX]], align 4 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ARRAY]], i8 0, i64 26, i1 false) +; CHECK-NEXT: [[ARRAY_IDX_2:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 4 +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY]], ptr align 4 [[ARRAY_IDX_2]], i64 100, i1 false) +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAY_IDX]], align 4 +; CHECK-NEXT: ret i32 [[VAL]] +; + %array = alloca [26 x i32], align 16 + %array.idx = getelementptr inbounds i8, ptr %array, i64 92 + store i32 1, ptr %array.idx + call void @llvm.memset.p0.i64(ptr align 16 %array, i8 0, i64 26, i1 false) + %array.idx.2 = getelementptr inbounds i8, ptr %array, i64 4 + call void @llvm.memmove.p0.p0.i64(ptr align 16 %array, ptr align 4 %array.idx.2, i64 100, i1 false) + %val = load i32, ptr %array.idx, align 4 + ret i32 %val +} + +; Memset length not constant. 
+define i32 @memset_length_not_constant(i64 %size) { +; CHECK-LABEL: @memset_length_not_constant( +; CHECK-NEXT: [[ARRAY:%.*]] = alloca [26 x i32], align 16 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ARRAY]], i8 0, i64 [[SIZE:%.*]], i1 false) +; CHECK-NEXT: [[ARRAY_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 4 +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY]], ptr align 4 [[ARRAY_IDX]], i64 100, i1 false) +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAY]], align 16 +; CHECK-NEXT: ret i32 [[VAL]] +; + %array = alloca [26 x i32], align 16 + call void @llvm.memset.p0.i64(ptr align 16 %array, i8 0, i64 %size, i1 false) + %array.idx = getelementptr inbounds i8, ptr %array, i64 4 + call void @llvm.memmove.p0.p0.i64(ptr align 16 %array, ptr align 4 %array.idx, i64 100, i1 false) + %val = load i32, ptr %array, align 16 + ret i32 %val +} + +; Memmove buffer not memset'd, different buffers. +define i32 @memset_memmove_dest_buffers_not_alias() { +; CHECK-LABEL: @memset_memmove_dest_buffers_not_alias( +; CHECK-NEXT: [[ARRAY:%.*]] = alloca [26 x i32], align 16 +; CHECK-NEXT: [[ARRAY2:%.*]] = alloca [26 x i32], align 16 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ARRAY]], i8 0, i64 104, i1 false) +; CHECK-NEXT: [[ARRAY2_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY2]], i64 4 +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY2]], ptr align 4 [[ARRAY2_IDX]], i64 100, i1 false) +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAY2]], align 16 +; CHECK-NEXT: ret i32 [[VAL]] +; + %array = alloca [26 x i32], align 16 + %array2 = alloca [26 x i32], align 16 + call void @llvm.memset.p0.i64(ptr align 16 %array, i8 0, i64 104, i1 false) + %array2.idx = getelementptr inbounds i8, ptr %array2, i64 4 + call void @llvm.memmove.p0.p0.i64(ptr align 16 %array2, ptr align 4 %array2.idx, i64 100, i1 false) + %val = load i32, ptr %array2, align 16 + ret i32 %val +} + +declare void @opaque(ptr) +declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) +declare void @llvm.memmove.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) From 1d6ab189be031bf723abf35f772fbd5d4c86c612 Mon Sep 17 00:00:00 2001 From: Antonio Frighetto Date: Tue, 3 Dec 2024 09:43:52 +0100 Subject: [PATCH 035/191] [MemCpyOpt] Drop dead `memmove` calls on `memset`'d source data When a memmove happens to clobber source data, and such data have been previously memset'd, the memmove may be redundant. 
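For illustration, a minimal sketch of the pattern this removes (adapted from
the @redundant_memmove case in the test added by the previous commit; the
function name is made up for this example). The overlapping memmove only
shuffles bytes that the dominating memset has already zeroed, so it has no
observable effect and can be dropped:

define i32 @memset_then_overlapping_memmove() {
  %array = alloca [26 x i32], align 16
  call void @llvm.memset.p0.i64(ptr align 16 %array, i8 0, i64 104, i1 false)
  %array.idx = getelementptr inbounds i8, ptr %array, i64 4
  ; dead: copies 100 bytes that the memset above already zeroed
  call void @llvm.memmove.p0.p0.i64(ptr align 16 %array, ptr align 4 %array.idx, i64 100, i1 false)
  %val = load i32, ptr %array, align 16
  ret i32 %val
}

declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1)
declare void @llvm.memmove.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1)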
--- .../llvm/Transforms/Scalar/MemCpyOptimizer.h | 3 +- .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 70 ++++++++++++++++++- .../memset-memmove-redundant-memmove.ll | 3 - 3 files changed, 69 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h index 023c9de28209c..496d2958fc2d0 100644 --- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -68,7 +68,7 @@ class MemCpyOptPass : public PassInfoMixin { BasicBlock::iterator &BBI); bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI); bool processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI); - bool processMemMove(MemMoveInst *M); + bool processMemMove(MemMoveInst *M, BasicBlock::iterator &BBI); bool performCallSlotOptzn(Instruction *cpyLoad, Instruction *cpyStore, Value *cpyDst, Value *cpySrc, TypeSize cpyLen, Align cpyAlign, BatchAAResults &BAA, @@ -87,6 +87,7 @@ class MemCpyOptPass : public PassInfoMixin { bool performStackMoveOptzn(Instruction *Load, Instruction *Store, AllocaInst *DestAlloca, AllocaInst *SrcAlloca, TypeSize Size, BatchAAResults &BAA); + bool isMemMoveMemSetDependency(MemMoveInst *M); void eraseInstruction(Instruction *I); bool iterateOnFunction(Function &F); diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index e9e1071ea210c..0cba5d077da62 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -68,6 +68,7 @@ static cl::opt EnableMemCpyOptWithoutLibcalls( cl::desc("Enable memcpyopt even when libcalls are disabled")); STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted"); +STATISTIC(NumMemMoveInstr, "Number of memmove instructions deleted"); STATISTIC(NumMemSetInfer, "Number of memsets inferred"); STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy"); STATISTIC(NumCpyToSet, "Number of memcpys converted to memset"); @@ -1841,12 +1842,75 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { return false; } +/// Memmove calls with overlapping src/dest buffers that come after a memset may +/// be removed. +bool MemCpyOptPass::isMemMoveMemSetDependency(MemMoveInst *M) { + const auto &DL = M->getDataLayout(); + MemoryUseOrDef *MemMoveAccess = MSSA->getMemoryAccess(M); + if (!MemMoveAccess) + return false; + + // The memmove is of form memmove(x, x + A, B). + MemoryLocation SourceLoc = MemoryLocation::getForSource(M); + auto *MemMoveSourceOp = M->getSource(); + auto *Source = dyn_cast(MemMoveSourceOp); + if (!Source) + return false; + + APInt Offset(DL.getIndexTypeSizeInBits(Source->getType()), 0); + LocationSize MemMoveLocSize = SourceLoc.Size; + if (Source->getPointerOperand() != M->getDest() || + !MemMoveLocSize.hasValue() || + !Source->accumulateConstantOffset(DL, Offset) || Offset.isNegative()) { + return false; + } + + uint64_t MemMoveSize = MemMoveLocSize.getValue(); + LocationSize TotalSize = + LocationSize::precise(Offset.getZExtValue() + MemMoveSize); + MemoryLocation CombinedLoc(M->getDest(), TotalSize); + + // The first dominating clobbering MemoryAccess for the combined location + // needs to be a memset. 
+ BatchAAResults BAA(*AA); + MemoryAccess *FirstDef = MemMoveAccess->getDefiningAccess(); + auto *DestClobber = dyn_cast( + MSSA->getWalker()->getClobberingMemoryAccess(FirstDef, CombinedLoc, BAA)); + if (!DestClobber) + return false; + + auto *MS = dyn_cast_or_null(DestClobber->getMemoryInst()); + if (!MS) + return false; + + // Memset length must be sufficiently large. + auto *MemSetLength = dyn_cast(MS->getLength()); + if (!MemSetLength || MemSetLength->getZExtValue() < MemMoveSize) + return false; + + // The destination buffer must have been memset'd. + if (!BAA.isMustAlias(MS->getDest(), M->getDest())) + return false; + + return true; +} + /// Transforms memmove calls to memcpy calls when the src/dst are guaranteed /// not to alias. -bool MemCpyOptPass::processMemMove(MemMoveInst *M) { +bool MemCpyOptPass::processMemMove(MemMoveInst *M, BasicBlock::iterator &BBI) { // See if the source could be modified by this memmove potentially. - if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M)))) + if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M)))) { + // On the off-chance the memmove clobbers src with previously memset'd + // bytes, the memmove may be redundant. + if (!M->isVolatile() && isMemMoveMemSetDependency(M)) { + LLVM_DEBUG(dbgs() << "Removed redundant memmove.\n"); + ++BBI; + eraseInstruction(M); + ++NumMemMoveInstr; + return true; + } return false; + } LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M << "\n"); @@ -2064,7 +2128,7 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) { else if (auto *M = dyn_cast(I)) RepeatInstruction = processMemCpy(M, BI); else if (auto *M = dyn_cast(I)) - RepeatInstruction = processMemMove(M); + RepeatInstruction = processMemMove(M, BI); else if (auto *CB = dyn_cast(I)) { for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) { if (CB->isByValArgument(i)) diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll b/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll index 4b09a2057b4c3..c7593e2941518 100644 --- a/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll +++ b/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll @@ -7,7 +7,6 @@ define i32 @redundant_memmove() { ; CHECK-NEXT: [[ARRAY:%.*]] = alloca [26 x i32], align 16 ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ARRAY]], i8 0, i64 104, i1 false) ; CHECK-NEXT: [[ARRAY_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 4 -; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY]], ptr align 4 [[ARRAY_IDX]], i64 100, i1 false) ; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAY]], align 16 ; CHECK-NEXT: ret i32 [[VAL]] ; @@ -88,7 +87,6 @@ define i32 @redundant_memmove_different_bbs() { ; CHECK-NEXT: [[ARRAY_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 4 ; CHECK-NEXT: br label [[USE:%.*]] ; CHECK: use: -; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY]], ptr align 4 [[ARRAY_IDX]], i64 100, i1 false) ; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAY]], align 16 ; CHECK-NEXT: ret i32 [[VAL]] ; @@ -110,7 +108,6 @@ use: ; preds = %entry define ptr @redundant_memmove_memset_global_variable() { ; CHECK-LABEL: @redundant_memmove_memset_global_variable( ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 @g_var, i8 0, i64 104, i1 false) -; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 16 @g_var, ptr align 4 getelementptr inbounds nuw (i8, ptr @g_var, i64 4), i64 100, i1 false) ; CHECK-NEXT: ret ptr 
@g_var ; call void @llvm.memset.p0.i64(ptr align 16 @g_var, i8 0, i64 104, i1 false) From 5b0f4f2cb040f472d41870ba99ae45fc1022188f Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 3 Dec 2024 09:55:12 +0100 Subject: [PATCH 036/191] [BasicAA] Treat returns_twice functions as clobbering unescaped objects (#117902) Effectively this models all the accesses that occur between the first and second return as happening at the point of the call. Fixes https://github.com/llvm/llvm-project/issues/116668. --- llvm/lib/Analysis/BasicAliasAnalysis.cpp | 8 ++- llvm/test/Transforms/GVN/setjmp.ll | 66 +++++++++++++++++++++++- 2 files changed, 71 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 381fb7bbdb517..648a22deaf6ba 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -947,8 +947,14 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call, // // Make sure the object has not escaped here, and then check that none of the // call arguments alias the object below. + // + // We model calls that can return twice (setjmp) as clobbering non-escaping + // objects, to model any accesses that may occur prior to the second return. + // As an exception, ignore allocas, as setjmp is not required to preserve + // non-volatile stores for them. if (!isa(Object) && Call != Object && - AAQI.CA->isNotCapturedBefore(Object, Call, /*OrAt*/ false)) { + AAQI.CA->isNotCapturedBefore(Object, Call, /*OrAt*/ false) && + (isa(Object) || !Call->hasFnAttr(Attribute::ReturnsTwice))) { // Optimistically assume that call doesn't touch Object and check this // assumption in the following loop. diff --git a/llvm/test/Transforms/GVN/setjmp.ll b/llvm/test/Transforms/GVN/setjmp.ll index 0277fcfa226ed..07b7028346760 100644 --- a/llvm/test/Transforms/GVN/setjmp.ll +++ b/llvm/test/Transforms/GVN/setjmp.ll @@ -5,7 +5,6 @@ declare i32 @setjmp() returns_twice declare void @longjmp() declare ptr @malloc(i64) -; FIXME: This is a miscompile. define i32 @test() { ; CHECK-LABEL: define i32 @test() { ; CHECK-NEXT: [[MALLOC:%.*]] = call noalias ptr @malloc(i64 4) @@ -18,7 +17,8 @@ define i32 @test() { ; CHECK-NEXT: call void @longjmp() ; CHECK-NEXT: unreachable ; CHECK: [[IF_END]]: -; CHECK-NEXT: ret i32 10 +; CHECK-NEXT: [[RES:%.*]] = load i32, ptr [[MALLOC]], align 4 +; CHECK-NEXT: ret i32 [[RES]] ; %malloc = call noalias ptr @malloc(i64 4) store i32 10, ptr %malloc, align 4 @@ -35,3 +35,65 @@ if.end: %res = load i32, ptr %malloc ret i32 %res } + +; We are still allowed to optimize non-volatile accesses to allocas. 
+define i32 @test_alloca() { +; CHECK-LABEL: define i32 @test_alloca() { +; CHECK-NEXT: [[ALLOC:%.*]] = alloca i43, align 8 +; CHECK-NEXT: store i32 10, ptr [[ALLOC]], align 4 +; CHECK-NEXT: [[SJ:%.*]] = call i32 @setjmp() +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[SJ]], 0 +; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: store i32 20, ptr [[ALLOC]], align 4 +; CHECK-NEXT: call void @longjmp() +; CHECK-NEXT: unreachable +; CHECK: [[IF_END]]: +; CHECK-NEXT: ret i32 10 +; + %alloc = alloca i43 + store i32 10, ptr %alloc, align 4 + %sj = call i32 @setjmp() + %cmp = icmp eq i32 %sj, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + store i32 20, ptr %alloc + call void @longjmp() + unreachable + +if.end: + %res = load i32, ptr %alloc + ret i32 %res +} + +define i32 @test_alloca_volatile() { +; CHECK-LABEL: define i32 @test_alloca_volatile() { +; CHECK-NEXT: [[ALLOC:%.*]] = alloca i43, align 8 +; CHECK-NEXT: store volatile i32 10, ptr [[ALLOC]], align 4 +; CHECK-NEXT: [[SJ:%.*]] = call i32 @setjmp() +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[SJ]], 0 +; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: store volatile i32 20, ptr [[ALLOC]], align 4 +; CHECK-NEXT: call void @longjmp() +; CHECK-NEXT: unreachable +; CHECK: [[IF_END]]: +; CHECK-NEXT: [[RES:%.*]] = load volatile i32, ptr [[ALLOC]], align 4 +; CHECK-NEXT: ret i32 [[RES]] +; + %alloc = alloca i43 + store volatile i32 10, ptr %alloc, align 4 + %sj = call i32 @setjmp() + %cmp = icmp eq i32 %sj, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + store volatile i32 20, ptr %alloc + call void @longjmp() + unreachable + +if.end: + %res = load volatile i32, ptr %alloc + ret i32 %res +} From a871124f8709f6b5e837c6044ce7df056f52292a Mon Sep 17 00:00:00 2001 From: jeanPerier Date: Tue, 3 Dec 2024 09:59:43 +0100 Subject: [PATCH 037/191] [flang][hlfir] optimize hlfir.eval_in_mem bufferization (#118069) This patch extends the optimize bufferization to deal with the new hlfir.eval_in_mem and move the evaluation contained in its body to operate directly over the LHS when it can prove there are no access to the LHS inside the region (and that the LHS is contiguous). This will allow the array function call optimization when lowering is changed to produce an hlfir.eval_in_mem in the next patch. --- .../flang/Optimizer/Analysis/AliasAnalysis.h | 6 ++ .../lib/Optimizer/Analysis/AliasAnalysis.cpp | 41 +++++++- .../Transforms/OptimizedBufferization.cpp | 95 +++++++++++++++++++ .../HLFIR/opt-bufferization-eval_in_mem.fir | 67 +++++++++++++ 4 files changed, 208 insertions(+), 1 deletion(-) create mode 100644 flang/test/HLFIR/opt-bufferization-eval_in_mem.fir diff --git a/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h b/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h index e410831c0fc3e..8d17e4e476d10 100644 --- a/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h +++ b/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h @@ -198,6 +198,12 @@ struct AliasAnalysis { /// Return the modify-reference behavior of `op` on `location`. mlir::ModRefResult getModRef(mlir::Operation *op, mlir::Value location); + /// Return the modify-reference behavior of operations inside `region` on + /// `location`. Contrary to getModRef(operation, location), this will visit + /// nested regions recursively according to the HasRecursiveMemoryEffects + /// trait. 
+ mlir::ModRefResult getModRef(mlir::Region ®ion, mlir::Value location); + /// Return the memory source of a value. /// If getLastInstantiationPoint is true, the search for the source /// will stop at [hl]fir.declare if it represents a dummy diff --git a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp index 2b24791d6c7c5..0b0f83d024ce3 100644 --- a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp +++ b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp @@ -91,6 +91,13 @@ bool AliasAnalysis::Source::isDummyArgument() const { return false; } +static bool isEvaluateInMemoryBlockArg(mlir::Value v) { + if (auto evalInMem = llvm::dyn_cast_or_null( + v.getParentRegion()->getParentOp())) + return evalInMem.getMemory() == v; + return false; +} + bool AliasAnalysis::Source::isData() const { return origin.isData; } bool AliasAnalysis::Source::isBoxData() const { return mlir::isa(fir::unwrapRefType(valueType)) && @@ -457,6 +464,33 @@ ModRefResult AliasAnalysis::getModRef(Operation *op, Value location) { return result; } +ModRefResult AliasAnalysis::getModRef(mlir::Region ®ion, + mlir::Value location) { + ModRefResult result = ModRefResult::getNoModRef(); + for (mlir::Operation &op : region.getOps()) { + if (op.hasTrait()) { + for (mlir::Region &subRegion : op.getRegions()) { + result = result.merge(getModRef(subRegion, location)); + // Fast return is already mod and ref. + if (result.isModAndRef()) + return result; + } + // In MLIR, RecursiveMemoryEffects can be combined with + // MemoryEffectOpInterface to describe extra effects on top of the + // effects of the nested operations. However, the presence of + // RecursiveMemoryEffects and the absence of MemoryEffectOpInterface + // implies the operation has no other memory effects than the one of its + // nested operations. + if (!mlir::isa(op)) + continue; + } + result = result.merge(getModRef(&op, location)); + if (result.isModAndRef()) + return result; + } + return result; +} + AliasAnalysis::Source::Attributes getAttrsFromVariable(fir::FortranVariableOpInterface var) { AliasAnalysis::Source::Attributes attrs; @@ -698,7 +732,7 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v, breakFromLoop = true; }); } - if (!defOp && type == SourceKind::Unknown) + if (!defOp && type == SourceKind::Unknown) { // Check if the memory source is coming through a dummy argument. if (isDummyArgument(v)) { type = SourceKind::Argument; @@ -708,7 +742,12 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v, if (isPointerReference(ty)) attributes.set(Attribute::Pointer); + } else if (isEvaluateInMemoryBlockArg(v)) { + // hlfir.eval_in_mem block operands is allocated by the operation. 
+ type = SourceKind::Allocate; + ty = v.getType(); } + } if (type == SourceKind::Global) { return {{global, instantiationPoint, followingData}, diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp index a0160b233e3cd..9327e7ad5875c 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp @@ -1108,6 +1108,100 @@ class ReductionMaskConversion : public mlir::OpRewritePattern { } }; +class EvaluateIntoMemoryAssignBufferization + : public mlir::OpRewritePattern { + +public: + using mlir::OpRewritePattern::OpRewritePattern; + + llvm::LogicalResult + matchAndRewrite(hlfir::EvaluateInMemoryOp, + mlir::PatternRewriter &rewriter) const override; +}; + +static llvm::LogicalResult +tryUsingAssignLhsDirectly(hlfir::EvaluateInMemoryOp evalInMem, + mlir::PatternRewriter &rewriter) { + mlir::Location loc = evalInMem.getLoc(); + hlfir::DestroyOp destroy; + hlfir::AssignOp assign; + for (auto user : llvm::enumerate(evalInMem->getUsers())) { + if (user.index() > 2) + return mlir::failure(); + mlir::TypeSwitch(user.value()) + .Case([&](hlfir::AssignOp op) { assign = op; }) + .Case([&](hlfir::DestroyOp op) { destroy = op; }); + } + if (!assign || !destroy || destroy.mustFinalizeExpr() || + assign.isAllocatableAssignment()) + return mlir::failure(); + + hlfir::Entity lhs{assign.getLhs()}; + // EvaluateInMemoryOp memory is contiguous, so in general, it can only be + // replace by the LHS if the LHS is contiguous. + if (!lhs.isSimplyContiguous()) + return mlir::failure(); + // Character assignment may involves truncation/padding, so the LHS + // cannot be used to evaluate RHS in place without proving the LHS and + // RHS lengths are the same. + if (lhs.isCharacter()) + return mlir::failure(); + fir::AliasAnalysis aliasAnalysis; + // The region must not read or write the LHS. + // Note that getModRef is used instead of mlir::MemoryEffects because + // EvaluateInMemoryOp is typically expected to hold fir.calls and that + // Fortran calls cannot be modeled in a useful way with mlir::MemoryEffects: + // it is hard/impossible to list all the read/written SSA values in a call, + // but it is often possible to tell that an SSA value cannot be accessed, + // hence getModRef is needed here and below. Also note that getModRef uses + // mlir::MemoryEffects for operations that do not have special handling in + // getModRef. + if (aliasAnalysis.getModRef(evalInMem.getBody(), lhs).isModOrRef()) + return mlir::failure(); + // Any variables affected between the hlfir.evalInMem and assignment must not + // be read or written inside the region since it will be moved at the + // assignment insertion point. 
+ auto effects = getEffectsBetween(evalInMem->getNextNode(), assign); + if (!effects) { + LLVM_DEBUG( + llvm::dbgs() + << "operation with unknown effects between eval_in_mem and assign\n"); + return mlir::failure(); + } + for (const mlir::MemoryEffects::EffectInstance &effect : *effects) { + mlir::Value affected = effect.getValue(); + if (!affected || + aliasAnalysis.getModRef(evalInMem.getBody(), affected).isModOrRef()) + return mlir::failure(); + } + + rewriter.setInsertionPoint(assign); + fir::FirOpBuilder builder(rewriter, evalInMem.getOperation()); + mlir::Value rawLhs = hlfir::genVariableRawAddress(loc, builder, lhs); + hlfir::computeEvaluateOpIn(loc, builder, evalInMem, rawLhs); + rewriter.eraseOp(assign); + rewriter.eraseOp(destroy); + rewriter.eraseOp(evalInMem); + return mlir::success(); +} + +llvm::LogicalResult EvaluateIntoMemoryAssignBufferization::matchAndRewrite( + hlfir::EvaluateInMemoryOp evalInMem, + mlir::PatternRewriter &rewriter) const { + if (mlir::succeeded(tryUsingAssignLhsDirectly(evalInMem, rewriter))) + return mlir::success(); + // Rewrite to temp + as_expr here so that the assign + as_expr pattern can + // kick-in for simple types and at least implement the assignment inline + // instead of call Assign runtime. + fir::FirOpBuilder builder(rewriter, evalInMem.getOperation()); + mlir::Location loc = evalInMem.getLoc(); + auto [temp, isHeapAllocated] = hlfir::computeEvaluateOpInNewTemp( + loc, builder, evalInMem, evalInMem.getShape(), evalInMem.getTypeparams()); + rewriter.replaceOpWithNewOp( + evalInMem, temp, /*mustFree=*/builder.createBool(loc, isHeapAllocated)); + return mlir::success(); +} + class OptimizedBufferizationPass : public hlfir::impl::OptimizedBufferizationBase< OptimizedBufferizationPass> { @@ -1130,6 +1224,7 @@ class OptimizedBufferizationPass patterns.insert(context); patterns.insert(context); patterns.insert(context); + patterns.insert(context); patterns.insert>(context); patterns.insert>(context); patterns.insert>(context); diff --git a/flang/test/HLFIR/opt-bufferization-eval_in_mem.fir b/flang/test/HLFIR/opt-bufferization-eval_in_mem.fir new file mode 100644 index 0000000000000..984c0bcbaddcc --- /dev/null +++ b/flang/test/HLFIR/opt-bufferization-eval_in_mem.fir @@ -0,0 +1,67 @@ +// RUN: fir-opt --opt-bufferization %s | FileCheck %s + +// Fortran F2023 15.5.2.14 point 4. ensures that _QPfoo cannot access _QFtestEx +// and the temporary storage for the result can be avoided. 
+func.func @_QPtest(%arg0: !fir.ref> {fir.bindc_name = "x"}) { + %c10 = arith.constant 10 : index + %0 = fir.dummy_scope : !fir.dscope + %1 = fir.shape %c10 : (index) -> !fir.shape<1> + %2:2 = hlfir.declare %arg0(%1) dummy_scope %0 {uniq_name = "_QFtestEx"} : (!fir.ref>, !fir.shape<1>, !fir.dscope) -> (!fir.ref>, !fir.ref>) + %3 = hlfir.eval_in_mem shape %1 : (!fir.shape<1>) -> !hlfir.expr<10xf32> { + ^bb0(%arg1: !fir.ref>): + %4 = fir.call @_QPfoo() fastmath : () -> !fir.array<10xf32> + fir.save_result %4 to %arg1(%1) : !fir.array<10xf32>, !fir.ref>, !fir.shape<1> + } + hlfir.assign %3 to %2#0 : !hlfir.expr<10xf32>, !fir.ref> + hlfir.destroy %3 : !hlfir.expr<10xf32> + return +} +func.func private @_QPfoo() -> !fir.array<10xf32> + +// CHECK-LABEL: func.func @_QPtest( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref> {fir.bindc_name = "x"}) { +// CHECK: %[[VAL_1:.*]] = arith.constant 10 : index +// CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_3:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_3]]) dummy_scope %[[VAL_2]] {uniq_name = "_QFtestEx"} : (!fir.ref>, !fir.shape<1>, !fir.dscope) -> (!fir.ref>, !fir.ref>) +// CHECK: %[[VAL_5:.*]] = fir.call @_QPfoo() fastmath : () -> !fir.array<10xf32> +// CHECK: fir.save_result %[[VAL_5]] to %[[VAL_4]]#1(%[[VAL_3]]) : !fir.array<10xf32>, !fir.ref>, !fir.shape<1> +// CHECK: return +// CHECK: } + + +// Temporary storage cannot be avoided in this case since +// _QFnegative_test_is_targetEx has the TARGET attribute. +func.func @_QPnegative_test_is_target(%arg0: !fir.ref> {fir.bindc_name = "x", fir.target}) { + %c10 = arith.constant 10 : index + %0 = fir.dummy_scope : !fir.dscope + %1 = fir.shape %c10 : (index) -> !fir.shape<1> + %2:2 = hlfir.declare %arg0(%1) dummy_scope %0 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFnegative_test_is_targetEx"} : (!fir.ref>, !fir.shape<1>, !fir.dscope) -> (!fir.ref>, !fir.ref>) + %3 = hlfir.eval_in_mem shape %1 : (!fir.shape<1>) -> !hlfir.expr<10xf32> { + ^bb0(%arg1: !fir.ref>): + %4 = fir.call @_QPfoo() fastmath : () -> !fir.array<10xf32> + fir.save_result %4 to %arg1(%1) : !fir.array<10xf32>, !fir.ref>, !fir.shape<1> + } + hlfir.assign %3 to %2#0 : !hlfir.expr<10xf32>, !fir.ref> + hlfir.destroy %3 : !hlfir.expr<10xf32> + return +} +// CHECK-LABEL: func.func @_QPnegative_test_is_target( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref> {fir.bindc_name = "x", fir.target}) { +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_2:.*]] = arith.constant false +// CHECK: %[[VAL_3:.*]] = arith.constant 10 : index +// CHECK: %[[VAL_4:.*]] = fir.alloca !fir.array<10xf32> +// CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]]{{.*}} +// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_4]]{{.*}} +// CHECK: %[[VAL_9:.*]] = fir.call @_QPfoo() fastmath : () -> !fir.array<10xf32> +// CHECK: fir.save_result %[[VAL_9]] to %[[VAL_8]]#1{{.*}} +// CHECK: %[[VAL_10:.*]] = hlfir.as_expr %[[VAL_8]]#0 move %[[VAL_2]] : (!fir.ref>, i1) -> !hlfir.expr<10xf32> +// CHECK: fir.do_loop %[[VAL_11:.*]] = %[[VAL_1]] to %[[VAL_3]] step %[[VAL_1]] unordered { +// CHECK: %[[VAL_12:.*]] = hlfir.apply %[[VAL_10]], %[[VAL_11]] : (!hlfir.expr<10xf32>, index) -> f32 +// CHECK: %[[VAL_13:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_11]]) : (!fir.ref>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_12]] to %[[VAL_13]] : f32, !fir.ref +// CHECK: } +// CHECK: hlfir.destroy %[[VAL_10]] : !hlfir.expr<10xf32> +// CHECK: return +// CHECK: } From 
cd7e65398fbbd9642573013800dc3ae1e7307f82 Mon Sep 17 00:00:00 2001 From: jeanPerier Date: Tue, 3 Dec 2024 10:04:52 +0100 Subject: [PATCH 038/191] [flang] optimize array function calls using hlfir.eval_in_mem (#118070) This patch encapsulate array function call lowering into hlfir.eval_in_mem and allows directly evaluating the call into the LHS when possible. The conditions are: LHS is contiguous, not accessed inside the function, it is not a whole allocatable, and the function results needs not to be finalized. All these conditions are tested in the previous hlfir.eval_in_mem optimization (#118069) that is leveraging the extension of getModRef to handle function calls(#117164). This yields a 25% speed-up on polyhedron channel2 benchmark (from 1min to 45s measured on an X86-64 Zen 2). --- flang/include/flang/Lower/ConvertCall.h | 7 +- .../flang/Optimizer/HLFIR/HLFIRDialect.h | 4 + flang/lib/Lower/ConvertCall.cpp | 101 +++++++++----- flang/lib/Lower/ConvertExpr.cpp | 13 +- flang/lib/Optimizer/HLFIR/IR/HLFIRDialect.cpp | 13 ++ flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp | 11 +- .../order_assignments/where-scheduling.f90 | 2 +- .../test/Lower/HLFIR/calls-array-results.f90 | 131 ++++++++++++++++++ flang/test/Lower/HLFIR/where-nonelemental.f90 | 38 ++--- .../Lower/explicit-interface-results-2.f90 | 2 - .../test/Lower/explicit-interface-results.f90 | 8 +- flang/test/Lower/forall/array-constructor.f90 | 2 +- 12 files changed, 258 insertions(+), 74 deletions(-) create mode 100644 flang/test/Lower/HLFIR/calls-array-results.f90 diff --git a/flang/include/flang/Lower/ConvertCall.h b/flang/include/flang/Lower/ConvertCall.h index bc082907e6176..f1cd4f938320b 100644 --- a/flang/include/flang/Lower/ConvertCall.h +++ b/flang/include/flang/Lower/ConvertCall.h @@ -24,6 +24,11 @@ namespace Fortran::lower { +/// Data structure packaging the SSA value(s) produced for the result of lowered +/// function calls. +using LoweredResult = + std::variant; + /// Given a call site for which the arguments were already lowered, generate /// the call and return the result. This function deals with explicit result /// allocation and lowering if needed. It also deals with passing the host @@ -32,7 +37,7 @@ namespace Fortran::lower { /// It is only used for HLFIR. /// The returned boolean indicates if finalization has been emitted in /// \p stmtCtx for the result. -std::pair genCallOpAndResult( +std::pair genCallOpAndResult( mlir::Location loc, Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symMap, Fortran::lower::StatementContext &stmtCtx, Fortran::lower::CallerInterface &caller, mlir::FunctionType callSiteType, diff --git a/flang/include/flang/Optimizer/HLFIR/HLFIRDialect.h b/flang/include/flang/Optimizer/HLFIR/HLFIRDialect.h index 3830237f96f3c..447d5fbab8999 100644 --- a/flang/include/flang/Optimizer/HLFIR/HLFIRDialect.h +++ b/flang/include/flang/Optimizer/HLFIR/HLFIRDialect.h @@ -61,6 +61,10 @@ inline mlir::Type getFortranElementOrSequenceType(mlir::Type type) { return type; } +/// Build the hlfir.expr type for the value held in a variable of type \p +/// variableType. +mlir::Type getExprType(mlir::Type variableType); + /// Is this a fir.box or fir.class address type? 
inline bool isBoxAddressType(mlir::Type type) { type = fir::dyn_cast_ptrEleTy(type); diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index e84e7afbe82e0..40cd106e63018 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -284,7 +284,8 @@ static void remapActualToDummyDescriptors( } } -std::pair Fortran::lower::genCallOpAndResult( +std::pair +Fortran::lower::genCallOpAndResult( mlir::Location loc, Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symMap, Fortran::lower::StatementContext &stmtCtx, Fortran::lower::CallerInterface &caller, mlir::FunctionType callSiteType, @@ -326,6 +327,11 @@ std::pair Fortran::lower::genCallOpAndResult( } } + const bool isExprCall = + converter.getLoweringOptions().getLowerToHighLevelFIR() && + callSiteType.getNumResults() == 1 && + llvm::isa(callSiteType.getResult(0)); + mlir::IndexType idxTy = builder.getIndexType(); auto lowerSpecExpr = [&](const auto &expr) -> mlir::Value { mlir::Value convertExpr = builder.createConvert( @@ -333,6 +339,8 @@ std::pair Fortran::lower::genCallOpAndResult( return fir::factory::genMaxWithZero(builder, loc, convertExpr); }; llvm::SmallVector resultLengths; + mlir::Value arrayResultShape; + hlfir::EvaluateInMemoryOp evaluateInMemory; auto allocatedResult = [&]() -> std::optional { llvm::SmallVector extents; llvm::SmallVector lengths; @@ -366,6 +374,18 @@ std::pair Fortran::lower::genCallOpAndResult( resultLengths = lengths; } + if (!extents.empty()) + arrayResultShape = builder.genShape(loc, extents); + + if (isExprCall) { + mlir::Type exprType = hlfir::getExprType(type); + evaluateInMemory = builder.create( + loc, exprType, arrayResultShape, resultLengths); + builder.setInsertionPointToStart(&evaluateInMemory.getBody().front()); + return toExtendedValue(loc, evaluateInMemory.getMemory(), extents, + lengths); + } + if ((!extents.empty() || !lengths.empty()) && !isElemental) { // Note: in the elemental context, the alloca ownership inside the // elemental region is implicit, and later pass in lowering (stack @@ -384,8 +404,7 @@ std::pair Fortran::lower::genCallOpAndResult( if (mustPopSymMap) symMap.popScope(); - // Place allocated result or prepare the fir.save_result arguments. - mlir::Value arrayResultShape; + // Place allocated result if (allocatedResult) { if (std::optional::PassedEntity> @@ -399,16 +418,6 @@ std::pair Fortran::lower::genCallOpAndResult( else fir::emitFatalError( loc, "only expect character scalar result to be passed by ref"); - } else { - assert(caller.mustSaveResult()); - arrayResultShape = allocatedResult->match( - [&](const fir::CharArrayBoxValue &) { - return builder.createShape(loc, *allocatedResult); - }, - [&](const fir::ArrayBoxValue &) { - return builder.createShape(loc, *allocatedResult); - }, - [&](const auto &) { return mlir::Value{}; }); } } @@ -642,6 +651,19 @@ std::pair Fortran::lower::genCallOpAndResult( callResult = call.getResult(0); } + std::optional retTy = + caller.getCallDescription().proc().GetType(); + // With HLFIR lowering, isElemental must be set to true + // if we are producing an elemental call. In this case, + // the elemental results must not be destroyed, instead, + // the resulting array result will be finalized/destroyed + // as needed by hlfir.destroy. 
+ const bool mustFinalizeResult = + !isElemental && callSiteType.getNumResults() > 0 && + !fir::isPointerType(callSiteType.getResult(0)) && retTy.has_value() && + (retTy->category() == Fortran::common::TypeCategory::Derived || + retTy->IsPolymorphic() || retTy->IsUnlimitedPolymorphic()); + if (caller.mustSaveResult()) { assert(allocatedResult.has_value()); builder.create(loc, callResult, @@ -649,6 +671,19 @@ std::pair Fortran::lower::genCallOpAndResult( arrayResultShape, resultLengths); } + if (evaluateInMemory) { + builder.setInsertionPointAfter(evaluateInMemory); + mlir::Value expr = evaluateInMemory.getResult(); + fir::FirOpBuilder *bldr = &converter.getFirOpBuilder(); + if (!isElemental) + stmtCtx.attachCleanup([bldr, loc, expr, mustFinalizeResult]() { + bldr->create(loc, expr, + /*finalize=*/mustFinalizeResult); + }); + return {LoweredResult{hlfir::EntityWithAttributes{expr}}, + mustFinalizeResult}; + } + if (allocatedResult) { // The result must be optionally destroyed (if it is of a derived type // that may need finalization or deallocation of the components). @@ -679,17 +714,7 @@ std::pair Fortran::lower::genCallOpAndResult( // derived-type. // For polymorphic and unlimited polymorphic enities call the runtime // in any cases. - std::optional retTy = - caller.getCallDescription().proc().GetType(); - // With HLFIR lowering, isElemental must be set to true - // if we are producing an elemental call. In this case, - // the elemental results must not be destroyed, instead, - // the resulting array result will be finalized/destroyed - // as needed by hlfir.destroy. - if (!isElemental && !fir::isPointerType(funcType.getResults()[0]) && - retTy && - (retTy->category() == Fortran::common::TypeCategory::Derived || - retTy->IsPolymorphic() || retTy->IsUnlimitedPolymorphic())) { + if (mustFinalizeResult) { if (retTy->IsPolymorphic() || retTy->IsUnlimitedPolymorphic()) { auto *bldr = &converter.getFirOpBuilder(); stmtCtx.attachCleanup([bldr, loc, allocatedResult]() { @@ -715,12 +740,13 @@ std::pair Fortran::lower::genCallOpAndResult( } } } - return {*allocatedResult, resultIsFinalized}; + return {LoweredResult{*allocatedResult}, resultIsFinalized}; } // subroutine call if (!resultType) - return {fir::ExtendedValue{mlir::Value{}}, /*resultIsFinalized=*/false}; + return {LoweredResult{fir::ExtendedValue{mlir::Value{}}}, + /*resultIsFinalized=*/false}; // For now, Fortran return values are implemented with a single MLIR // function return value. @@ -734,10 +760,13 @@ std::pair Fortran::lower::genCallOpAndResult( mlir::dyn_cast(funcType.getResults()[0]); mlir::Value len = builder.createIntegerConstant( loc, builder.getCharacterLengthType(), charTy.getLen()); - return {fir::CharBoxValue{callResult, len}, /*resultIsFinalized=*/false}; + return { + LoweredResult{fir::ExtendedValue{fir::CharBoxValue{callResult, len}}}, + /*resultIsFinalized=*/false}; } - return {callResult, /*resultIsFinalized=*/false}; + return {LoweredResult{fir::ExtendedValue{callResult}}, + /*resultIsFinalized=*/false}; } static hlfir::EntityWithAttributes genStmtFunctionRef( @@ -1661,19 +1690,25 @@ genUserCall(Fortran::lower::PreparedActualArguments &loweredActuals, // Prepare lowered arguments according to the interface // and map the lowered values to the dummy // arguments. 
- auto [result, resultIsFinalized] = Fortran::lower::genCallOpAndResult( + auto [loweredResult, resultIsFinalized] = Fortran::lower::genCallOpAndResult( loc, callContext.converter, callContext.symMap, callContext.stmtCtx, caller, callSiteType, callContext.resultType, callContext.isElementalProcWithArrayArgs()); - // For procedure pointer function result, just return the call. - if (callContext.resultType && - mlir::isa(*callContext.resultType)) - return hlfir::EntityWithAttributes(fir::getBase(result)); /// Clean-up associations and copy-in. for (auto cleanUp : callCleanUps) cleanUp.genCleanUp(loc, builder); + if (auto *entity = std::get_if(&loweredResult)) + return *entity; + + auto &result = std::get(loweredResult); + + // For procedure pointer function result, just return the call. + if (callContext.resultType && + mlir::isa(*callContext.resultType)) + return hlfir::EntityWithAttributes(fir::getBase(result)); + if (!fir::getBase(result)) return std::nullopt; // subroutine call. diff --git a/flang/lib/Lower/ConvertExpr.cpp b/flang/lib/Lower/ConvertExpr.cpp index 46168b81dd3a0..7698fac89c223 100644 --- a/flang/lib/Lower/ConvertExpr.cpp +++ b/flang/lib/Lower/ConvertExpr.cpp @@ -2852,10 +2852,11 @@ class ScalarExprLowering { } } - ExtValue result = + auto loweredResult = Fortran::lower::genCallOpAndResult(loc, converter, symMap, stmtCtx, caller, callSiteType, resultType) .first; + auto &result = std::get(loweredResult); // Sync pointers and allocatables that may have been modified during the // call. @@ -4881,10 +4882,12 @@ class ArrayExprLowering { [&](const auto &) { return fir::getBase(exv); }); caller.placeInput(argIface, arg); } - return Fortran::lower::genCallOpAndResult(loc, converter, symMap, - getElementCtx(), caller, - callSiteType, retTy) - .first; + Fortran::lower::LoweredResult res = + Fortran::lower::genCallOpAndResult(loc, converter, symMap, + getElementCtx(), caller, + callSiteType, retTy) + .first; + return std::get(res); }; } diff --git a/flang/lib/Optimizer/HLFIR/IR/HLFIRDialect.cpp b/flang/lib/Optimizer/HLFIR/IR/HLFIRDialect.cpp index 0b61c0edce622..d67b5fa659807 100644 --- a/flang/lib/Optimizer/HLFIR/IR/HLFIRDialect.cpp +++ b/flang/lib/Optimizer/HLFIR/IR/HLFIRDialect.cpp @@ -215,3 +215,16 @@ bool hlfir::mayHaveAllocatableComponent(mlir::Type ty) { return fir::isPolymorphicType(ty) || fir::isUnlimitedPolymorphicType(ty) || fir::isRecordWithAllocatableMember(hlfir::getFortranElementType(ty)); } + +mlir::Type hlfir::getExprType(mlir::Type variableType) { + hlfir::ExprType::Shape typeShape; + bool isPolymorphic = fir::isPolymorphicType(variableType); + mlir::Type type = getFortranElementOrSequenceType(variableType); + if (auto seqType = mlir::dyn_cast(type)) { + assert(!seqType.hasUnknownShape() && "assumed-rank cannot be expressions"); + typeShape.append(seqType.getShape().begin(), seqType.getShape().end()); + type = seqType.getEleTy(); + } + return hlfir::ExprType::get(variableType.getContext(), typeShape, type, + isPolymorphic); +} diff --git a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp index 8751988244648..3a172d1b8b540 100644 --- a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp +++ b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp @@ -1427,16 +1427,7 @@ llvm::LogicalResult hlfir::EndAssociateOp::verify() { void hlfir::AsExprOp::build(mlir::OpBuilder &builder, mlir::OperationState &result, mlir::Value var, mlir::Value mustFree) { - hlfir::ExprType::Shape typeShape; - bool isPolymorphic = fir::isPolymorphicType(var.getType()); - mlir::Type 
type = getFortranElementOrSequenceType(var.getType()); - if (auto seqType = mlir::dyn_cast(type)) { - typeShape.append(seqType.getShape().begin(), seqType.getShape().end()); - type = seqType.getEleTy(); - } - - auto resultType = hlfir::ExprType::get(builder.getContext(), typeShape, type, - isPolymorphic); + mlir::Type resultType = hlfir::getExprType(var.getType()); return build(builder, result, resultType, var, mustFree); } diff --git a/flang/test/HLFIR/order_assignments/where-scheduling.f90 b/flang/test/HLFIR/order_assignments/where-scheduling.f90 index 3010476d4a188..6feaba0d3389a 100644 --- a/flang/test/HLFIR/order_assignments/where-scheduling.f90 +++ b/flang/test/HLFIR/order_assignments/where-scheduling.f90 @@ -134,7 +134,7 @@ end function f !CHECK-NEXT: run 1 save : where/mask !CHECK-NEXT: run 2 evaluate: where/region_assign1 !CHECK-LABEL: ------------ scheduling where in _QPonly_once ------------ -!CHECK-NEXT: unknown effect: %{{[0-9]+}} = llvm.intr.stacksave : !llvm.ptr +!CHECK-NEXT: unknown effect: %11 = fir.call @_QPcall_me_only_once() fastmath : () -> !fir.array<10x!fir.logical<4>> !CHECK-NEXT: saving eval because write effect prevents re-evaluation !CHECK-NEXT: run 1 save (w): where/mask !CHECK-NEXT: run 2 evaluate: where/region_assign1 diff --git a/flang/test/Lower/HLFIR/calls-array-results.f90 b/flang/test/Lower/HLFIR/calls-array-results.f90 new file mode 100644 index 0000000000000..d91844cc2e6f8 --- /dev/null +++ b/flang/test/Lower/HLFIR/calls-array-results.f90 @@ -0,0 +1,131 @@ +! RUN: bbc -emit-hlfir -o - %s -I nowhere | FileCheck %s + +subroutine simple_test() + implicit none + interface + function array_func() + real :: array_func(10) + end function + end interface + real :: x(10) + x = array_func() +end subroutine + +subroutine arg_test(n) + implicit none + interface + function array_func_2(n) + integer(8) :: n + real :: array_func_2(n) + end function + end interface + integer(8) :: n + real :: x(n) + x = array_func_2(n) +end subroutine + +module type_defs + interface + function array_func() + real :: array_func(10) + end function + end interface + type t + contains + procedure, nopass :: array_func => array_func + end type +end module + +subroutine dispatch_test(x, a) + use type_defs, only : t + implicit none + real :: x(10) + class(t) :: a + x = a%array_func() +end subroutine + +! CHECK-LABEL: func.func @_QPsimple_test() { +! CHECK: %[[VAL_0:.*]] = arith.constant 10 : index +! CHECK: %[[VAL_1:.*]] = fir.alloca !fir.array<10xf32> {bindc_name = "x", uniq_name = "_QFsimple_testEx"} +! CHECK: %[[VAL_2:.*]] = fir.shape %[[VAL_0]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_2]]) {uniq_name = "_QFsimple_testEx"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_4:.*]] = arith.constant 10 : i64 +! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i64 +! CHECK: %[[VAL_6:.*]] = arith.subi %[[VAL_4]], %[[VAL_5]] : i64 +! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i64 +! CHECK: %[[VAL_8:.*]] = arith.addi %[[VAL_6]], %[[VAL_7]] : i64 +! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i64) -> index +! CHECK: %[[VAL_10:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_11:.*]] = arith.cmpi sgt, %[[VAL_9]], %[[VAL_10]] : index +! CHECK: %[[VAL_12:.*]] = arith.select %[[VAL_11]], %[[VAL_9]], %[[VAL_10]] : index +! CHECK: %[[VAL_13:.*]] = fir.shape %[[VAL_12]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_14:.*]] = hlfir.eval_in_mem shape %[[VAL_13]] : (!fir.shape<1>) -> !hlfir.expr<10xf32> { +! 
CHECK: ^bb0(%[[VAL_15:.*]]: !fir.ref>): +! CHECK: %[[VAL_16:.*]] = fir.call @_QParray_func() fastmath : () -> !fir.array<10xf32> +! CHECK: fir.save_result %[[VAL_16]] to %[[VAL_15]](%[[VAL_13]]) : !fir.array<10xf32>, !fir.ref>, !fir.shape<1> +! CHECK: } +! CHECK: hlfir.assign %[[VAL_14]] to %[[VAL_3]]#0 : !hlfir.expr<10xf32>, !fir.ref> +! CHECK: hlfir.destroy %[[VAL_14]] : !hlfir.expr<10xf32> +! CHECK: return +! CHECK: } + +! CHECK-LABEL: func.func @_QParg_test( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref {fir.bindc_name = "n"}) { +! CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_1]] {uniq_name = "_QFarg_testEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_2]]#0 : !fir.ref +! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i64) -> index +! CHECK: %[[VAL_5:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_6:.*]] = arith.cmpi sgt, %[[VAL_4]], %[[VAL_5]] : index +! CHECK: %[[VAL_7:.*]] = arith.select %[[VAL_6]], %[[VAL_4]], %[[VAL_5]] : index +! CHECK: %[[VAL_8:.*]] = fir.alloca !fir.array, %[[VAL_7]] {bindc_name = "x", uniq_name = "_QFarg_testEx"} +! CHECK: %[[VAL_9:.*]] = fir.shape %[[VAL_7]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_8]](%[[VAL_9]]) {uniq_name = "_QFarg_testEx"} : (!fir.ref>, !fir.shape<1>) -> (!fir.box>, !fir.ref>) +! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_2]]#1 {uniq_name = "_QFarg_testFarray_func_2En"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref +! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i64 +! CHECK: %[[VAL_14:.*]] = arith.subi %[[VAL_12]], %[[VAL_13]] : i64 +! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i64 +! CHECK: %[[VAL_16:.*]] = arith.addi %[[VAL_14]], %[[VAL_15]] : i64 +! CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i64) -> index +! CHECK: %[[VAL_18:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_19:.*]] = arith.cmpi sgt, %[[VAL_17]], %[[VAL_18]] : index +! CHECK: %[[VAL_20:.*]] = arith.select %[[VAL_19]], %[[VAL_17]], %[[VAL_18]] : index +! CHECK: %[[VAL_21:.*]] = fir.shape %[[VAL_20]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_22:.*]] = hlfir.eval_in_mem shape %[[VAL_21]] : (!fir.shape<1>) -> !hlfir.expr { +! CHECK: ^bb0(%[[VAL_23:.*]]: !fir.ref>): +! CHECK: %[[VAL_24:.*]] = fir.call @_QParray_func_2(%[[VAL_2]]#1) fastmath : (!fir.ref) -> !fir.array +! CHECK: fir.save_result %[[VAL_24]] to %[[VAL_23]](%[[VAL_21]]) : !fir.array, !fir.ref>, !fir.shape<1> +! CHECK: } +! CHECK: hlfir.assign %[[VAL_22]] to %[[VAL_10]]#0 : !hlfir.expr, !fir.box> +! CHECK: hlfir.destroy %[[VAL_22]] : !hlfir.expr +! CHECK: return +! CHECK: } + +! CHECK-LABEL: func.func @_QPdispatch_test( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref> {fir.bindc_name = "x"}, +! CHECK-SAME: %[[VAL_1:.*]]: !fir.class> {fir.bindc_name = "a"}) { +! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %[[VAL_2]] {uniq_name = "_QFdispatch_testEa"} : (!fir.class>, !fir.dscope) -> (!fir.class>, !fir.class>) +! CHECK: %[[VAL_4:.*]] = arith.constant 10 : index +! CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_5]]) dummy_scope %[[VAL_2]] {uniq_name = "_QFdispatch_testEx"} : (!fir.ref>, !fir.shape<1>, !fir.dscope) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_7:.*]] = arith.constant 10 : i64 +! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i64 +! 
CHECK: %[[VAL_9:.*]] = arith.subi %[[VAL_7]], %[[VAL_8]] : i64 +! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i64 +! CHECK: %[[VAL_11:.*]] = arith.addi %[[VAL_9]], %[[VAL_10]] : i64 +! CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i64) -> index +! CHECK: %[[VAL_13:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_14:.*]] = arith.cmpi sgt, %[[VAL_12]], %[[VAL_13]] : index +! CHECK: %[[VAL_15:.*]] = arith.select %[[VAL_14]], %[[VAL_12]], %[[VAL_13]] : index +! CHECK: %[[VAL_16:.*]] = fir.shape %[[VAL_15]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_17:.*]] = hlfir.eval_in_mem shape %[[VAL_16]] : (!fir.shape<1>) -> !hlfir.expr<10xf32> { +! CHECK: ^bb0(%[[VAL_18:.*]]: !fir.ref>): +! CHECK: %[[VAL_19:.*]] = fir.dispatch "array_func"(%[[VAL_3]]#1 : !fir.class>) -> !fir.array<10xf32> +! CHECK: fir.save_result %[[VAL_19]] to %[[VAL_18]](%[[VAL_16]]) : !fir.array<10xf32>, !fir.ref>, !fir.shape<1> +! CHECK: } +! CHECK: hlfir.assign %[[VAL_17]] to %[[VAL_6]]#0 : !hlfir.expr<10xf32>, !fir.ref> +! CHECK: hlfir.destroy %[[VAL_17]] : !hlfir.expr<10xf32> +! CHECK: return +! CHECK: } diff --git a/flang/test/Lower/HLFIR/where-nonelemental.f90 b/flang/test/Lower/HLFIR/where-nonelemental.f90 index 643f417c47674..7be5831890012 100644 --- a/flang/test/Lower/HLFIR/where-nonelemental.f90 +++ b/flang/test/Lower/HLFIR/where-nonelemental.f90 @@ -26,11 +26,12 @@ real elemental function elem_func(x) ! CHECK-LABEL: func.func @_QPtest_where( ! CHECK: hlfir.where { ! CHECK-NOT: hlfir.exactly_once -! CHECK: %[[VAL_17:.*]] = llvm.intr.stacksave : !llvm.ptr -! CHECK: %[[VAL_19:.*]] = fir.call @_QPlogical_func1() fastmath : () -> !fir.array<100x!fir.logical<4>> -! CHECK: hlfir.yield %{{.*}} : !hlfir.expr<100x!fir.logical<4>> cleanup { -! CHECK: llvm.intr.stackrestore %[[VAL_17]] : !llvm.ptr -! CHECK: } +! CHECK: %[[VAL_19:.*]] = hlfir.eval_in_mem {{.*}} { +! CHECK: fir.call @_QPlogical_func1() fastmath : () -> !fir.array<100x!fir.logical<4>> +! CHECK: } +! CHECK: hlfir.yield %[[VAL_19]] : !hlfir.expr<100x!fir.logical<4>> cleanup { +! CHECK: hlfir.destroy %[[VAL_19]] +! CHECK: } ! CHECK: } do { ! CHECK: hlfir.region_assign { ! CHECK: %[[VAL_24:.*]] = hlfir.exactly_once : f32 { @@ -70,10 +71,11 @@ real elemental function elem_func(x) ! CHECK: } ! CHECK: hlfir.elsewhere mask { ! CHECK: %[[VAL_62:.*]] = hlfir.exactly_once : !hlfir.expr<100x!fir.logical<4>> { -! CHECK: %[[VAL_72:.*]] = llvm.intr.stacksave : !llvm.ptr -! CHECK: fir.call @_QPlogical_func2() fastmath : () -> !fir.array<100x!fir.logical<4>> -! CHECK: hlfir.yield %{{.*}} : !hlfir.expr<100x!fir.logical<4>> cleanup { -! CHECK: llvm.intr.stackrestore %[[VAL_72]] : !llvm.ptr +! CHECK: %[[VAL_72:.*]] = hlfir.eval_in_mem {{.*}} { +! CHECK: fir.call @_QPlogical_func2() fastmath : () -> !fir.array<100x!fir.logical<4>> +! CHECK: } +! CHECK: hlfir.yield %[[VAL_72]] : !hlfir.expr<100x!fir.logical<4>> cleanup { +! CHECK: hlfir.destroy %[[VAL_72]] ! CHECK: } ! CHECK: } ! CHECK: hlfir.yield %[[VAL_62]] : !hlfir.expr<100x!fir.logical<4>> @@ -123,11 +125,12 @@ integer pure function pure_ifoo() ! CHECK: } (%[[VAL_10:.*]]: i32) { ! CHECK: %[[VAL_11:.*]] = hlfir.forall_index "i" %[[VAL_10]] : (i32) -> !fir.ref ! CHECK: hlfir.where { -! CHECK: %[[VAL_21:.*]] = llvm.intr.stacksave : !llvm.ptr ! CHECK-NOT: hlfir.exactly_once -! CHECK: %[[VAL_23:.*]] = fir.call @_QPpure_logical_func1() proc_attrs fastmath : () -> !fir.array<100x!fir.logical<4>> -! CHECK: hlfir.yield %{{.*}} : !hlfir.expr<100x!fir.logical<4>> cleanup { -! CHECK: llvm.intr.stackrestore %[[VAL_21]] : !llvm.ptr +! 
CHECK: %[[VAL_23:.*]] = hlfir.eval_in_mem {{.*}} { +! CHECK: fir.call @_QPpure_logical_func1() proc_attrs fastmath : () -> !fir.array<100x!fir.logical<4>> +! CHECK: } +! CHECK: hlfir.yield %[[VAL_23]] : !hlfir.expr<100x!fir.logical<4>> cleanup { +! CHECK: hlfir.destroy %[[VAL_23]] ! CHECK: } ! CHECK: } do { ! CHECK: hlfir.region_assign { @@ -172,10 +175,11 @@ integer pure function pure_ifoo() ! CHECK: } ! CHECK: hlfir.elsewhere mask { ! CHECK: %[[VAL_129:.*]] = hlfir.exactly_once : !hlfir.expr<100x!fir.logical<4>> { -! CHECK: %[[VAL_139:.*]] = llvm.intr.stacksave : !llvm.ptr -! CHECK: %[[VAL_141:.*]] = fir.call @_QPpure_logical_func2() proc_attrs fastmath : () -> !fir.array<100x!fir.logical<4>> -! CHECK: hlfir.yield %{{.*}} : !hlfir.expr<100x!fir.logical<4>> cleanup { -! CHECK: llvm.intr.stackrestore %[[VAL_139]] : !llvm.ptr +! CHECK: %[[VAL_139:.*]] = hlfir.eval_in_mem {{.*}} { +! CHECK: fir.call @_QPpure_logical_func2() proc_attrs fastmath : () -> !fir.array<100x!fir.logical<4>> +! CHECK: } +! CHECK: hlfir.yield %[[VAL_139]] : !hlfir.expr<100x!fir.logical<4>> cleanup { +! CHECK: hlfir.destroy %[[VAL_139]] ! CHECK: } ! CHECK: } ! CHECK: hlfir.yield %[[VAL_129]] : !hlfir.expr<100x!fir.logical<4>> diff --git a/flang/test/Lower/explicit-interface-results-2.f90 b/flang/test/Lower/explicit-interface-results-2.f90 index 95aee84f4a644..2336053c32a54 100644 --- a/flang/test/Lower/explicit-interface-results-2.f90 +++ b/flang/test/Lower/explicit-interface-results-2.f90 @@ -252,12 +252,10 @@ subroutine test_call_to_used_interface(dummy_proc) call takes_array(dummy_proc()) ! CHECK: %[[VAL_1:.*]] = arith.constant 100 : index ! CHECK: %[[VAL_2:.*]] = fir.alloca !fir.array<100xf32> {bindc_name = ".result"} -! CHECK: %[[VAL_3:.*]] = llvm.intr.stacksave : !llvm.ptr ! CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1> ! CHECK: %[[VAL_5:.*]] = fir.box_addr %[[VAL_0]] : (!fir.boxproc<() -> ()>) -> (() -> !fir.array<100xf32>) ! CHECK: %[[VAL_6:.*]] = fir.call %[[VAL_5]]() {{.*}}: () -> !fir.array<100xf32> ! CHECK: fir.save_result %[[VAL_6]] to %[[VAL_2]](%[[VAL_4]]) : !fir.array<100xf32>, !fir.ref>, !fir.shape<1> ! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_2]] : (!fir.ref>) -> !fir.ref> ! CHECK: fir.call @_QPtakes_array(%[[VAL_7]]) {{.*}}: (!fir.ref>) -> () -! CHECK: llvm.intr.stackrestore %[[VAL_3]] : !llvm.ptr end subroutine diff --git a/flang/test/Lower/explicit-interface-results.f90 b/flang/test/Lower/explicit-interface-results.f90 index 623e875b5f9c9..612d57be36448 100644 --- a/flang/test/Lower/explicit-interface-results.f90 +++ b/flang/test/Lower/explicit-interface-results.f90 @@ -195,8 +195,8 @@ subroutine dyn_array(m, n) ! CHECK-DAG: %[[ncast2:.*]] = fir.convert %[[nadd]] : (i64) -> index ! CHECK-DAG: %[[ncmpi:.*]] = arith.cmpi sgt, %[[ncast2]], %{{.*}} : index ! CHECK-DAG: %[[nselect:.*]] = arith.select %[[ncmpi]], %[[ncast2]], %{{.*}} : index - ! CHECK: %[[tmp:.*]] = fir.alloca !fir.array, %[[mselect]], %[[nselect]] ! CHECK: %[[shape:.*]] = fir.shape %[[mselect]], %[[nselect]] : (index, index) -> !fir.shape<2> + ! CHECK: %[[tmp:.*]] = fir.alloca !fir.array, %[[mselect]], %[[nselect]] ! CHECK: %[[res:.*]] = fir.call @_QMcalleePreturn_dyn_array(%[[m]], %[[n]]) {{.*}}: (!fir.ref, !fir.ref) -> !fir.array ! CHECK: fir.save_result %[[res]] to %[[tmp]](%[[shape]]) : !fir.array, !fir.ref>, !fir.shape<2> print *, return_dyn_array(m, n) @@ -211,8 +211,8 @@ subroutine dyn_char_cst_array(l) ! CHECK: %[[lcast2:.*]] = fir.convert %[[lcast]] : (i64) -> index ! 
CHECK: %[[cmpi:.*]] = arith.cmpi sgt, %[[lcast2]], %{{.*}} : index ! CHECK: %[[select:.*]] = arith.select %[[cmpi]], %[[lcast2]], %{{.*}} : index - ! CHECK: %[[tmp:.*]] = fir.alloca !fir.array<20x30x!fir.char<1,?>>(%[[select]] : index) ! CHECK: %[[shape:.*]] = fir.shape %{{.*}}, %{{.*}} : (index, index) -> !fir.shape<2> + ! CHECK: %[[tmp:.*]] = fir.alloca !fir.array<20x30x!fir.char<1,?>>(%[[select]] : index) ! CHECK: %[[res:.*]] = fir.call @_QMcalleePreturn_dyn_char_cst_array(%[[l]]) {{.*}}: (!fir.ref) -> !fir.array<20x30x!fir.char<1,?>> ! CHECK: fir.save_result %[[res]] to %[[tmp]](%[[shape]]) typeparams %[[select]] : !fir.array<20x30x!fir.char<1,?>>, !fir.ref>>, !fir.shape<2>, index print *, return_dyn_char_cst_array(l) @@ -236,8 +236,8 @@ subroutine cst_char_dyn_array(m, n) ! CHECK-DAG: %[[ncast2:.*]] = fir.convert %[[nadd]] : (i64) -> index ! CHECK-DAG: %[[ncmpi:.*]] = arith.cmpi sgt, %[[ncast2]], %{{.*}} : index ! CHECK-DAG: %[[nselect:.*]] = arith.select %[[ncmpi]], %[[ncast2]], %{{.*}} : index - ! CHECK: %[[tmp:.*]] = fir.alloca !fir.array>, %[[mselect]], %[[nselect]] ! CHECK: %[[shape:.*]] = fir.shape %[[mselect]], %[[nselect]] : (index, index) -> !fir.shape<2> + ! CHECK: %[[tmp:.*]] = fir.alloca !fir.array>, %[[mselect]], %[[nselect]] ! CHECK: %[[res:.*]] = fir.call @_QMcalleePreturn_cst_char_dyn_array(%[[m]], %[[n]]) {{.*}}: (!fir.ref, !fir.ref) -> !fir.array> ! CHECK: fir.save_result %[[res]] to %[[tmp]](%[[shape]]) typeparams {{.*}} : !fir.array>, !fir.ref>>, !fir.shape<2>, index print *, return_cst_char_dyn_array(m, n) @@ -267,8 +267,8 @@ subroutine dyn_char_dyn_array(l, m, n) ! CHECK-DAG: %[[lcast2:.*]] = fir.convert %[[lcast]] : (i64) -> index ! CHECK-DAG: %[[lcmpi:.*]] = arith.cmpi sgt, %[[lcast2]], %{{.*}} : index ! CHECK-DAG: %[[lselect:.*]] = arith.select %[[lcmpi]], %[[lcast2]], %{{.*}} : index - ! CHECK: %[[tmp:.*]] = fir.alloca !fir.array>(%[[lselect]] : index), %[[mselect]], %[[nselect]] ! CHECK: %[[shape:.*]] = fir.shape %[[mselect]], %[[nselect]] : (index, index) -> !fir.shape<2> + ! CHECK: %[[tmp:.*]] = fir.alloca !fir.array>(%[[lselect]] : index), %[[mselect]], %[[nselect]] ! CHECK: %[[res:.*]] = fir.call @_QMcalleePreturn_dyn_char_dyn_array(%[[l]], %[[m]], %[[n]]) {{.*}}: (!fir.ref, !fir.ref, !fir.ref) -> !fir.array> ! CHECK: fir.save_result %[[res]] to %[[tmp]](%[[shape]]) typeparams {{.*}} : !fir.array>, !fir.ref>>, !fir.shape<2>, index integer :: l, m, n diff --git a/flang/test/Lower/forall/array-constructor.f90 b/flang/test/Lower/forall/array-constructor.f90 index 4c8c756ea689c..6b6b46fdd4688 100644 --- a/flang/test/Lower/forall/array-constructor.f90 +++ b/flang/test/Lower/forall/array-constructor.f90 @@ -232,8 +232,8 @@ end subroutine ac2 ! CHECK: %[[C0:.*]] = arith.constant 0 : index ! CHECK: %[[CMPI:.*]] = arith.cmpi sgt, %[[VAL_80]], %[[C0]] : index ! CHECK: %[[SELECT:.*]] = arith.select %[[CMPI]], %[[VAL_80]], %[[C0]] : index -! CHECK: %[[VAL_81:.*]] = llvm.intr.stacksave : !llvm.ptr ! CHECK: %[[VAL_82:.*]] = fir.shape %[[SELECT]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_81:.*]] = llvm.intr.stacksave : !llvm.ptr ! CHECK: %[[VAL_83:.*]] = fir.convert %[[VAL_74]] : (!fir.box>) -> !fir.box> ! CHECK: %[[VAL_84:.*]] = fir.call @_QFac2Pfunc(%[[VAL_83]]) {{.*}}: (!fir.box>) -> !fir.array<3xi32> ! 
CHECK: fir.save_result %[[VAL_84]] to %[[VAL_2]](%[[VAL_82]]) : !fir.array<3xi32>, !fir.ref>, !fir.shape<1> From fed3a9b8f81f5f4450e515f4499ecfda95804e95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrzej=20Warzy=C5=84ski?= Date: Tue, 3 Dec 2024 09:07:32 +0000 Subject: [PATCH 039/191] [mlir] Add ScalableVectorType and FixedVectorType (#87986) This PR adds two small convenience Vector types: * `ScalableVectorType` and `FixedVectorType`. The goal of these new types is two-fold: * Enable idiomatic checks like `isa(...)`. * Make the split into "Scalable" and "Fixed-wdith" vectors a bit more explicit and more visible in the code-base. The new types are added in mlir/include/mlir/IR (instead of e.g. mlir/include/mlir/Dialect/Vector) so that the new types can be used without requiring any new dependency (e.g. on the Vector dialect). --- mlir/include/mlir/IR/VectorTypes.h | 51 ++++++++++++++++++++++++++ mlir/lib/Dialect/Arith/IR/ArithOps.cpp | 6 ++- 2 files changed, 55 insertions(+), 2 deletions(-) create mode 100644 mlir/include/mlir/IR/VectorTypes.h diff --git a/mlir/include/mlir/IR/VectorTypes.h b/mlir/include/mlir/IR/VectorTypes.h new file mode 100644 index 0000000000000..c209f869a579d --- /dev/null +++ b/mlir/include/mlir/IR/VectorTypes.h @@ -0,0 +1,51 @@ +//===- VectorTypes.h - MLIR Vector Types ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Convenience wrappers for `VectorType` to allow idiomatic code like +// * isa(type) +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_IR_VECTORTYPES_H +#define MLIR_IR_VECTORTYPES_H + +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Types.h" + +namespace mlir { +namespace vector { + +/// A vector type containing at least one scalable dimension. +class ScalableVectorType : public VectorType { +public: + using VectorType::VectorType; + + static bool classof(Type type) { + auto vecTy = llvm::dyn_cast(type); + if (!vecTy) + return false; + return vecTy.isScalable(); + } +}; + +/// A vector type with no scalable dimensions. +class FixedVectorType : public VectorType { +public: + using VectorType::VectorType; + static bool classof(Type type) { + auto vecTy = llvm::dyn_cast(type); + if (!vecTy) + return false; + return !vecTy.isScalable(); + } +}; + +} // namespace vector +} // namespace mlir + +#endif // MLIR_IR_VECTORTYPES_H diff --git a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp index 74c64761565d6..fe7646140db7e 100644 --- a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp +++ b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp @@ -21,6 +21,8 @@ #include "mlir/IR/OpImplementation.h" #include "mlir/IR/PatternMatch.h" #include "mlir/IR/TypeUtilities.h" +#include "mlir/IR/VectorTypes.h" +#include "mlir/Support/LogicalResult.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" @@ -224,8 +226,8 @@ LogicalResult arith::ConstantOp::verify() { // Note, we could relax this for vectors with 1 scalable dim, e.g.: // * arith.constant dense<[[3, 3], [1, 1]]> : vector<2 x [2] x i32> // However, this would most likely require updating the lowerings to LLVM. 
- auto vecType = dyn_cast(type); - if (vecType && vecType.isScalable() && !isa(getValue())) + if (isa(type) && + !isa(getValue())) return emitOpError( "intializing scalable vectors with elements attribute is not supported" " unless it's a vector splat"); From 59bb9b915ef9137709313190395da56364b92db6 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Tue, 3 Dec 2024 10:14:33 +0100 Subject: [PATCH 040/191] [lldb] Expose discontinuous functions through SBFunction::GetRanges (#117532) SBFunction::GetEndAddress doesn't really make sense for discontinuous functions, so I'm declaring it deprecated. GetStartAddress sort of makes sense, if one uses it to find the functions entry point, so I'm keeping that undeprecated. I've made the test a Shell tests because these make it easier to create discontinuous functions regardless of the host os and architecture. They do make testing the python API harder, but I think I've managed to come up with something not entirely unreasonable. --- lldb/include/lldb/API/SBAddressRangeList.h | 1 + lldb/include/lldb/API/SBFunction.h | 2 + lldb/include/lldb/Core/AddressRangeListImpl.h | 5 +- lldb/include/lldb/Symbol/Function.h | 3 + lldb/source/API/SBFunction.cpp | 17 +- lldb/source/Core/AddressRangeListImpl.cpp | 8 - .../Python/sb_function_ranges.s | 182 ++++++++++++++++++ 7 files changed, 198 insertions(+), 20 deletions(-) create mode 100644 lldb/test/Shell/ScriptInterpreter/Python/sb_function_ranges.s diff --git a/lldb/include/lldb/API/SBAddressRangeList.h b/lldb/include/lldb/API/SBAddressRangeList.h index 5a4eeecf37dc9..41085b1edf8d7 100644 --- a/lldb/include/lldb/API/SBAddressRangeList.h +++ b/lldb/include/lldb/API/SBAddressRangeList.h @@ -45,6 +45,7 @@ class LLDB_API SBAddressRangeList { private: friend class SBBlock; friend class SBProcess; + friend class SBFunction; lldb_private::AddressRangeListImpl &ref() const; diff --git a/lldb/include/lldb/API/SBFunction.h b/lldb/include/lldb/API/SBFunction.h index df607fdc7ebf5..0a8aeeff1ea5a 100644 --- a/lldb/include/lldb/API/SBFunction.h +++ b/lldb/include/lldb/API/SBFunction.h @@ -43,6 +43,8 @@ class LLDB_API SBFunction { lldb::SBAddress GetStartAddress(); + LLDB_DEPRECATED_FIXME("Not compatible with discontinuous functions.", + "GetRanges()") lldb::SBAddress GetEndAddress(); lldb::SBAddressRangeList GetRanges(); diff --git a/lldb/include/lldb/Core/AddressRangeListImpl.h b/lldb/include/lldb/Core/AddressRangeListImpl.h index 6742e6ead87de..6b88f9b1ac179 100644 --- a/lldb/include/lldb/Core/AddressRangeListImpl.h +++ b/lldb/include/lldb/Core/AddressRangeListImpl.h @@ -24,9 +24,8 @@ class AddressRangeListImpl { public: AddressRangeListImpl(); - AddressRangeListImpl(const AddressRangeListImpl &rhs) = default; - - AddressRangeListImpl &operator=(const AddressRangeListImpl &rhs); + explicit AddressRangeListImpl(AddressRanges ranges) + : m_ranges(std::move(ranges)) {} size_t GetSize() const; diff --git a/lldb/include/lldb/Symbol/Function.h b/lldb/include/lldb/Symbol/Function.h index 70f51a846f8d9..855940a6415d7 100644 --- a/lldb/include/lldb/Symbol/Function.h +++ b/lldb/include/lldb/Symbol/Function.h @@ -444,8 +444,11 @@ class Function : public UserID, public SymbolContextScope { Function *CalculateSymbolContextFunction() override; + /// DEPRECATED: Use GetAddressRanges instead. 
const AddressRange &GetAddressRange() { return m_range; } + const AddressRanges &GetAddressRanges() const { return m_ranges; } + lldb::LanguageType GetLanguage() const; /// Find the file and line number of the source location of the start of the /// function. This will use the declaration if present and fall back on the diff --git a/lldb/source/API/SBFunction.cpp b/lldb/source/API/SBFunction.cpp index ac61220ec8736..2ef62eea4d199 100644 --- a/lldb/source/API/SBFunction.cpp +++ b/lldb/source/API/SBFunction.cpp @@ -10,6 +10,7 @@ #include "lldb/API/SBAddressRange.h" #include "lldb/API/SBProcess.h" #include "lldb/API/SBStream.h" +#include "lldb/Core/AddressRangeListImpl.h" #include "lldb/Core/Disassembler.h" #include "lldb/Core/Module.h" #include "lldb/Symbol/CompileUnit.h" @@ -153,10 +154,11 @@ SBAddress SBFunction::GetEndAddress() { SBAddress addr; if (m_opaque_ptr) { - addr_t byte_size = m_opaque_ptr->GetAddressRange().GetByteSize(); - if (byte_size > 0) { - addr.SetAddress(m_opaque_ptr->GetAddressRange().GetBaseAddress()); - addr->Slide(byte_size); + llvm::ArrayRef ranges = m_opaque_ptr->GetAddressRanges(); + if (!ranges.empty()) { + // Return the end of the first range, use GetRanges to get all ranges. + addr.SetAddress(ranges.front().GetBaseAddress()); + addr->Slide(ranges.front().GetByteSize()); } } return addr; @@ -166,11 +168,8 @@ lldb::SBAddressRangeList SBFunction::GetRanges() { LLDB_INSTRUMENT_VA(this); lldb::SBAddressRangeList ranges; - if (m_opaque_ptr) { - lldb::SBAddressRange range; - (*range.m_opaque_up) = m_opaque_ptr->GetAddressRange(); - ranges.Append(std::move(range)); - } + if (m_opaque_ptr) + ranges.ref() = AddressRangeListImpl(m_opaque_ptr->GetAddressRanges()); return ranges; } diff --git a/lldb/source/Core/AddressRangeListImpl.cpp b/lldb/source/Core/AddressRangeListImpl.cpp index d405cf0fa3ec3..257824a0551e1 100644 --- a/lldb/source/Core/AddressRangeListImpl.cpp +++ b/lldb/source/Core/AddressRangeListImpl.cpp @@ -13,14 +13,6 @@ using namespace lldb_private; AddressRangeListImpl::AddressRangeListImpl() : m_ranges() {} -AddressRangeListImpl & -AddressRangeListImpl::operator=(const AddressRangeListImpl &rhs) { - if (this == &rhs) - return *this; - m_ranges = rhs.m_ranges; - return *this; -} - size_t AddressRangeListImpl::GetSize() const { return m_ranges.size(); } void AddressRangeListImpl::Reserve(size_t capacity) { diff --git a/lldb/test/Shell/ScriptInterpreter/Python/sb_function_ranges.s b/lldb/test/Shell/ScriptInterpreter/Python/sb_function_ranges.s new file mode 100644 index 0000000000000..09b41148c7068 --- /dev/null +++ b/lldb/test/Shell/ScriptInterpreter/Python/sb_function_ranges.s @@ -0,0 +1,182 @@ +# REQUIRES: x86 + +# RUN: split-file %s %t +# RUN: llvm-mc -triple x86_64-pc-linux -filetype=obj %t/input.s -o %t/input.o +# RUN: %lldb %t/input.o -o "command script import %t/script.py" -o exit | FileCheck %s + +# CHECK: Found 1 function(s). +# CHECK: foo: [input.o[0x0-0x7), input.o[0x7-0xe), input.o[0x14-0x1b), input.o[0x1b-0x1c)] + +#--- script.py +import lldb + +def __lldb_init_module(debugger, internal_dict): + target = debugger.GetSelectedTarget() + sym_ctxs = target.FindFunctions("foo") + print(f"Found {len(sym_ctxs)} function(s).") + for ctx in sym_ctxs: + fn = ctx.function + print(f"{fn.name}: {fn.GetRanges()}") + +#--- input.s +# An example of a function which has been split into two parts. Roughly +# corresponds to this C code. +# int baz(); +# int bar() { return 47; } +# int foo(int flag) { return flag ? 
bar() : baz(); } +# The function bar has been placed "in the middle" of foo. + + .text + + .type foo,@function +foo: + .cfi_startproc + cmpl $0, %edi + je foo.__part.2 + jmp foo.__part.1 + .cfi_endproc +.Lfoo_end: + .size foo, .Lfoo_end-foo + +foo.__part.1: + .cfi_startproc + callq bar + jmp foo.__part.3 +.Lfoo.__part.1_end: + .size foo.__part.1, .Lfoo.__part.1_end-foo.__part.1 + .cfi_endproc + +bar: + .cfi_startproc + movl $47, %eax + retq + .cfi_endproc +.Lbar_end: + .size bar, .Lbar_end-bar + +foo.__part.2: + .cfi_startproc + callq baz + jmp foo.__part.3 +.Lfoo.__part.2_end: + .size foo.__part.2, .Lfoo.__part.2_end-foo.__part.2 + .cfi_endproc + +foo.__part.3: + .cfi_startproc + retq +.Lfoo.__part.3_end: + .size foo.__part.3, .Lfoo.__part.3_end-foo.__part.3 + .cfi_endproc + + + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 8 # DW_FORM_string + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 85 # DW_AT_ranges + .byte 35 # DW_FORM_rnglistx + .byte 116 # DW_AT_rnglists_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 0 # DW_CHILDREN_no + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 1 # DW_FORM_addr + .byte 3 # DW_AT_name + .byte 8 # DW_FORM_string + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 0 # DW_CHILDREN_no + .byte 85 # DW_AT_ranges + .byte 35 # DW_FORM_rnglistx + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 8 # DW_FORM_string + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. 
Section + .byte 1 # Abbrev [1] DW_TAG_compile_unit + .asciz "Hand-written DWARF" # DW_AT_producer + .short 29 # DW_AT_language + .quad 0 # DW_AT_low_pc + .byte 1 # DW_AT_ranges + .long .Lrnglists_table_base0 # DW_AT_rnglists_base + .byte 2 # Abbrev [2] DW_TAG_subprogram + .quad bar # DW_AT_low_pc + .quad .Lbar_end # DW_AT_high_pc + .asciz "bar" # DW_AT_name + .byte 3 # Abbrev [3] DW_TAG_subprogram + .byte 0 # DW_AT_ranges + .byte 1 # DW_AT_frame_base + .byte 86 + .asciz "foo" # DW_AT_name + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + + .section .debug_rnglists,"",@progbits + .long .Ldebug_list_header_end0-.Ldebug_list_header_start0 # Length +.Ldebug_list_header_start0: + .short 5 # Version + .byte 8 # Address size + .byte 0 # Segment selector size + .long 2 # Offset entry count +.Lrnglists_table_base0: + .long .Ldebug_ranges0-.Lrnglists_table_base0 + .long .Ldebug_ranges1-.Lrnglists_table_base0 +.Ldebug_ranges0: + .byte 6 # DW_RLE_start_end + .quad foo + .quad .Lfoo_end + .byte 6 # DW_RLE_start_end + .quad foo.__part.1 + .quad .Lfoo.__part.1_end + .byte 6 # DW_RLE_start_end + .quad foo.__part.2 + .quad .Lfoo.__part.2_end + .byte 6 # DW_RLE_start_end + .quad foo.__part.3 + .quad .Lfoo.__part.3_end + .byte 0 # DW_RLE_end_of_list +.Ldebug_ranges1: + .byte 6 # DW_RLE_start_end + .quad bar + .quad .Lbar_end + .byte 6 # DW_RLE_start_end + .quad foo.__part.1 + .quad .Lfoo.__part.1_end + .byte 6 # DW_RLE_start_end + .quad foo.__part.2 + .quad .Lfoo.__part.2_end + .byte 6 # DW_RLE_start_end + .quad foo.__part.3 + .quad .Lfoo.__part.3_end + .byte 6 # DW_RLE_start_end + .quad foo + .quad .Lfoo_end + .byte 0 # DW_RLE_end_of_list +.Ldebug_list_header_end0: + + .section ".note.GNU-stack","",@progbits From 78e35e4c0eea6c2f8184f7b98e6a385e981512db Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Tue, 3 Dec 2024 17:14:55 +0800 Subject: [PATCH 041/191] [ValueTracking] Fix typo in `isKnownNegative` and `MaskedValueIsZero` NFC. (#118438) Fix typos introduced by https://github.com/llvm/llvm-project/commit/d9e8ae7d2f74fff629a8be10835677ce8039d452 and https://github.com/llvm/llvm-project/commit/42b6c8ed3ae44ed61af5ba81bdaa5b81a9ce61ad. --- llvm/include/llvm/Analysis/ValueTracking.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index bd74d27e0c49b..c408e0a39cd18 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -159,7 +159,7 @@ bool isKnownPositive(const Value *V, const SimplifyQuery &SQ, /// Returns true if the given value is known be negative (i.e. non-positive /// and non-zero). -bool isKnownNegative(const Value *V, const SimplifyQuery &DL, +bool isKnownNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth = 0); /// Return true if the given values are known to be non-equal when defined. @@ -180,7 +180,7 @@ bool isKnownNonEqual(const Value *V1, const Value *V2, const DataLayout &DL, /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. bool MaskedValueIsZero(const Value *V, const APInt &Mask, - const SimplifyQuery &DL, unsigned Depth = 0); + const SimplifyQuery &SQ, unsigned Depth = 0); /// Return the number of times the sign bit of the register is replicated into /// the other bits. 
We know that at least 1 bit is always equal to the sign From dd2b2b8bbbf4e917a84efe94cbeaaab2ed350fc9 Mon Sep 17 00:00:00 2001 From: Adam Yang Date: Tue, 3 Dec 2024 01:16:49 -0800 Subject: [PATCH 042/191] [clang][HLSL] Add GroupMemoryBarrierWithGroupSync intrinsic (#111883) partially fixes #70103 ### Changes * Implemented `GroupMemoryBarrierWithGroupSync` clang builtin * Linked `GroupMemoryBarrierWithGroupSync` clang builtin with `hlsl_intrinsics.h` * Added sema checks for `GroupMemoryBarrierWithGroupSync` to `CheckHLSLBuiltinFunctionCall` in `SemaChecking.cpp` * Add codegen for `GroupMemoryBarrierWithGroupSync` to `EmitHLSLBuiltinExpr` in `CGBuiltin.cpp` * Add codegen tests to `clang/test/CodeGenHLSL/builtins/GroupMemoryBarrierWithGroupSync.hlsl` * Add sema tests to `clang/test/SemaHLSL/BuiltIns/GroupMemoryBarrierWithGroupSync-errors.hlsl` ### Related PRs * [[DXIL] Add GroupMemoryBarrierWithGroupSync intrinsic #111884](https://github.com/llvm/llvm-project/pull/111884) * [[SPIRV] Add GroupMemoryBarrierWithGroupSync intrinsic #111888](https://github.com/llvm/llvm-project/pull/111888) --- clang/include/clang/Basic/Builtins.td | 6 ++++++ clang/lib/CodeGen/CGBuiltin.cpp | 6 ++++++ clang/lib/CodeGen/CGHLSLRuntime.h | 2 ++ clang/lib/Headers/hlsl/hlsl_intrinsics.h | 12 +++++++++++ .../GroupMemoryBarrierWithGroupSync.hlsl | 20 +++++++++++++++++++ ...roupMemoryBarrierWithGroupSync-errors.hlsl | 6 ++++++ 6 files changed, 52 insertions(+) create mode 100644 clang/test/CodeGenHLSL/builtins/GroupMemoryBarrierWithGroupSync.hlsl create mode 100644 clang/test/SemaHLSL/BuiltIns/GroupMemoryBarrierWithGroupSync-errors.hlsl diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index dda44f3abe016..e2c3d3c535571 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4930,6 +4930,12 @@ def HLSLClip: LangBuiltin<"HLSL_LANG"> { let Prototype = "void(...)"; } +def HLSLGroupMemoryBarrierWithGroupSync: LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_group_memory_barrier_with_group_sync"]; + let Attributes = [NoThrow, Const]; + let Prototype = "void()"; +} + // Builtins for XRay. 
def XRayCustomEvent : Builtin { let Spellings = ["__xray_customevent"]; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index a54dd884c7fa5..7588f8427cdd3 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19456,6 +19456,12 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: { assert(E->getArg(0)->getType()->hasFloatingRepresentation() && "clip operands types mismatch"); return handleHlslClip(E, this); + case Builtin::BI__builtin_hlsl_group_memory_barrier_with_group_sync: { + Intrinsic::ID ID = + CGM.getHLSLRuntime().getGroupMemoryBarrierWithGroupSyncIntrinsic(); + return EmitRuntimeCall( + Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); + } } return nullptr; } diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index 854214d6bc067..bb120c8b5e9e6 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -103,6 +103,8 @@ class CGHLSLRuntime { GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromBinding, handle_fromBinding) GENERATE_HLSL_INTRINSIC_FUNCTION(BufferUpdateCounter, bufferUpdateCounter) + GENERATE_HLSL_INTRINSIC_FUNCTION(GroupMemoryBarrierWithGroupSync, + group_memory_barrier_with_group_sync) //===----------------------------------------------------------------------===// // End of reserved area for HLSL intrinsic getters. diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index a3e0b5c65a6f5..1126e13600f8a 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -2481,5 +2481,17 @@ float3 radians(float3); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians) float4 radians(float4); +//===----------------------------------------------------------------------===// +// GroupMemoryBarrierWithGroupSync builtins +//===----------------------------------------------------------------------===// + +/// \fn void GroupMemoryBarrierWithGroupSync(void) +/// \brief Blocks execution of all threads in a group until all group shared +/// accesses have been completed and all threads in the group have reached this +/// call. 
+ +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_group_memory_barrier_with_group_sync) +void GroupMemoryBarrierWithGroupSync(void); + } // namespace hlsl #endif //_HLSL_HLSL_INTRINSICS_H_ diff --git a/clang/test/CodeGenHLSL/builtins/GroupMemoryBarrierWithGroupSync.hlsl b/clang/test/CodeGenHLSL/builtins/GroupMemoryBarrierWithGroupSync.hlsl new file mode 100644 index 0000000000000..9d95d54852c0b --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/GroupMemoryBarrierWithGroupSync.hlsl @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: -DTARGET=dx -DFNATTRS=noundef -check-prefixes=CHECK,CHECK-DXIL +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: spirv-unknown-vulkan-compute %s \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: -DTARGET=spv -DFNATTRS="spir_func noundef" -check-prefixes=CHECK,CHECK-SPIRV + +// CHECK-DXIL: define void @ +// CHECK-SPIRV: define spir_func void @ +void test_GroupMemoryBarrierWithGroupSync() { +// CHECK-DXIL: call void @llvm.[[TARGET]].group.memory.barrier.with.group.sync() +// CHECK-SPIRV: call spir_func void @llvm.[[TARGET]].group.memory.barrier.with.group.sync() + GroupMemoryBarrierWithGroupSync(); +} + +// CHECK: declare void @llvm.[[TARGET]].group.memory.barrier.with.group.sync() #[[ATTRS:[0-9]+]] +// CHECK-NOT: attributes #[[ATTRS]] = {{.+}}memory(none){{.+}} +// CHECK: attributes #[[ATTRS]] = { diff --git a/clang/test/SemaHLSL/BuiltIns/GroupMemoryBarrierWithGroupSync-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/GroupMemoryBarrierWithGroupSync-errors.hlsl new file mode 100644 index 0000000000000..24067fbb275b7 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/GroupMemoryBarrierWithGroupSync-errors.hlsl @@ -0,0 +1,6 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only -disable-llvm-passes -verify + +void test_too_many_arg() { + __builtin_hlsl_group_memory_barrier_with_group_sync(0); + // expected-error@-1 {{too many arguments to function call, expected 0, have 1}} +} From 89a0ee89973c3d213c4bc11c26b41eab67e06da0 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Tue, 3 Dec 2024 10:20:30 +0100 Subject: [PATCH 043/191] [clang][bytecode] Handle __builtin_wcslen (#118446) ... just like strlen. 
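For illustration, a minimal constexpr sketch of what this change enables (it mirrors the new bytecode test added below; nothing beyond that test is assumed):

```cpp
// With this change the experimental constant interpreter folds
// __builtin_wcslen in constant expressions, just like __builtin_strlen.
constexpr auto n = __builtin_wcslen(L"hello");
static_assert(n == 5, "wcslen of L\"hello\" is 5");
```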
--- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 4 +++- clang/test/AST/ByteCode/builtin-functions.cpp | 12 ++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 8ff0fad0aa5a7..2da16608e26c4 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -243,7 +243,7 @@ static bool interp__builtin_strlen(InterpState &S, CodePtr OpPC, unsigned ID = Func->getBuiltinID(); const Pointer &StrPtr = getParam(Frame, 0); - if (ID == Builtin::BIstrlen) + if (ID == Builtin::BIstrlen || ID == Builtin::BIwcslen) diagnoseNonConstexprBuiltin(S, OpPC, ID); if (!CheckArray(S, OpPC, StrPtr)) @@ -1857,6 +1857,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F, break; case Builtin::BI__builtin_strlen: case Builtin::BIstrlen: + case Builtin::BI__builtin_wcslen: + case Builtin::BIwcslen: if (!interp__builtin_strlen(S, OpPC, Frame, F, Call)) return false; break; diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp index b94adfa3ab36b..d8c8d207fbc45 100644 --- a/clang/test/AST/ByteCode/builtin-functions.cpp +++ b/clang/test/AST/ByteCode/builtin-functions.cpp @@ -7,6 +7,10 @@ // RUN: %clang_cc1 -triple avr -std=c++20 -Wno-string-plus-int -fexperimental-new-constant-interpreter %s -verify=expected,both // RUN: %clang_cc1 -triple avr -std=c++20 -Wno-string-plus-int -verify=ref,both %s -Wno-constant-evaluated +extern "C" { + typedef decltype(sizeof(int)) size_t; + extern size_t wcslen(const wchar_t *p); +} namespace strcmp { constexpr char kFoobar[6] = {'f','o','o','b','a','r'}; @@ -85,6 +89,14 @@ constexpr const char *a = "foo\0quux"; constexpr char d[] = { 'f', 'o', 'o' }; // no nul terminator. constexpr int bad = __builtin_strlen(d); // both-error {{constant expression}} \ // both-note {{one-past-the-end}} + + constexpr int wn = __builtin_wcslen(L"hello"); + static_assert(wn == 5); + constexpr int wm = wcslen(L"hello"); // both-error {{constant expression}} \ + // both-note {{non-constexpr function 'wcslen' cannot be used in a constant expression}} + + int arr[3]; // both-note {{here}} + int wk = arr[wcslen(L"hello")]; // both-warning {{array index 5}} } namespace nan { From ba14dac481564000339ba22ab867617590184f4c Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Tue, 3 Dec 2024 10:21:04 +0100 Subject: [PATCH 044/191] [lldb] Use the function block as a source for function ranges (#117996) This is a follow-up/reimplementation of #115730. While working on that patch, I did not realize that the correct (discontinuous) set of ranges is already stored in the block representing the whole function. The catch -- ranges for this block are only set later, when parsing all of the blocks of the function. This patch changes that by populating the function block ranges eagerly -- from within the Function constructor. This also necessitates a corresponding change in all of the symbol files -- so that they stop populating the ranges of that block. This allows us to avoid some unnecessary work (not parsing the function DW_AT_ranges twice) and also results in some simplification of the parsing code. 
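For illustration, a self-contained sketch of the range bookkeeping this moves into the constructor (simplified stand-in types and hypothetical addresses, not the actual lldb code): each part of a discontinuous function is recorded in the function block as an offset from the function's base file address, while the single enclosing range only bounds all parts.

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Simplified stand-in for an address range; not the lldb AddressRange type.
struct FileRange { uint64_t base; uint64_t size; };

int main() {
  // A function split into two parts by the linker (hypothetical addresses).
  std::vector<FileRange> parts = {{0x1000, 0x20}, {0x1100, 0x10}};

  // Collapse into one enclosing range, as the deprecated single-range
  // member does...
  uint64_t lo = parts.front().base, hi = 0;
  for (const auto &p : parts) {
    lo = std::min(lo, p.base);
    hi = std::max(hi, p.base + p.size);
  }
  std::printf("enclosing range: [0x%llx, 0x%llx)\n",
              (unsigned long long)lo, (unsigned long long)hi);

  // ...and record each part in the function block as an offset relative to
  // the function's base file address, which is what is now done eagerly.
  for (const auto &p : parts)
    std::printf("block range: [+0x%llx, +0x%llx)\n",
                (unsigned long long)(p.base - lo),
                (unsigned long long)(p.base - lo + p.size));
  return 0;
}
```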
--- lldb/include/lldb/Symbol/Function.h | 3 - .../Breakpad/SymbolFileBreakpad.cpp | 4 +- .../SymbolFile/DWARF/SymbolFileDWARF.cpp | 179 +++++++----------- .../SymbolFile/DWARF/SymbolFileDWARF.h | 3 +- .../NativePDB/SymbolFileNativePDB.cpp | 12 +- .../Plugins/SymbolFile/PDB/SymbolFilePDB.cpp | 52 ++--- lldb/source/Symbol/Function.cpp | 16 +- .../DWARF/x86/discontinuous-function.s | 2 +- .../SymbolFile/PDB/function-nested-block.test | 1 - 9 files changed, 109 insertions(+), 163 deletions(-) diff --git a/lldb/include/lldb/Symbol/Function.h b/lldb/include/lldb/Symbol/Function.h index 855940a6415d7..51289f0f74ddf 100644 --- a/lldb/include/lldb/Symbol/Function.h +++ b/lldb/include/lldb/Symbol/Function.h @@ -653,9 +653,6 @@ class Function : public UserID, public SymbolContextScope { /// All lexical blocks contained in this function. Block m_block; - /// List of address ranges belonging to the function. - AddressRanges m_ranges; - /// The function address range that covers the widest range needed to contain /// all blocks. DEPRECATED: do not use this field in new code as the range may /// include addresses belonging to other functions. diff --git a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp index df3bf157278da..bc886259d6fa5 100644 --- a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp +++ b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp @@ -299,9 +299,7 @@ size_t SymbolFileBreakpad::ParseBlocksRecursive(Function &func) { // "INLINE 0 ...", the current level is 0 and its parent block is the // function block at index 0. std::vector blocks; - Block &block = func.GetBlock(false); - block.AddRange(Block::Range(0, func.GetAddressRange().GetByteSize())); - blocks.push_back(&block); + blocks.push_back(&func.GetBlock(false)); size_t blocks_added = 0; addr_t func_base = func.GetAddressRange().GetBaseAddress().GetOffset(); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index fe711c56958c4..6f19b264eb3dd 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -1305,121 +1305,76 @@ bool SymbolFileDWARF::ParseDebugMacros(CompileUnit &comp_unit) { return true; } -size_t SymbolFileDWARF::ParseBlocksRecursive( - lldb_private::CompileUnit &comp_unit, Block *parent_block, - const DWARFDIE &orig_die, addr_t subprogram_low_pc, uint32_t depth) { +size_t SymbolFileDWARF::ParseBlocksRecursive(CompileUnit &comp_unit, + Block *parent_block, DWARFDIE die, + addr_t subprogram_low_pc) { size_t blocks_added = 0; - DWARFDIE die = orig_die; - while (die) { + for (; die; die = die.GetSibling()) { dw_tag_t tag = die.Tag(); - switch (tag) { - case DW_TAG_inlined_subroutine: - case DW_TAG_subprogram: - case DW_TAG_lexical_block: { - Block *block = nullptr; - if (tag == DW_TAG_subprogram) { - // Skip any DW_TAG_subprogram DIEs that are inside of a normal or - // inlined functions. These will be parsed on their own as separate - // entities. 
- - if (depth > 0) - break; + if (tag != DW_TAG_inlined_subroutine && tag != DW_TAG_lexical_block) + continue; - block = parent_block; - } else { - block = parent_block->CreateChild(die.GetID()).get(); - } - DWARFRangeList ranges; - const char *name = nullptr; - const char *mangled_name = nullptr; - - std::optional decl_file; - std::optional decl_line; - std::optional decl_column; - std::optional call_file; - std::optional call_line; - std::optional call_column; - if (die.GetDIENamesAndRanges(name, mangled_name, ranges, decl_file, - decl_line, decl_column, call_file, call_line, - call_column, nullptr)) { - if (tag == DW_TAG_subprogram) { - assert(subprogram_low_pc == LLDB_INVALID_ADDRESS); - subprogram_low_pc = ranges.GetMinRangeBase(0); - } else if (tag == DW_TAG_inlined_subroutine) { - // We get called here for inlined subroutines in two ways. The first - // time is when we are making the Function object for this inlined - // concrete instance. Since we're creating a top level block at - // here, the subprogram_low_pc will be LLDB_INVALID_ADDRESS. So we - // need to adjust the containing address. The second time is when we - // are parsing the blocks inside the function that contains the - // inlined concrete instance. Since these will be blocks inside the - // containing "real" function the offset will be for that function. - if (subprogram_low_pc == LLDB_INVALID_ADDRESS) { - subprogram_low_pc = ranges.GetMinRangeBase(0); - } - } - - const size_t num_ranges = ranges.GetSize(); - for (size_t i = 0; i < num_ranges; ++i) { - const DWARFRangeList::Entry &range = ranges.GetEntryRef(i); - const addr_t range_base = range.GetRangeBase(); - if (range_base >= subprogram_low_pc) - block->AddRange(Block::Range(range_base - subprogram_low_pc, - range.GetByteSize())); - else { - GetObjectFile()->GetModule()->ReportError( - "{0:x8}: adding range [{1:x16}-{2:x16}) which has a base " - "that is less than the function's low PC {3:x16}. Please file " - "a bug and attach the file at the " - "start of this error message", - block->GetID(), range_base, range.GetRangeEnd(), - subprogram_low_pc); - } - } - block->FinalizeRanges(); - - if (tag != DW_TAG_subprogram && - (name != nullptr || mangled_name != nullptr)) { - std::unique_ptr decl_up; - if (decl_file || decl_line || decl_column) - decl_up = std::make_unique( - comp_unit.GetSupportFiles().GetFileSpecAtIndex( - decl_file ? *decl_file : 0), - decl_line ? *decl_line : 0, decl_column ? *decl_column : 0); - - std::unique_ptr call_up; - if (call_file || call_line || call_column) - call_up = std::make_unique( - comp_unit.GetSupportFiles().GetFileSpecAtIndex( - call_file ? *call_file : 0), - call_line ? *call_line : 0, call_column ? 
*call_column : 0); - - block->SetInlinedFunctionInfo(name, mangled_name, decl_up.get(), - call_up.get()); + Block *block = parent_block->CreateChild(die.GetID()).get(); + DWARFRangeList ranges; + const char *name = nullptr; + const char *mangled_name = nullptr; + + std::optional decl_file; + std::optional decl_line; + std::optional decl_column; + std::optional call_file; + std::optional call_line; + std::optional call_column; + if (die.GetDIENamesAndRanges(name, mangled_name, ranges, decl_file, + decl_line, decl_column, call_file, call_line, + call_column, nullptr)) { + const size_t num_ranges = ranges.GetSize(); + for (size_t i = 0; i < num_ranges; ++i) { + const DWARFRangeList::Entry &range = ranges.GetEntryRef(i); + const addr_t range_base = range.GetRangeBase(); + if (range_base >= subprogram_low_pc) + block->AddRange(Block::Range(range_base - subprogram_low_pc, + range.GetByteSize())); + else { + GetObjectFile()->GetModule()->ReportError( + "{0:x8}: adding range [{1:x16}-{2:x16}) which has a base " + "that is less than the function's low PC {3:x16}. Please file " + "a bug and attach the file at the " + "start of this error message", + block->GetID(), range_base, range.GetRangeEnd(), + subprogram_low_pc); } + } + block->FinalizeRanges(); + + if (tag != DW_TAG_subprogram && + (name != nullptr || mangled_name != nullptr)) { + std::unique_ptr decl_up; + if (decl_file || decl_line || decl_column) + decl_up = std::make_unique( + comp_unit.GetSupportFiles().GetFileSpecAtIndex( + decl_file ? *decl_file : 0), + decl_line ? *decl_line : 0, decl_column ? *decl_column : 0); + + std::unique_ptr call_up; + if (call_file || call_line || call_column) + call_up = std::make_unique( + comp_unit.GetSupportFiles().GetFileSpecAtIndex( + call_file ? *call_file : 0), + call_line ? *call_line : 0, call_column ? *call_column : 0); + + block->SetInlinedFunctionInfo(name, mangled_name, decl_up.get(), + call_up.get()); + } - ++blocks_added; + ++blocks_added; - if (die.HasChildren()) { - blocks_added += - ParseBlocksRecursive(comp_unit, block, die.GetFirstChild(), - subprogram_low_pc, depth + 1); - } + if (die.HasChildren()) { + blocks_added += ParseBlocksRecursive( + comp_unit, block, die.GetFirstChild(), subprogram_low_pc); } - } break; - default: - break; } - - // Only parse siblings of the block if we are not at depth zero. A depth of - // zero indicates we are currently parsing the top level DW_TAG_subprogram - // DIE - - if (depth == 0) - die.Clear(); - else - die = die.GetSibling(); } return blocks_added; } @@ -3240,8 +3195,16 @@ size_t SymbolFileDWARF::ParseBlocksRecursive(Function &func) { DWARFDIE function_die = dwarf_cu->GetNonSkeletonUnit().GetDIE(function_die_offset); if (function_die) { - ParseBlocksRecursive(*comp_unit, &func.GetBlock(false), function_die, - LLDB_INVALID_ADDRESS, 0); + // We can't use the file address from the Function object as (in the OSO + // case) it will already be remapped to the main module. 
+ DWARFRangeList ranges = function_die.GetDIE()->GetAttributeAddressRanges( + function_die.GetCU(), + /*check_hi_lo_pc=*/true); + lldb::addr_t function_file_addr = + ranges.GetMinRangeBase(LLDB_INVALID_ADDRESS); + if (function_file_addr != LLDB_INVALID_ADDRESS) + ParseBlocksRecursive(*comp_unit, &func.GetBlock(false), + function_die.GetFirstChild(), function_file_addr); } return functions_added; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h index ac25a0c48ee7d..76f4188fdf4af 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h @@ -395,8 +395,7 @@ class SymbolFileDWARF : public SymbolFileCommon { Function *ParseFunction(CompileUnit &comp_unit, const DWARFDIE &die); size_t ParseBlocksRecursive(CompileUnit &comp_unit, Block *parent_block, - const DWARFDIE &die, - lldb::addr_t subprogram_low_pc, uint32_t depth); + DWARFDIE die, lldb::addr_t subprogram_low_pc); size_t ParseTypes(const SymbolContext &sc, const DWARFDIE &die, bool parse_siblings, bool parse_children); diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp index d17fedf26b4c4..27d51bbd1cb56 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp @@ -394,18 +394,12 @@ Block *SymbolFileNativePDB::CreateBlock(PdbCompilandSymId block_id) { switch (sym.kind()) { case S_GPROC32: - case S_LPROC32: { + case S_LPROC32: // This is a function. It must be global. Creating the Function entry // for it automatically creates a block for it. - FunctionSP func = GetOrCreateFunction(block_id, *comp_unit); - if (func) { - Block &block = func->GetBlock(false); - if (block.GetNumRanges() == 0) - block.AddRange(Block::Range(0, func->GetAddressRange().GetByteSize())); - return █ - } + if (FunctionSP func = GetOrCreateFunction(block_id, *comp_unit)) + return &func->GetBlock(false); break; - } case S_BLOCK32: { // This is a block. Its parent is either a function or another block. In // either case, its parent can be viewed as a block (e.g. a function diff --git a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp index 4935b0fbdfd87..b7854c05d345a 100644 --- a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp @@ -402,44 +402,32 @@ static size_t ParseFunctionBlocksForPDBSymbol( assert(pdb_symbol && parent_block); size_t num_added = 0; - switch (pdb_symbol->getSymTag()) { - case PDB_SymType::Block: - case PDB_SymType::Function: { - Block *block = nullptr; - auto &raw_sym = pdb_symbol->getRawSymbol(); - if (auto *pdb_func = llvm::dyn_cast(pdb_symbol)) { - if (pdb_func->hasNoInlineAttribute()) - break; - if (is_top_parent) - block = parent_block; - else - break; - } else if (llvm::isa(pdb_symbol)) { - auto uid = pdb_symbol->getSymIndexId(); - if (parent_block->FindBlockByID(uid)) - break; - if (raw_sym.getVirtualAddress() < func_file_vm_addr) - break; - block = parent_block->CreateChild(pdb_symbol->getSymIndexId()).get(); - } else - llvm_unreachable("Unexpected PDB symbol!"); + if (!is_top_parent) { + // Ranges for the top block were parsed together with the function. 
+ if (pdb_symbol->getSymTag() != PDB_SymType::Block) + return num_added; + auto &raw_sym = pdb_symbol->getRawSymbol(); + assert(llvm::isa(pdb_symbol)); + auto uid = pdb_symbol->getSymIndexId(); + if (parent_block->FindBlockByID(uid)) + return num_added; + if (raw_sym.getVirtualAddress() < func_file_vm_addr) + return num_added; + + Block *block = parent_block->CreateChild(pdb_symbol->getSymIndexId()).get(); block->AddRange(Block::Range( raw_sym.getVirtualAddress() - func_file_vm_addr, raw_sym.getLength())); block->FinalizeRanges(); - ++num_added; + } + auto results_up = pdb_symbol->findAllChildren(); + if (!results_up) + return num_added; - auto results_up = pdb_symbol->findAllChildren(); - if (!results_up) - break; - while (auto symbol_up = results_up->getNext()) { - num_added += ParseFunctionBlocksForPDBSymbol( - func_file_vm_addr, symbol_up.get(), block, false); - } - } break; - default: - break; + while (auto symbol_up = results_up->getNext()) { + num_added += ParseFunctionBlocksForPDBSymbol( + func_file_vm_addr, symbol_up.get(), parent_block, false); } return num_added; } diff --git a/lldb/source/Symbol/Function.cpp b/lldb/source/Symbol/Function.cpp index b346749ca06ec..4f07b946353a4 100644 --- a/lldb/source/Symbol/Function.cpp +++ b/lldb/source/Symbol/Function.cpp @@ -279,9 +279,14 @@ Function::Function(CompileUnit *comp_unit, lldb::user_id_t func_uid, AddressRanges ranges) : UserID(func_uid), m_comp_unit(comp_unit), m_type_uid(type_uid), m_type(type), m_mangled(mangled), m_block(*this, func_uid), - m_ranges(std::move(ranges)), m_range(CollapseRanges(m_ranges)), - m_frame_base(), m_flags(), m_prologue_byte_size(0) { + m_range(CollapseRanges(ranges)), m_prologue_byte_size(0) { assert(comp_unit != nullptr); + lldb::addr_t base_file_addr = m_range.GetBaseAddress().GetFileAddress(); + for (const AddressRange &range : ranges) + m_block.AddRange( + Block::Range(range.GetBaseAddress().GetFileAddress() - base_file_addr, + range.GetByteSize())); + m_block.FinalizeRanges(); } Function::~Function() = default; @@ -426,13 +431,16 @@ void Function::GetDescription(Stream *s, lldb::DescriptionLevel level, llvm::interleaveComma(decl_context, *s, [&](auto &ctx) { ctx.Dump(*s); }); *s << "}"; } - *s << ", range" << (m_ranges.size() > 1 ? "s" : "") << " = "; + *s << ", range" << (m_block.GetNumRanges() > 1 ? "s" : "") << " = "; Address::DumpStyle fallback_style = level == eDescriptionLevelVerbose ? 
Address::DumpStyleModuleWithFileAddress : Address::DumpStyleFileAddress; - for (const AddressRange &range : m_ranges) + for (unsigned idx = 0; idx < m_block.GetNumRanges(); ++idx) { + AddressRange range; + m_block.GetRangeAtIndex(idx, range); range.Dump(s, target, Address::DumpStyleLoadAddress, fallback_style); + } } void Function::Dump(Stream *s, bool show_context) const { diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/discontinuous-function.s b/lldb/test/Shell/SymbolFile/DWARF/x86/discontinuous-function.s index 2584158207cc8..b03d5d12ad2a1 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/discontinuous-function.s +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/discontinuous-function.s @@ -10,7 +10,7 @@ # CHECK: 1 match found in {{.*}} # CHECK: Summary: {{.*}}`foo -# CHECK: Function: id = {{.*}}, name = "foo", ranges = [0x0000000000000000-0x0000000000000007)[0x0000000000000007-0x000000000000000e)[0x0000000000000014-0x000000000000001b)[0x000000000000001b-0x000000000000001c) +# CHECK: Function: id = {{.*}}, name = "foo", ranges = [0x0000000000000000-0x000000000000000e)[0x0000000000000014-0x000000000000001c) .text diff --git a/lldb/test/Shell/SymbolFile/PDB/function-nested-block.test b/lldb/test/Shell/SymbolFile/PDB/function-nested-block.test index 1cb20a4036382..9057d01c25840 100644 --- a/lldb/test/Shell/SymbolFile/PDB/function-nested-block.test +++ b/lldb/test/Shell/SymbolFile/PDB/function-nested-block.test @@ -2,7 +2,6 @@ REQUIRES: system-windows, lld RUN: %build --compiler=clang-cl --nodefaultlib --output=%t.exe %S/Inputs/FunctionNestedBlockTest.cpp RUN: lldb-test symbols -find=function -file FunctionNestedBlockTest.cpp -line 4 %t.exe | FileCheck --check-prefix=CHECK-FUNCTION %s RUN: lldb-test symbols -find=block -file FunctionNestedBlockTest.cpp -line 4 %t.exe | FileCheck --check-prefix=CHECK-BLOCK %s -XFAIL: * CHECK-FUNCTION: Found 1 functions: CHECK-FUNCTION: name = "{{.*}}", mangled = "{{_?}}main" From 2526d5b1689389da9b194b5ec2878cfb2f4aca93 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Tue, 3 Dec 2024 10:27:31 +0100 Subject: [PATCH 045/191] Revert "[lldb] Use the function block as a source for function ranges (#117996)" This reverts commit ba14dac481564000339ba22ab867617590184f4c. I guess "has no conflicts" doesn't mean "it will build". --- lldb/include/lldb/Symbol/Function.h | 3 + .../Breakpad/SymbolFileBreakpad.cpp | 4 +- .../SymbolFile/DWARF/SymbolFileDWARF.cpp | 179 +++++++++++------- .../SymbolFile/DWARF/SymbolFileDWARF.h | 3 +- .../NativePDB/SymbolFileNativePDB.cpp | 12 +- .../Plugins/SymbolFile/PDB/SymbolFilePDB.cpp | 52 +++-- lldb/source/Symbol/Function.cpp | 16 +- .../DWARF/x86/discontinuous-function.s | 2 +- .../SymbolFile/PDB/function-nested-block.test | 1 + 9 files changed, 163 insertions(+), 109 deletions(-) diff --git a/lldb/include/lldb/Symbol/Function.h b/lldb/include/lldb/Symbol/Function.h index 51289f0f74ddf..855940a6415d7 100644 --- a/lldb/include/lldb/Symbol/Function.h +++ b/lldb/include/lldb/Symbol/Function.h @@ -653,6 +653,9 @@ class Function : public UserID, public SymbolContextScope { /// All lexical blocks contained in this function. Block m_block; + /// List of address ranges belonging to the function. + AddressRanges m_ranges; + /// The function address range that covers the widest range needed to contain /// all blocks. DEPRECATED: do not use this field in new code as the range may /// include addresses belonging to other functions. 
diff --git a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp index bc886259d6fa5..df3bf157278da 100644 --- a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp +++ b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp @@ -299,7 +299,9 @@ size_t SymbolFileBreakpad::ParseBlocksRecursive(Function &func) { // "INLINE 0 ...", the current level is 0 and its parent block is the // function block at index 0. std::vector blocks; - blocks.push_back(&func.GetBlock(false)); + Block &block = func.GetBlock(false); + block.AddRange(Block::Range(0, func.GetAddressRange().GetByteSize())); + blocks.push_back(&block); size_t blocks_added = 0; addr_t func_base = func.GetAddressRange().GetBaseAddress().GetOffset(); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index 6f19b264eb3dd..fe711c56958c4 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -1305,76 +1305,121 @@ bool SymbolFileDWARF::ParseDebugMacros(CompileUnit &comp_unit) { return true; } -size_t SymbolFileDWARF::ParseBlocksRecursive(CompileUnit &comp_unit, - Block *parent_block, DWARFDIE die, - addr_t subprogram_low_pc) { +size_t SymbolFileDWARF::ParseBlocksRecursive( + lldb_private::CompileUnit &comp_unit, Block *parent_block, + const DWARFDIE &orig_die, addr_t subprogram_low_pc, uint32_t depth) { size_t blocks_added = 0; - for (; die; die = die.GetSibling()) { + DWARFDIE die = orig_die; + while (die) { dw_tag_t tag = die.Tag(); - if (tag != DW_TAG_inlined_subroutine && tag != DW_TAG_lexical_block) - continue; + switch (tag) { + case DW_TAG_inlined_subroutine: + case DW_TAG_subprogram: + case DW_TAG_lexical_block: { + Block *block = nullptr; + if (tag == DW_TAG_subprogram) { + // Skip any DW_TAG_subprogram DIEs that are inside of a normal or + // inlined functions. These will be parsed on their own as separate + // entities. - Block *block = parent_block->CreateChild(die.GetID()).get(); - DWARFRangeList ranges; - const char *name = nullptr; - const char *mangled_name = nullptr; - - std::optional decl_file; - std::optional decl_line; - std::optional decl_column; - std::optional call_file; - std::optional call_line; - std::optional call_column; - if (die.GetDIENamesAndRanges(name, mangled_name, ranges, decl_file, - decl_line, decl_column, call_file, call_line, - call_column, nullptr)) { - const size_t num_ranges = ranges.GetSize(); - for (size_t i = 0; i < num_ranges; ++i) { - const DWARFRangeList::Entry &range = ranges.GetEntryRef(i); - const addr_t range_base = range.GetRangeBase(); - if (range_base >= subprogram_low_pc) - block->AddRange(Block::Range(range_base - subprogram_low_pc, - range.GetByteSize())); - else { - GetObjectFile()->GetModule()->ReportError( - "{0:x8}: adding range [{1:x16}-{2:x16}) which has a base " - "that is less than the function's low PC {3:x16}. Please file " - "a bug and attach the file at the " - "start of this error message", - block->GetID(), range_base, range.GetRangeEnd(), - subprogram_low_pc); - } - } - block->FinalizeRanges(); - - if (tag != DW_TAG_subprogram && - (name != nullptr || mangled_name != nullptr)) { - std::unique_ptr decl_up; - if (decl_file || decl_line || decl_column) - decl_up = std::make_unique( - comp_unit.GetSupportFiles().GetFileSpecAtIndex( - decl_file ? *decl_file : 0), - decl_line ? *decl_line : 0, decl_column ? 
*decl_column : 0); - - std::unique_ptr call_up; - if (call_file || call_line || call_column) - call_up = std::make_unique( - comp_unit.GetSupportFiles().GetFileSpecAtIndex( - call_file ? *call_file : 0), - call_line ? *call_line : 0, call_column ? *call_column : 0); - - block->SetInlinedFunctionInfo(name, mangled_name, decl_up.get(), - call_up.get()); + if (depth > 0) + break; + + block = parent_block; + } else { + block = parent_block->CreateChild(die.GetID()).get(); } + DWARFRangeList ranges; + const char *name = nullptr; + const char *mangled_name = nullptr; + + std::optional decl_file; + std::optional decl_line; + std::optional decl_column; + std::optional call_file; + std::optional call_line; + std::optional call_column; + if (die.GetDIENamesAndRanges(name, mangled_name, ranges, decl_file, + decl_line, decl_column, call_file, call_line, + call_column, nullptr)) { + if (tag == DW_TAG_subprogram) { + assert(subprogram_low_pc == LLDB_INVALID_ADDRESS); + subprogram_low_pc = ranges.GetMinRangeBase(0); + } else if (tag == DW_TAG_inlined_subroutine) { + // We get called here for inlined subroutines in two ways. The first + // time is when we are making the Function object for this inlined + // concrete instance. Since we're creating a top level block at + // here, the subprogram_low_pc will be LLDB_INVALID_ADDRESS. So we + // need to adjust the containing address. The second time is when we + // are parsing the blocks inside the function that contains the + // inlined concrete instance. Since these will be blocks inside the + // containing "real" function the offset will be for that function. + if (subprogram_low_pc == LLDB_INVALID_ADDRESS) { + subprogram_low_pc = ranges.GetMinRangeBase(0); + } + } - ++blocks_added; + const size_t num_ranges = ranges.GetSize(); + for (size_t i = 0; i < num_ranges; ++i) { + const DWARFRangeList::Entry &range = ranges.GetEntryRef(i); + const addr_t range_base = range.GetRangeBase(); + if (range_base >= subprogram_low_pc) + block->AddRange(Block::Range(range_base - subprogram_low_pc, + range.GetByteSize())); + else { + GetObjectFile()->GetModule()->ReportError( + "{0:x8}: adding range [{1:x16}-{2:x16}) which has a base " + "that is less than the function's low PC {3:x16}. Please file " + "a bug and attach the file at the " + "start of this error message", + block->GetID(), range_base, range.GetRangeEnd(), + subprogram_low_pc); + } + } + block->FinalizeRanges(); + + if (tag != DW_TAG_subprogram && + (name != nullptr || mangled_name != nullptr)) { + std::unique_ptr decl_up; + if (decl_file || decl_line || decl_column) + decl_up = std::make_unique( + comp_unit.GetSupportFiles().GetFileSpecAtIndex( + decl_file ? *decl_file : 0), + decl_line ? *decl_line : 0, decl_column ? *decl_column : 0); + + std::unique_ptr call_up; + if (call_file || call_line || call_column) + call_up = std::make_unique( + comp_unit.GetSupportFiles().GetFileSpecAtIndex( + call_file ? *call_file : 0), + call_line ? *call_line : 0, call_column ? *call_column : 0); + + block->SetInlinedFunctionInfo(name, mangled_name, decl_up.get(), + call_up.get()); + } + + ++blocks_added; - if (die.HasChildren()) { - blocks_added += ParseBlocksRecursive( - comp_unit, block, die.GetFirstChild(), subprogram_low_pc); + if (die.HasChildren()) { + blocks_added += + ParseBlocksRecursive(comp_unit, block, die.GetFirstChild(), + subprogram_low_pc, depth + 1); + } } + } break; + default: + break; } + + // Only parse siblings of the block if we are not at depth zero. 
A depth of + // zero indicates we are currently parsing the top level DW_TAG_subprogram + // DIE + + if (depth == 0) + die.Clear(); + else + die = die.GetSibling(); } return blocks_added; } @@ -3195,16 +3240,8 @@ size_t SymbolFileDWARF::ParseBlocksRecursive(Function &func) { DWARFDIE function_die = dwarf_cu->GetNonSkeletonUnit().GetDIE(function_die_offset); if (function_die) { - // We can't use the file address from the Function object as (in the OSO - // case) it will already be remapped to the main module. - DWARFRangeList ranges = function_die.GetDIE()->GetAttributeAddressRanges( - function_die.GetCU(), - /*check_hi_lo_pc=*/true); - lldb::addr_t function_file_addr = - ranges.GetMinRangeBase(LLDB_INVALID_ADDRESS); - if (function_file_addr != LLDB_INVALID_ADDRESS) - ParseBlocksRecursive(*comp_unit, &func.GetBlock(false), - function_die.GetFirstChild(), function_file_addr); + ParseBlocksRecursive(*comp_unit, &func.GetBlock(false), function_die, + LLDB_INVALID_ADDRESS, 0); } return functions_added; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h index 76f4188fdf4af..ac25a0c48ee7d 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h @@ -395,7 +395,8 @@ class SymbolFileDWARF : public SymbolFileCommon { Function *ParseFunction(CompileUnit &comp_unit, const DWARFDIE &die); size_t ParseBlocksRecursive(CompileUnit &comp_unit, Block *parent_block, - DWARFDIE die, lldb::addr_t subprogram_low_pc); + const DWARFDIE &die, + lldb::addr_t subprogram_low_pc, uint32_t depth); size_t ParseTypes(const SymbolContext &sc, const DWARFDIE &die, bool parse_siblings, bool parse_children); diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp index 27d51bbd1cb56..d17fedf26b4c4 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp @@ -394,12 +394,18 @@ Block *SymbolFileNativePDB::CreateBlock(PdbCompilandSymId block_id) { switch (sym.kind()) { case S_GPROC32: - case S_LPROC32: + case S_LPROC32: { // This is a function. It must be global. Creating the Function entry // for it automatically creates a block for it. - if (FunctionSP func = GetOrCreateFunction(block_id, *comp_unit)) - return &func->GetBlock(false); + FunctionSP func = GetOrCreateFunction(block_id, *comp_unit); + if (func) { + Block &block = func->GetBlock(false); + if (block.GetNumRanges() == 0) + block.AddRange(Block::Range(0, func->GetAddressRange().GetByteSize())); + return █ + } break; + } case S_BLOCK32: { // This is a block. Its parent is either a function or another block. In // either case, its parent can be viewed as a block (e.g. 
a function diff --git a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp index b7854c05d345a..4935b0fbdfd87 100644 --- a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp @@ -402,32 +402,44 @@ static size_t ParseFunctionBlocksForPDBSymbol( assert(pdb_symbol && parent_block); size_t num_added = 0; + switch (pdb_symbol->getSymTag()) { + case PDB_SymType::Block: + case PDB_SymType::Function: { + Block *block = nullptr; + auto &raw_sym = pdb_symbol->getRawSymbol(); + if (auto *pdb_func = llvm::dyn_cast(pdb_symbol)) { + if (pdb_func->hasNoInlineAttribute()) + break; + if (is_top_parent) + block = parent_block; + else + break; + } else if (llvm::isa(pdb_symbol)) { + auto uid = pdb_symbol->getSymIndexId(); + if (parent_block->FindBlockByID(uid)) + break; + if (raw_sym.getVirtualAddress() < func_file_vm_addr) + break; - if (!is_top_parent) { - // Ranges for the top block were parsed together with the function. - if (pdb_symbol->getSymTag() != PDB_SymType::Block) - return num_added; + block = parent_block->CreateChild(pdb_symbol->getSymIndexId()).get(); + } else + llvm_unreachable("Unexpected PDB symbol!"); - auto &raw_sym = pdb_symbol->getRawSymbol(); - assert(llvm::isa(pdb_symbol)); - auto uid = pdb_symbol->getSymIndexId(); - if (parent_block->FindBlockByID(uid)) - return num_added; - if (raw_sym.getVirtualAddress() < func_file_vm_addr) - return num_added; - - Block *block = parent_block->CreateChild(pdb_symbol->getSymIndexId()).get(); block->AddRange(Block::Range( raw_sym.getVirtualAddress() - func_file_vm_addr, raw_sym.getLength())); block->FinalizeRanges(); - } - auto results_up = pdb_symbol->findAllChildren(); - if (!results_up) - return num_added; + ++num_added; - while (auto symbol_up = results_up->getNext()) { - num_added += ParseFunctionBlocksForPDBSymbol( - func_file_vm_addr, symbol_up.get(), parent_block, false); + auto results_up = pdb_symbol->findAllChildren(); + if (!results_up) + break; + while (auto symbol_up = results_up->getNext()) { + num_added += ParseFunctionBlocksForPDBSymbol( + func_file_vm_addr, symbol_up.get(), block, false); + } + } break; + default: + break; } return num_added; } diff --git a/lldb/source/Symbol/Function.cpp b/lldb/source/Symbol/Function.cpp index 4f07b946353a4..b346749ca06ec 100644 --- a/lldb/source/Symbol/Function.cpp +++ b/lldb/source/Symbol/Function.cpp @@ -279,14 +279,9 @@ Function::Function(CompileUnit *comp_unit, lldb::user_id_t func_uid, AddressRanges ranges) : UserID(func_uid), m_comp_unit(comp_unit), m_type_uid(type_uid), m_type(type), m_mangled(mangled), m_block(*this, func_uid), - m_range(CollapseRanges(ranges)), m_prologue_byte_size(0) { + m_ranges(std::move(ranges)), m_range(CollapseRanges(m_ranges)), + m_frame_base(), m_flags(), m_prologue_byte_size(0) { assert(comp_unit != nullptr); - lldb::addr_t base_file_addr = m_range.GetBaseAddress().GetFileAddress(); - for (const AddressRange &range : ranges) - m_block.AddRange( - Block::Range(range.GetBaseAddress().GetFileAddress() - base_file_addr, - range.GetByteSize())); - m_block.FinalizeRanges(); } Function::~Function() = default; @@ -431,16 +426,13 @@ void Function::GetDescription(Stream *s, lldb::DescriptionLevel level, llvm::interleaveComma(decl_context, *s, [&](auto &ctx) { ctx.Dump(*s); }); *s << "}"; } - *s << ", range" << (m_block.GetNumRanges() > 1 ? "s" : "") << " = "; + *s << ", range" << (m_ranges.size() > 1 ? 
"s" : "") << " = "; Address::DumpStyle fallback_style = level == eDescriptionLevelVerbose ? Address::DumpStyleModuleWithFileAddress : Address::DumpStyleFileAddress; - for (unsigned idx = 0; idx < m_block.GetNumRanges(); ++idx) { - AddressRange range; - m_block.GetRangeAtIndex(idx, range); + for (const AddressRange &range : m_ranges) range.Dump(s, target, Address::DumpStyleLoadAddress, fallback_style); - } } void Function::Dump(Stream *s, bool show_context) const { diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/discontinuous-function.s b/lldb/test/Shell/SymbolFile/DWARF/x86/discontinuous-function.s index b03d5d12ad2a1..2584158207cc8 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/discontinuous-function.s +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/discontinuous-function.s @@ -10,7 +10,7 @@ # CHECK: 1 match found in {{.*}} # CHECK: Summary: {{.*}}`foo -# CHECK: Function: id = {{.*}}, name = "foo", ranges = [0x0000000000000000-0x000000000000000e)[0x0000000000000014-0x000000000000001c) +# CHECK: Function: id = {{.*}}, name = "foo", ranges = [0x0000000000000000-0x0000000000000007)[0x0000000000000007-0x000000000000000e)[0x0000000000000014-0x000000000000001b)[0x000000000000001b-0x000000000000001c) .text diff --git a/lldb/test/Shell/SymbolFile/PDB/function-nested-block.test b/lldb/test/Shell/SymbolFile/PDB/function-nested-block.test index 9057d01c25840..1cb20a4036382 100644 --- a/lldb/test/Shell/SymbolFile/PDB/function-nested-block.test +++ b/lldb/test/Shell/SymbolFile/PDB/function-nested-block.test @@ -2,6 +2,7 @@ REQUIRES: system-windows, lld RUN: %build --compiler=clang-cl --nodefaultlib --output=%t.exe %S/Inputs/FunctionNestedBlockTest.cpp RUN: lldb-test symbols -find=function -file FunctionNestedBlockTest.cpp -line 4 %t.exe | FileCheck --check-prefix=CHECK-FUNCTION %s RUN: lldb-test symbols -find=block -file FunctionNestedBlockTest.cpp -line 4 %t.exe | FileCheck --check-prefix=CHECK-BLOCK %s +XFAIL: * CHECK-FUNCTION: Found 1 functions: CHECK-FUNCTION: name = "{{.*}}", mangled = "{{_?}}main" From bfb26202e05ee2932b4368b5fca607df01e8247f Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Tue, 3 Dec 2024 09:46:19 +0000 Subject: [PATCH 046/191] LV/test: clean up a test and regen with UTC (#118394) --- .../Transforms/LoopVectorize/select-cmp.ll | 1537 ++++++++++++++--- 1 file changed, 1270 insertions(+), 267 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp.ll b/llvm/test/Transforms/LoopVectorize/select-cmp.ll index 7b66440a7fdcc..301526cf3070c 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp.ll @@ -1,320 +1,1323 @@ -; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC1 --check-prefix=CHECK -; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC4 --check-prefix=CHECK -; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK-VF1IC4 --check-prefix=CHECK - -define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) { -; CHECK-LABEL: @select_const_i32_from_icmp -; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] -; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x i32> -; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], splat (i32 3) -; CHECK-VF4IC1-NEXT: 
[[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], splat (i1 true) -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] -; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) -; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 7, i32 3 - -; CHECK-VF4IC4: vector.body: -; CHECK-VF4IC4: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL1:%.*]], %vector.body ] -; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ] -; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ] -; CHECK-VF4IC4-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ] -; CHECK-VF4IC4: [[VEC_ICMP1:%.*]] = icmp eq <4 x i32> {{.*}}, splat (i32 3) -; CHECK-VF4IC4-NEXT: [[VEC_ICMP2:%.*]] = icmp eq <4 x i32> {{.*}}, splat (i32 3) -; CHECK-VF4IC4-NEXT: [[VEC_ICMP3:%.*]] = icmp eq <4 x i32> {{.*}}, splat (i32 3) -; CHECK-VF4IC4-NEXT: [[VEC_ICMP4:%.*]] = icmp eq <4 x i32> {{.*}}, splat (i32 3) -; CHECK-VF4IC4-NEXT: [[NOT1:%.*]] = xor <4 x i1> [[VEC_ICMP1]], splat (i1 true) -; CHECK-VF4IC4-NEXT: [[NOT2:%.*]] = xor <4 x i1> [[VEC_ICMP2]], splat (i1 true) -; CHECK-VF4IC4-NEXT: [[NOT3:%.*]] = xor <4 x i1> [[VEC_ICMP3]], splat (i1 true) -; CHECK-VF4IC4-NEXT: [[NOT4:%.*]] = xor <4 x i1> [[VEC_ICMP4]], splat (i1 true) -; CHECK-VF4IC4-NEXT: [[VEC_SEL1:%.*]] = or <4 x i1> [[VEC_PHI1]], [[NOT1]] -; CHECK-VF4IC4-NEXT: [[VEC_SEL2:%.*]] = or <4 x i1> [[VEC_PHI2]], [[NOT2]] -; CHECK-VF4IC4-NEXT: [[VEC_SEL3:%.*]] = or <4 x i1> [[VEC_PHI3]], [[NOT3]] -; CHECK-VF4IC4-NEXT: [[VEC_SEL4:%.*]] = or <4 x i1> [[VEC_PHI4]], [[NOT4]] -; CHECK-VF4IC4: middle.block: -; CHECK-VF4IC4-NEXT: [[VEC_SEL5:%.*]] = or <4 x i1> [[VEC_SEL2]], [[VEC_SEL1]] -; CHECK-VF4IC4-NEXT: [[VEC_SEL6:%.*]] = or <4 x i1> [[VEC_SEL3]], [[VEC_SEL5]] -; CHECK-VF4IC4-NEXT: [[VEC_SEL7:%.*]] = or <4 x i1> [[VEC_SEL4]], [[VEC_SEL6]] -; CHECK-VF4IC4-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL7]]) -; CHECK-VF4IC4-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC4-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 7, i32 3 - - -; CHECK-VF1IC4: vector.body: -; CHECK-VF1IC4: [[VEC_PHI1:%.*]] = phi i1 [ false, %vector.ph ], [ [[VEC_SEL1:%.*]], %vector.body ] -; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ] -; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ] -; CHECK-VF1IC4-NEXT: [[VEC_PHI4:%.*]] = phi i1 [ false, %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ] -; CHECK-VF1IC4: [[VEC_LOAD1:%.*]] = load i32 -; CHECK-VF1IC4-NEXT: [[VEC_LOAD2:%.*]] = load i32 -; CHECK-VF1IC4-NEXT: [[VEC_LOAD3:%.*]] = load i32 -; CHECK-VF1IC4-NEXT: [[VEC_LOAD4:%.*]] = load i32 -; CHECK-VF1IC4-NEXT: [[VEC_ICMP1:%.*]] = icmp eq i32 [[VEC_LOAD1]], 3 -; CHECK-VF1IC4-NEXT: [[VEC_ICMP2:%.*]] = icmp eq i32 [[VEC_LOAD2]], 3 -; CHECK-VF1IC4-NEXT: [[VEC_ICMP3:%.*]] = icmp eq i32 [[VEC_LOAD3]], 3 -; CHECK-VF1IC4-NEXT: [[VEC_ICMP4:%.*]] = icmp eq i32 [[VEC_LOAD4]], 3 -; CHECK-VF1IC4-NEXT: [[NOT1:%.*]] = xor i1 [[VEC_ICMP1]], true -; CHECK-VF1IC4-NEXT: [[NOT2:%.*]] = xor i1 [[VEC_ICMP2]], true -; CHECK-VF1IC4-NEXT: [[NOT3:%.*]] = xor i1 [[VEC_ICMP3]], true -; CHECK-VF1IC4-NEXT: [[NOT4:%.*]] = xor i1 [[VEC_ICMP4]], true -; 
CHECK-VF1IC4-NEXT: [[VEC_SEL1:%.*]] = or i1 [[VEC_PHI1]], [[NOT1]] -; CHECK-VF1IC4-NEXT: [[VEC_SEL2:%.*]] = or i1 [[VEC_PHI2]], [[NOT2]] -; CHECK-VF1IC4-NEXT: [[VEC_SEL3:%.*]] = or i1 [[VEC_PHI3]], [[NOT3]] -; CHECK-VF1IC4-NEXT: [[VEC_SEL4:%.*]] = or i1 [[VEC_PHI4]], [[NOT4]] -; CHECK-VF1IC4: middle.block: -; CHECK-VF1IC4-NEXT: [[VEC_SEL5:%.*]] = or i1 [[VEC_SEL2]], [[VEC_SEL1]] -; CHECK-VF1IC4-NEXT: [[VEC_SEL6:%.*]] = or i1 [[VEC_SEL3]], [[VEC_SEL5]] -; CHECK-VF1IC4-NEXT: [[OR_RDX:%.*]] = or i1 [[VEC_SEL4]], [[VEC_SEL6]] -; CHECK-VF1IC4-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF1IC4-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 7, i32 3 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VF4IC1 +; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VF4IC4 +; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VF1IC4 +define i32 @select_const_i32_from_icmp(ptr %v, i64 %n) { +; CHECK-VF4IC1-LABEL: define i32 @select_const_i32_from_icmp( +; CHECK-VF4IC1-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) { +; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]: +; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-VF4IC1: [[VECTOR_PH]]: +; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-VF4IC1: [[VECTOR_BODY]]: +; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]] +; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 +; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 3) +; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) +; CHECK-VF4IC1-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]] +; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF4IC1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: +; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP7]] +; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i32 7, i32 3 +; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-VF4IC1: [[SCALAR_PH]]: +; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ] +; CHECK-VF4IC1-NEXT: br label %[[LOOP:.*]] +; CHECK-VF4IC1: 
[[LOOP]]: +; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-VF4IC1-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]] +; CHECK-VF4IC1-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4 +; CHECK-VF4IC1-NEXT: [[CMP_V_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3 +; CHECK-VF4IC1-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 [[RDX]], i32 7 +; CHECK-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF4IC1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-VF4IC1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-VF4IC1: [[EXIT]]: +; CHECK-VF4IC1-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: ret i32 [[SEL_LCSSA]] +; +; CHECK-VF4IC4-LABEL: define i32 @select_const_i32_from_icmp( +; CHECK-VF4IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) { +; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]: +; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16 +; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-VF4IC4: [[VECTOR_PH]]: +; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 +; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-VF4IC4: [[VECTOR_BODY]]: +; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]] +; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4 +; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 3) +; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD4]], splat (i32 3) +; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD5]], splat (i32 3) +; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD6]], splat (i32 3) +; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) +; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) +; CHECK-VF4IC4-NEXT: [[TMP12:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true) +; 
CHECK-VF4IC4-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP9]], splat (i1 true) +; CHECK-VF4IC4-NEXT: [[TMP14]] = or <4 x i1> [[VEC_PHI]], [[TMP10]] +; CHECK-VF4IC4-NEXT: [[TMP15]] = or <4 x i1> [[VEC_PHI1]], [[TMP11]] +; CHECK-VF4IC4-NEXT: [[TMP16]] = or <4 x i1> [[VEC_PHI2]], [[TMP12]] +; CHECK-VF4IC4-NEXT: [[TMP17]] = or <4 x i1> [[VEC_PHI3]], [[TMP13]] +; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-VF4IC4-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF4IC4-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: +; CHECK-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or <4 x i1> [[TMP15]], [[TMP14]] +; CHECK-VF4IC4-NEXT: [[BIN_RDX7:%.*]] = or <4 x i1> [[TMP16]], [[BIN_RDX]] +; CHECK-VF4IC4-NEXT: [[BIN_RDX8:%.*]] = or <4 x i1> [[TMP17]], [[BIN_RDX7]] +; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX8]]) +; CHECK-VF4IC4-NEXT: [[TMP20:%.*]] = freeze i1 [[TMP19]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP20]], i32 7, i32 3 +; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-VF4IC4: [[SCALAR_PH]]: +; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ] +; CHECK-VF4IC4-NEXT: br label %[[LOOP:.*]] +; CHECK-VF4IC4: [[LOOP]]: +; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-VF4IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]] +; CHECK-VF4IC4-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4 +; CHECK-VF4IC4-NEXT: [[CMP_V_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3 +; CHECK-VF4IC4-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 [[RDX]], i32 7 +; CHECK-VF4IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF4IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-VF4IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-VF4IC4: [[EXIT]]: +; CHECK-VF4IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-VF4IC4-NEXT: ret i32 [[SEL_LCSSA]] +; +; CHECK-VF1IC4-LABEL: define i32 @select_const_i32_from_icmp( +; CHECK-VF1IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) { +; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]: +; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-VF1IC4: [[VECTOR_PH]]: +; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-VF1IC4: [[VECTOR_BODY]]: +; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ] +; 
CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]] +; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP1]] +; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP2]] +; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP3]] +; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], 3 +; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP9]], 3 +; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP10]], 3 +; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP11]], 3 +; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = xor i1 [[TMP12]], true +; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = xor i1 [[TMP13]], true +; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = xor i1 [[TMP14]], true +; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = xor i1 [[TMP15]], true +; CHECK-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP16]] +; CHECK-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP17]] +; CHECK-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP18]] +; CHECK-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP19]] +; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF1IC4-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-VF1IC4: [[MIDDLE_BLOCK]]: +; CHECK-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP21]], [[TMP20]] +; CHECK-VF1IC4-NEXT: [[BIN_RDX4:%.*]] = or i1 [[TMP22]], [[BIN_RDX]] +; CHECK-VF1IC4-NEXT: [[BIN_RDX5:%.*]] = or i1 [[TMP23]], [[BIN_RDX4]] +; CHECK-VF1IC4-NEXT: [[TMP25:%.*]] = freeze i1 [[BIN_RDX5]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP25]], i32 7, i32 3 +; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-VF1IC4: [[SCALAR_PH]]: +; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ] +; CHECK-VF1IC4-NEXT: br label %[[LOOP:.*]] +; CHECK-VF1IC4: [[LOOP]]: +; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-VF1IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]] +; CHECK-VF1IC4-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4 +; CHECK-VF1IC4-NEXT: [[CMP_V_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3 +; CHECK-VF1IC4-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 [[RDX]], i32 7 +; CHECK-VF1IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF1IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-VF1IC4-NEXT: br i1 [[EXIT_COND]], label 
%[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-VF1IC4: [[EXIT]]: +; CHECK-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]] +; entry: - br label %for.body + br label %loop -for.body: ; preds = %entry, %for.body - %0 = phi i64 [ 0, %entry ], [ %6, %for.body ] - %1 = phi i32 [ 3, %entry ], [ %5, %for.body ] - %2 = getelementptr inbounds i32, ptr %v, i64 %0 - %3 = load i32, ptr %2, align 4 - %4 = icmp eq i32 %3, 3 - %5 = select i1 %4, i32 %1, i32 7 - %6 = add nuw nsw i64 %0, 1 - %7 = icmp eq i64 %6, %n - br i1 %7, label %exit, label %for.body +loop: ; preds = %entry, %loop + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi i32 [ 3, %entry ], [ %sel, %loop ] + %gep.v.iv = getelementptr inbounds i32, ptr %v, i64 %iv + %load.v.iv = load i32, ptr %gep.v.iv, align 4 + %cmp.v.iv.3 = icmp eq i32 %load.v.iv, 3 + %sel = select i1 %cmp.v.iv.3, i32 %rdx, i32 7 + %iv.next = add nuw nsw i64 %iv, 1 + %exit.cond = icmp eq i64 %iv.next, %n + br i1 %exit.cond, label %exit, label %loop -exit: ; preds = %for.body - ret i32 %5 +exit: ; preds = %loop + ret i32 %sel } - -define i32 @select_const_i32_from_icmp2(ptr nocapture readonly %v, i64 %n) { -; CHECK-LABEL: @select_const_i32_from_icmp2 -; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] -; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x i32> -; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], splat (i32 3) -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[VEC_ICMP]] -; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) -; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 7, i32 3 - +define i32 @select_const_i32_from_icmp2(ptr %v, i64 %n) { +; CHECK-VF4IC1-LABEL: define i32 @select_const_i32_from_icmp2( +; CHECK-VF4IC1-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) { +; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]: +; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-VF4IC1: [[VECTOR_PH]]: +; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-VF4IC1: [[VECTOR_BODY]]: +; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]] +; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 +; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 3) +; CHECK-VF4IC1-NEXT: [[TMP4]] = or <4 x i1> [[VEC_PHI]], [[TMP3]] +; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: +; 
CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]]) +; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = freeze i1 [[TMP6]] +; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 7, i32 3 +; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-VF4IC1: [[SCALAR_PH]]: +; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ] +; CHECK-VF4IC1-NEXT: br label %[[LOOP:.*]] +; CHECK-VF4IC1: [[LOOP]]: +; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-VF4IC1-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]] +; CHECK-VF4IC1-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4 +; CHECK-VF4IC1-NEXT: [[CMP_V_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3 +; CHECK-VF4IC1-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 7, i32 [[RDX]] +; CHECK-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF4IC1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-VF4IC1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-VF4IC1: [[EXIT]]: +; CHECK-VF4IC1-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: ret i32 [[SEL_LCSSA]] +; +; CHECK-VF4IC4-LABEL: define i32 @select_const_i32_from_icmp2( +; CHECK-VF4IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) { +; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]: +; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16 +; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-VF4IC4: [[VECTOR_PH]]: +; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 +; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-VF4IC4: [[VECTOR_BODY]]: +; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]] +; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr 
[[TMP4]], align 4 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4 +; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 3) +; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD4]], splat (i32 3) +; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD5]], splat (i32 3) +; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD6]], splat (i32 3) +; CHECK-VF4IC4-NEXT: [[TMP10]] = or <4 x i1> [[VEC_PHI]], [[TMP6]] +; CHECK-VF4IC4-NEXT: [[TMP11]] = or <4 x i1> [[VEC_PHI1]], [[TMP7]] +; CHECK-VF4IC4-NEXT: [[TMP12]] = or <4 x i1> [[VEC_PHI2]], [[TMP8]] +; CHECK-VF4IC4-NEXT: [[TMP13]] = or <4 x i1> [[VEC_PHI3]], [[TMP9]] +; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: +; CHECK-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or <4 x i1> [[TMP11]], [[TMP10]] +; CHECK-VF4IC4-NEXT: [[BIN_RDX7:%.*]] = or <4 x i1> [[TMP12]], [[BIN_RDX]] +; CHECK-VF4IC4-NEXT: [[BIN_RDX8:%.*]] = or <4 x i1> [[TMP13]], [[BIN_RDX7]] +; CHECK-VF4IC4-NEXT: [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX8]]) +; CHECK-VF4IC4-NEXT: [[TMP16:%.*]] = freeze i1 [[TMP15]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP16]], i32 7, i32 3 +; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-VF4IC4: [[SCALAR_PH]]: +; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ] +; CHECK-VF4IC4-NEXT: br label %[[LOOP:.*]] +; CHECK-VF4IC4: [[LOOP]]: +; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-VF4IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]] +; CHECK-VF4IC4-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4 +; CHECK-VF4IC4-NEXT: [[CMP_V_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3 +; CHECK-VF4IC4-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 7, i32 [[RDX]] +; CHECK-VF4IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF4IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-VF4IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-VF4IC4: [[EXIT]]: +; CHECK-VF4IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-VF4IC4-NEXT: ret i32 [[SEL_LCSSA]] +; +; CHECK-VF1IC4-LABEL: define i32 @select_const_i32_from_icmp2( +; CHECK-VF1IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) { +; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]: +; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-VF1IC4: [[VECTOR_PH]]: +; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-VF1IC4: [[VECTOR_BODY]]: +; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], 
%[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]] +; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP1]] +; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP2]] +; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP3]] +; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], 3 +; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP9]], 3 +; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP10]], 3 +; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP11]], 3 +; CHECK-VF1IC4-NEXT: [[TMP16]] = or i1 [[VEC_PHI]], [[TMP12]] +; CHECK-VF1IC4-NEXT: [[TMP17]] = or i1 [[VEC_PHI1]], [[TMP13]] +; CHECK-VF1IC4-NEXT: [[TMP18]] = or i1 [[VEC_PHI2]], [[TMP14]] +; CHECK-VF1IC4-NEXT: [[TMP19]] = or i1 [[VEC_PHI3]], [[TMP15]] +; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-VF1IC4-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF1IC4-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-VF1IC4: [[MIDDLE_BLOCK]]: +; CHECK-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP17]], [[TMP16]] +; CHECK-VF1IC4-NEXT: [[BIN_RDX4:%.*]] = or i1 [[TMP18]], [[BIN_RDX]] +; CHECK-VF1IC4-NEXT: [[BIN_RDX5:%.*]] = or i1 [[TMP19]], [[BIN_RDX4]] +; CHECK-VF1IC4-NEXT: [[TMP21:%.*]] = freeze i1 [[BIN_RDX5]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP21]], i32 7, i32 3 +; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-VF1IC4: [[SCALAR_PH]]: +; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ] +; CHECK-VF1IC4-NEXT: br label %[[LOOP:.*]] +; CHECK-VF1IC4: [[LOOP]]: +; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-VF1IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]] +; CHECK-VF1IC4-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4 +; CHECK-VF1IC4-NEXT: [[CMP_V_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3 +; CHECK-VF1IC4-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 7, i32 [[RDX]] +; CHECK-VF1IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; 
CHECK-VF1IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-VF1IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-VF1IC4: [[EXIT]]: +; CHECK-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]] +; entry: - br label %for.body + br label %loop -for.body: ; preds = %entry, %for.body - %0 = phi i64 [ 0, %entry ], [ %6, %for.body ] - %1 = phi i32 [ 3, %entry ], [ %5, %for.body ] - %2 = getelementptr inbounds i32, ptr %v, i64 %0 - %3 = load i32, ptr %2, align 4 - %4 = icmp eq i32 %3, 3 - %5 = select i1 %4, i32 7, i32 %1 - %6 = add nuw nsw i64 %0, 1 - %7 = icmp eq i64 %6, %n - br i1 %7, label %exit, label %for.body +loop: ; preds = %entry, %loop + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi i32 [ 3, %entry ], [ %sel, %loop ] + %gep.v.iv = getelementptr inbounds i32, ptr %v, i64 %iv + %load.v.iv = load i32, ptr %gep.v.iv, align 4 + %cmp.v.iv.3 = icmp eq i32 %load.v.iv, 3 + %sel = select i1 %cmp.v.iv.3, i32 7, i32 %rdx + %iv.next = add nuw nsw i64 %iv, 1 + %exit.cond = icmp eq i64 %iv.next, %n + br i1 %exit.cond, label %exit, label %loop -exit: ; preds = %for.body - ret i32 %5 +exit: ; preds = %loop + ret i32 %sel } - -define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 %n) { -; CHECK-LABEL: @select_i32_from_icmp -; CHECK-VF4IC1: vector.ph: -; CHECK-VF4IC1-NOT: shufflevector <4 x i32> -; CHECK-VF4IC1-NOT: shufflevector <4 x i32> -; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] -; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x i32> -; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], splat (i32 3) -; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], splat (i1 true) -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] -; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) -; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 %b, i32 %a +define i32 @select_i32_from_icmp(ptr %v, i32 %a, i32 %b, i64 %n) { +; CHECK-VF4IC1-LABEL: define i32 @select_i32_from_icmp( +; CHECK-VF4IC1-SAME: ptr [[V:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) { +; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]: +; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-VF4IC1: [[VECTOR_PH]]: +; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-VF4IC1: [[VECTOR_BODY]]: +; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]] +; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 +; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 3) +; CHECK-VF4IC1-NEXT: 
[[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) +; CHECK-VF4IC1-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]] +; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF4IC1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: +; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP7]] +; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i32 [[B]], i32 [[A]] +; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-VF4IC1: [[SCALAR_PH]]: +; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ] +; CHECK-VF4IC1-NEXT: br label %[[LOOP:.*]] +; CHECK-VF4IC1: [[LOOP]]: +; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-VF4IC1-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]] +; CHECK-VF4IC1-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4 +; CHECK-VF4IC1-NEXT: [[CMP_LOAD_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3 +; CHECK-VF4IC1-NEXT: [[SEL]] = select i1 [[CMP_LOAD_IV_3]], i32 [[RDX]], i32 [[B]] +; CHECK-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF4IC1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-VF4IC1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-VF4IC1: [[EXIT]]: +; CHECK-VF4IC1-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: ret i32 [[SEL_LCSSA]] +; +; CHECK-VF4IC4-LABEL: define i32 @select_i32_from_icmp( +; CHECK-VF4IC4-SAME: ptr [[V:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) { +; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]: +; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16 +; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-VF4IC4: [[VECTOR_PH]]: +; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 +; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-VF4IC4: [[VECTOR_BODY]]: +; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]] +; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; CHECK-VF4IC4-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4 +; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 3) +; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD4]], splat (i32 3) +; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD5]], splat (i32 3) +; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD6]], splat (i32 3) +; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) +; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) +; CHECK-VF4IC4-NEXT: [[TMP12:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true) +; CHECK-VF4IC4-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP9]], splat (i1 true) +; CHECK-VF4IC4-NEXT: [[TMP14]] = or <4 x i1> [[VEC_PHI]], [[TMP10]] +; CHECK-VF4IC4-NEXT: [[TMP15]] = or <4 x i1> [[VEC_PHI1]], [[TMP11]] +; CHECK-VF4IC4-NEXT: [[TMP16]] = or <4 x i1> [[VEC_PHI2]], [[TMP12]] +; CHECK-VF4IC4-NEXT: [[TMP17]] = or <4 x i1> [[VEC_PHI3]], [[TMP13]] +; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-VF4IC4-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF4IC4-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: +; CHECK-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or <4 x i1> [[TMP15]], [[TMP14]] +; CHECK-VF4IC4-NEXT: [[BIN_RDX7:%.*]] = or <4 x i1> [[TMP16]], [[BIN_RDX]] +; CHECK-VF4IC4-NEXT: [[BIN_RDX8:%.*]] = or <4 x i1> [[TMP17]], [[BIN_RDX7]] +; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX8]]) +; CHECK-VF4IC4-NEXT: [[TMP20:%.*]] = freeze i1 [[TMP19]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP20]], i32 [[B]], i32 [[A]] +; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-VF4IC4: [[SCALAR_PH]]: +; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ] +; CHECK-VF4IC4-NEXT: br label %[[LOOP:.*]] +; CHECK-VF4IC4: [[LOOP]]: +; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-VF4IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]] +; CHECK-VF4IC4-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4 +; CHECK-VF4IC4-NEXT: [[CMP_LOAD_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3 +; CHECK-VF4IC4-NEXT: [[SEL]] = select i1 [[CMP_LOAD_IV_3]], i32 [[RDX]], i32 [[B]] +; CHECK-VF4IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF4IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-VF4IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-VF4IC4: [[EXIT]]: +; CHECK-VF4IC4-NEXT: 
[[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-VF4IC4-NEXT: ret i32 [[SEL_LCSSA]] +; +; CHECK-VF1IC4-LABEL: define i32 @select_i32_from_icmp( +; CHECK-VF1IC4-SAME: ptr [[V:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) { +; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]: +; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-VF1IC4: [[VECTOR_PH]]: +; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-VF1IC4: [[VECTOR_BODY]]: +; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]] +; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP1]] +; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP2]] +; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP3]] +; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], 3 +; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP9]], 3 +; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP10]], 3 +; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP11]], 3 +; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = xor i1 [[TMP12]], true +; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = xor i1 [[TMP13]], true +; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = xor i1 [[TMP14]], true +; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = xor i1 [[TMP15]], true +; CHECK-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP16]] +; CHECK-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP17]] +; CHECK-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP18]] +; CHECK-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP19]] +; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF1IC4-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-VF1IC4: [[MIDDLE_BLOCK]]: +; CHECK-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP21]], [[TMP20]] +; CHECK-VF1IC4-NEXT: [[BIN_RDX4:%.*]] = or i1 [[TMP22]], [[BIN_RDX]] +; CHECK-VF1IC4-NEXT: [[BIN_RDX5:%.*]] = or i1 [[TMP23]], [[BIN_RDX4]] +; CHECK-VF1IC4-NEXT: [[TMP25:%.*]] = freeze i1 [[BIN_RDX5]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP25]], i32 [[B]], i32 [[A]] +; 
CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-VF1IC4: [[SCALAR_PH]]: +; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ] +; CHECK-VF1IC4-NEXT: br label %[[LOOP:.*]] +; CHECK-VF1IC4: [[LOOP]]: +; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-VF1IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]] +; CHECK-VF1IC4-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4 +; CHECK-VF1IC4-NEXT: [[CMP_LOAD_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3 +; CHECK-VF1IC4-NEXT: [[SEL]] = select i1 [[CMP_LOAD_IV_3]], i32 [[RDX]], i32 [[B]] +; CHECK-VF1IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF1IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-VF1IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-VF1IC4: [[EXIT]]: +; CHECK-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]] +; entry: - br label %for.body + br label %loop -for.body: ; preds = %entry, %for.body - %0 = phi i64 [ 0, %entry ], [ %6, %for.body ] - %1 = phi i32 [ %a, %entry ], [ %5, %for.body ] - %2 = getelementptr inbounds i32, ptr %v, i64 %0 - %3 = load i32, ptr %2, align 4 - %4 = icmp eq i32 %3, 3 - %5 = select i1 %4, i32 %1, i32 %b - %6 = add nuw nsw i64 %0, 1 - %7 = icmp eq i64 %6, %n - br i1 %7, label %exit, label %for.body +loop: ; preds = %entry, %loop + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi i32 [ %a, %entry ], [ %sel, %loop ] + %gep.v.iv = getelementptr inbounds i32, ptr %v, i64 %iv + %load.v.iv = load i32, ptr %gep.v.iv, align 4 + %cmp.load.iv.3 = icmp eq i32 %load.v.iv, 3 + %sel = select i1 %cmp.load.iv.3, i32 %rdx, i32 %b + %iv.next = add nuw nsw i64 %iv, 1 + %exit.cond = icmp eq i64 %iv.next, %n + br i1 %exit.cond, label %exit, label %loop -exit: ; preds = %for.body - ret i32 %5 +exit: ; preds = %loop + ret i32 %sel } - -define i32 @select_const_i32_from_fcmp_fast(ptr nocapture readonly %v, i64 %n) { -; CHECK-LABEL: @select_const_i32_from_fcmp_fast -; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] -; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x float> -; CHECK-VF4IC1-NEXT: [[VEC_FCMP:%.*]] = fcmp fast ueq <4 x float> [[VEC_LOAD]], splat (float 3.000000e+00) -; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_FCMP]], splat (i1 true) -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] -; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) -; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 1, i32 2 +define i32 @select_const_i32_from_fcmp_fast(ptr %v, i64 %n) { +; CHECK-VF4IC1-LABEL: define i32 @select_const_i32_from_fcmp_fast( +; CHECK-VF4IC1-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) { +; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]: +; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; 
CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-VF4IC1: [[VECTOR_PH]]: +; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-VF4IC1: [[VECTOR_BODY]]: +; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP0]] +; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 +; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 +; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) +; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) +; CHECK-VF4IC1-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]] +; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF4IC1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: +; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP7]] +; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i32 1, i32 2 +; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-VF4IC1: [[SCALAR_PH]]: +; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ] +; CHECK-VF4IC1-NEXT: br label %[[LOOP:.*]] +; CHECK-VF4IC1: [[LOOP]]: +; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-VF4IC1-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[IV]] +; CHECK-VF4IC1-NEXT: [[LOAD_V_IV:%.*]] = load float, ptr [[GEP_V_IV]], align 4 +; CHECK-VF4IC1-NEXT: [[CMP_LOAD_IV_3:%.*]] = fcmp fast ueq float [[LOAD_V_IV]], 3.000000e+00 +; CHECK-VF4IC1-NEXT: [[SEL]] = select i1 [[CMP_LOAD_IV_3]], i32 [[RDX]], i32 1 +; CHECK-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF4IC1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-VF4IC1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-VF4IC1: [[EXIT]]: +; CHECK-VF4IC1-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: ret i32 [[SEL_LCSSA]] +; +; CHECK-VF4IC4-LABEL: define i32 @select_const_i32_from_fcmp_fast( +; CHECK-VF4IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) { +; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]: +; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16 +; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-VF4IC4: [[VECTOR_PH]]: +; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 +; 
CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-VF4IC4: [[VECTOR_BODY]]: +; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP0]] +; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 +; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) +; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD4]], splat (float 3.000000e+00) +; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD5]], splat (float 3.000000e+00) +; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD6]], splat (float 3.000000e+00) +; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) +; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) +; CHECK-VF4IC4-NEXT: [[TMP12:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true) +; CHECK-VF4IC4-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP9]], splat (i1 true) +; CHECK-VF4IC4-NEXT: [[TMP14]] = or <4 x i1> [[VEC_PHI]], [[TMP10]] +; CHECK-VF4IC4-NEXT: [[TMP15]] = or <4 x i1> [[VEC_PHI1]], [[TMP11]] +; CHECK-VF4IC4-NEXT: [[TMP16]] = or <4 x i1> [[VEC_PHI2]], [[TMP12]] +; CHECK-VF4IC4-NEXT: [[TMP17]] = or <4 x i1> [[VEC_PHI3]], [[TMP13]] +; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-VF4IC4-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF4IC4-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: +; CHECK-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or <4 x i1> [[TMP15]], [[TMP14]] +; CHECK-VF4IC4-NEXT: [[BIN_RDX7:%.*]] = or <4 x i1> [[TMP16]], [[BIN_RDX]] +; CHECK-VF4IC4-NEXT: [[BIN_RDX8:%.*]] = or <4 x i1> [[TMP17]], [[BIN_RDX7]] +; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX8]]) +; CHECK-VF4IC4-NEXT: [[TMP20:%.*]] = freeze i1 [[TMP19]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP20]], i32 1, i32 2 +; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-VF4IC4: [[SCALAR_PH]]: +; 
CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ] +; CHECK-VF4IC4-NEXT: br label %[[LOOP:.*]] +; CHECK-VF4IC4: [[LOOP]]: +; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-VF4IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[IV]] +; CHECK-VF4IC4-NEXT: [[LOAD_V_IV:%.*]] = load float, ptr [[GEP_V_IV]], align 4 +; CHECK-VF4IC4-NEXT: [[CMP_LOAD_IV_3:%.*]] = fcmp fast ueq float [[LOAD_V_IV]], 3.000000e+00 +; CHECK-VF4IC4-NEXT: [[SEL]] = select i1 [[CMP_LOAD_IV_3]], i32 [[RDX]], i32 1 +; CHECK-VF4IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF4IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-VF4IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-VF4IC4: [[EXIT]]: +; CHECK-VF4IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-VF4IC4-NEXT: ret i32 [[SEL_LCSSA]] +; +; CHECK-VF1IC4-LABEL: define i32 @select_const_i32_from_fcmp_fast( +; CHECK-VF1IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) { +; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]: +; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-VF1IC4: [[VECTOR_PH]]: +; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-VF1IC4: [[VECTOR_BODY]]: +; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP0]] +; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP1]] +; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP2]] +; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP3]] +; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP4]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP5]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP7]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = fcmp fast ueq float [[TMP8]], 3.000000e+00 +; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = fcmp fast ueq float [[TMP9]], 3.000000e+00 +; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = fcmp fast ueq float [[TMP10]], 3.000000e+00 +; CHECK-VF1IC4-NEXT: 
[[TMP15:%.*]] = fcmp fast ueq float [[TMP11]], 3.000000e+00 +; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = xor i1 [[TMP12]], true +; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = xor i1 [[TMP13]], true +; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = xor i1 [[TMP14]], true +; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = xor i1 [[TMP15]], true +; CHECK-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP16]] +; CHECK-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP17]] +; CHECK-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP18]] +; CHECK-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP19]] +; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF1IC4-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-VF1IC4: [[MIDDLE_BLOCK]]: +; CHECK-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP21]], [[TMP20]] +; CHECK-VF1IC4-NEXT: [[BIN_RDX4:%.*]] = or i1 [[TMP22]], [[BIN_RDX]] +; CHECK-VF1IC4-NEXT: [[BIN_RDX5:%.*]] = or i1 [[TMP23]], [[BIN_RDX4]] +; CHECK-VF1IC4-NEXT: [[TMP25:%.*]] = freeze i1 [[BIN_RDX5]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP25]], i32 1, i32 2 +; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-VF1IC4: [[SCALAR_PH]]: +; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ] +; CHECK-VF1IC4-NEXT: br label %[[LOOP:.*]] +; CHECK-VF1IC4: [[LOOP]]: +; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-VF1IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[IV]] +; CHECK-VF1IC4-NEXT: [[LOAD_V_IV:%.*]] = load float, ptr [[GEP_V_IV]], align 4 +; CHECK-VF1IC4-NEXT: [[CMP_LOAD_IV_3:%.*]] = fcmp fast ueq float [[LOAD_V_IV]], 3.000000e+00 +; CHECK-VF1IC4-NEXT: [[SEL]] = select i1 [[CMP_LOAD_IV_3]], i32 [[RDX]], i32 1 +; CHECK-VF1IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF1IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-VF1IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-VF1IC4: [[EXIT]]: +; CHECK-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]] +; entry: - br label %for.body + br label %loop -for.body: ; preds = %entry, %for.body - %0 = phi i64 [ 0, %entry ], [ %6, %for.body ] - %1 = phi i32 [ 2, %entry ], [ %5, %for.body ] - %2 = getelementptr inbounds float, ptr %v, i64 %0 - %3 = load float, ptr %2, align 4 - %4 = fcmp fast ueq float %3, 3.0 - %5 = select i1 %4, i32 %1, i32 1 - %6 = add nuw nsw i64 %0, 1 - %7 = icmp eq i64 %6, %n - br i1 %7, label %exit, label %for.body +loop: ; preds = %entry, %loop + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi i32 [ 2, %entry ], [ %sel, %loop ] + %gep.v.iv = getelementptr inbounds float, ptr %v, i64 %iv + %load.v.iv = load float, ptr %gep.v.iv, align 4 + %cmp.load.iv.3 = fcmp fast ueq float %load.v.iv, 3.0 + %sel = select i1 %cmp.load.iv.3, i32 %rdx, i32 1 + %iv.next = add nuw nsw i64 %iv, 1 + %exit.cond = icmp eq i64 %iv.next, %n + br i1 %exit.cond, label %exit, 
label %loop -exit: ; preds = %for.body - ret i32 %5 +exit: ; preds = %loop + ret i32 %sel } - -define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) { -; CHECK-LABEL: @select_const_i32_from_fcmp -; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] -; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x float> -; CHECK-VF4IC1-NEXT: [[VEC_FCMP:%.*]] = fcmp ueq <4 x float> [[VEC_LOAD]], splat (float 3.000000e+00) -; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_FCMP]], splat (i1 true) -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] -; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) -; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 1, i32 2 +define i32 @select_const_i32_from_fcmp(ptr %v, i64 %n) { +; CHECK-VF4IC1-LABEL: define i32 @select_const_i32_from_fcmp( +; CHECK-VF4IC1-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) { +; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]: +; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-VF4IC1: [[VECTOR_PH]]: +; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-VF4IC1: [[VECTOR_BODY]]: +; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP0]] +; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 +; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 +; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = fcmp ueq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) +; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) +; CHECK-VF4IC1-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]] +; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF4IC1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: +; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP7]] +; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i32 1, i32 2 +; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-VF4IC1: [[SCALAR_PH]]: +; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ] +; CHECK-VF4IC1-NEXT: br label %[[LOOP:.*]] +; CHECK-VF4IC1: [[LOOP]]: +; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; 
CHECK-VF4IC1-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[IV]] +; CHECK-VF4IC1-NEXT: [[LOAD_V_IV:%.*]] = load float, ptr [[GEP_V_IV]], align 4 +; CHECK-VF4IC1-NEXT: [[CMP_V_IV_3:%.*]] = fcmp ueq float [[LOAD_V_IV]], 3.000000e+00 +; CHECK-VF4IC1-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 [[RDX]], i32 1 +; CHECK-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF4IC1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-VF4IC1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-VF4IC1: [[EXIT]]: +; CHECK-VF4IC1-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: ret i32 [[SEL_LCSSA]] +; +; CHECK-VF4IC4-LABEL: define i32 @select_const_i32_from_fcmp( +; CHECK-VF4IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) { +; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]: +; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16 +; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-VF4IC4: [[VECTOR_PH]]: +; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 +; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-VF4IC4: [[VECTOR_BODY]]: +; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP0]] +; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 +; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = fcmp ueq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) +; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = fcmp ueq <4 x float> [[WIDE_LOAD4]], splat (float 3.000000e+00) +; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = fcmp ueq <4 x float> [[WIDE_LOAD5]], splat (float 3.000000e+00) +; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = fcmp ueq <4 x float> [[WIDE_LOAD6]], splat (float 3.000000e+00) +; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) +; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) +; CHECK-VF4IC4-NEXT: [[TMP12:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true) +; CHECK-VF4IC4-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP9]], splat (i1 true) +; CHECK-VF4IC4-NEXT: [[TMP14]] = or <4 x i1> [[VEC_PHI]], [[TMP10]] +; 
CHECK-VF4IC4-NEXT: [[TMP15]] = or <4 x i1> [[VEC_PHI1]], [[TMP11]] +; CHECK-VF4IC4-NEXT: [[TMP16]] = or <4 x i1> [[VEC_PHI2]], [[TMP12]] +; CHECK-VF4IC4-NEXT: [[TMP17]] = or <4 x i1> [[VEC_PHI3]], [[TMP13]] +; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-VF4IC4-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF4IC4-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: +; CHECK-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or <4 x i1> [[TMP15]], [[TMP14]] +; CHECK-VF4IC4-NEXT: [[BIN_RDX7:%.*]] = or <4 x i1> [[TMP16]], [[BIN_RDX]] +; CHECK-VF4IC4-NEXT: [[BIN_RDX8:%.*]] = or <4 x i1> [[TMP17]], [[BIN_RDX7]] +; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX8]]) +; CHECK-VF4IC4-NEXT: [[TMP20:%.*]] = freeze i1 [[TMP19]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP20]], i32 1, i32 2 +; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-VF4IC4: [[SCALAR_PH]]: +; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ] +; CHECK-VF4IC4-NEXT: br label %[[LOOP:.*]] +; CHECK-VF4IC4: [[LOOP]]: +; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-VF4IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[IV]] +; CHECK-VF4IC4-NEXT: [[LOAD_V_IV:%.*]] = load float, ptr [[GEP_V_IV]], align 4 +; CHECK-VF4IC4-NEXT: [[CMP_V_IV_3:%.*]] = fcmp ueq float [[LOAD_V_IV]], 3.000000e+00 +; CHECK-VF4IC4-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 [[RDX]], i32 1 +; CHECK-VF4IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF4IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-VF4IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-VF4IC4: [[EXIT]]: +; CHECK-VF4IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-VF4IC4-NEXT: ret i32 [[SEL_LCSSA]] +; +; CHECK-VF1IC4-LABEL: define i32 @select_const_i32_from_fcmp( +; CHECK-VF1IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) { +; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]: +; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-VF1IC4: [[VECTOR_PH]]: +; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-VF1IC4: [[VECTOR_BODY]]: +; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: 
[[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP0]] +; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP1]] +; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP2]] +; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP3]] +; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP4]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP5]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP7]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = fcmp ueq float [[TMP8]], 3.000000e+00 +; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = fcmp ueq float [[TMP9]], 3.000000e+00 +; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = fcmp ueq float [[TMP10]], 3.000000e+00 +; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = fcmp ueq float [[TMP11]], 3.000000e+00 +; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = xor i1 [[TMP12]], true +; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = xor i1 [[TMP13]], true +; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = xor i1 [[TMP14]], true +; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = xor i1 [[TMP15]], true +; CHECK-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP16]] +; CHECK-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP17]] +; CHECK-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP18]] +; CHECK-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP19]] +; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF1IC4-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-VF1IC4: [[MIDDLE_BLOCK]]: +; CHECK-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP21]], [[TMP20]] +; CHECK-VF1IC4-NEXT: [[BIN_RDX4:%.*]] = or i1 [[TMP22]], [[BIN_RDX]] +; CHECK-VF1IC4-NEXT: [[BIN_RDX5:%.*]] = or i1 [[TMP23]], [[BIN_RDX4]] +; CHECK-VF1IC4-NEXT: [[TMP25:%.*]] = freeze i1 [[BIN_RDX5]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP25]], i32 1, i32 2 +; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-VF1IC4: [[SCALAR_PH]]: +; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ] +; CHECK-VF1IC4-NEXT: br label %[[LOOP:.*]] +; CHECK-VF1IC4: [[LOOP]]: +; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-VF1IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[IV]] +; CHECK-VF1IC4-NEXT: [[LOAD_V_IV:%.*]] = load float, ptr [[GEP_V_IV]], align 4 +; CHECK-VF1IC4-NEXT: [[CMP_V_IV_3:%.*]] = fcmp ueq float [[LOAD_V_IV]], 3.000000e+00 +; CHECK-VF1IC4-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 [[RDX]], i32 1 +; CHECK-VF1IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF1IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-VF1IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop 
[[LOOP11:![0-9]+]] +; CHECK-VF1IC4: [[EXIT]]: +; CHECK-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]] +; entry: - br label %for.body + br label %loop -for.body: ; preds = %entry, %for.body - %0 = phi i64 [ 0, %entry ], [ %6, %for.body ] - %1 = phi i32 [ 2, %entry ], [ %5, %for.body ] - %2 = getelementptr inbounds float, ptr %v, i64 %0 - %3 = load float, ptr %2, align 4 - %4 = fcmp ueq float %3, 3.0 - %5 = select i1 %4, i32 %1, i32 1 - %6 = add nuw nsw i64 %0, 1 - %7 = icmp eq i64 %6, %n - br i1 %7, label %exit, label %for.body +loop: ; preds = %entry, %loop + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi i32 [ 2, %entry ], [ %sel, %loop ] + %gep.v.iv = getelementptr inbounds float, ptr %v, i64 %iv + %load.v.iv = load float, ptr %gep.v.iv, align 4 + %cmp.v.iv.3 = fcmp ueq float %load.v.iv, 3.0 + %sel = select i1 %cmp.v.iv.3, i32 %rdx, i32 1 + %iv.next = add nuw nsw i64 %iv, 1 + %exit.cond = icmp eq i64 %iv.next, %n + br i1 %exit.cond, label %exit, label %loop -exit: ; preds = %for.body - ret i32 %5 +exit: ; preds = %loop + ret i32 %sel } - define i32 @select_i32_from_icmp_same_inputs(i32 %a, i32 %b, i64 %n) { -; CHECK-LABEL: @select_i32_from_icmp_same_inputs -; CHECK-VF4IC1: vector.ph: -; CHECK-VF4IC1: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 -; CHECK-VF4IC1-NEXT: [[SPLAT_OF_A:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-VF4IC1-NOT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 %b, i64 0 -; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[SPLAT_OF_A]], splat (i32 3) -; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] -; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], splat (i1 true) -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] -; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) -; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 %b, i32 %a +; CHECK-VF4IC1-LABEL: define i32 @select_i32_from_icmp_same_inputs( +; CHECK-VF4IC1-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) { +; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]: +; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-VF4IC1: [[VECTOR_PH]]: +; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF4IC1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0 +; CHECK-VF4IC1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], splat (i32 3) +; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-VF4IC1: [[VECTOR_BODY]]: +; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) +; CHECK-VF4IC1-NEXT: [[TMP2]] = or <4 x i1> [[VEC_PHI]], [[TMP1]] +; 
CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF4IC1-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: +; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]]) +; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = freeze i1 [[TMP4]] +; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP5]], i32 [[B]], i32 [[A]] +; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-VF4IC1: [[SCALAR_PH]]: +; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ] +; CHECK-VF4IC1-NEXT: br label %[[LOOP:.*]] +; CHECK-VF4IC1: [[LOOP]]: +; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-VF4IC1-NEXT: [[CMP_RDX_3:%.*]] = icmp eq i32 [[RDX]], 3 +; CHECK-VF4IC1-NEXT: [[SEL]] = select i1 [[CMP_RDX_3]], i32 [[RDX]], i32 [[B]] +; CHECK-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF4IC1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-VF4IC1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-VF4IC1: [[EXIT]]: +; CHECK-VF4IC1-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: ret i32 [[SEL_LCSSA]] +; +; CHECK-VF4IC4-LABEL: define i32 @select_i32_from_icmp_same_inputs( +; CHECK-VF4IC4-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) { +; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]: +; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16 +; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-VF4IC4: [[VECTOR_PH]]: +; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 +; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF4IC4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0 +; CHECK-VF4IC4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], splat (i32 3) +; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-VF4IC4: [[VECTOR_BODY]]: +; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) +; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) +; 
CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) +; CHECK-VF4IC4-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP1]] +; CHECK-VF4IC4-NEXT: [[TMP6]] = or <4 x i1> [[VEC_PHI1]], [[TMP2]] +; CHECK-VF4IC4-NEXT: [[TMP7]] = or <4 x i1> [[VEC_PHI2]], [[TMP3]] +; CHECK-VF4IC4-NEXT: [[TMP8]] = or <4 x i1> [[VEC_PHI3]], [[TMP4]] +; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF4IC4-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: +; CHECK-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or <4 x i1> [[TMP6]], [[TMP5]] +; CHECK-VF4IC4-NEXT: [[BIN_RDX4:%.*]] = or <4 x i1> [[TMP7]], [[BIN_RDX]] +; CHECK-VF4IC4-NEXT: [[BIN_RDX5:%.*]] = or <4 x i1> [[TMP8]], [[BIN_RDX4]] +; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX5]]) +; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = freeze i1 [[TMP10]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP11]], i32 [[B]], i32 [[A]] +; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-VF4IC4: [[SCALAR_PH]]: +; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ] +; CHECK-VF4IC4-NEXT: br label %[[LOOP:.*]] +; CHECK-VF4IC4: [[LOOP]]: +; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-VF4IC4-NEXT: [[CMP_RDX_3:%.*]] = icmp eq i32 [[RDX]], 3 +; CHECK-VF4IC4-NEXT: [[SEL]] = select i1 [[CMP_RDX_3]], i32 [[RDX]], i32 [[B]] +; CHECK-VF4IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF4IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-VF4IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-VF4IC4: [[EXIT]]: +; CHECK-VF4IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-VF4IC4-NEXT: ret i32 [[SEL_LCSSA]] +; +; CHECK-VF1IC4-LABEL: define i32 @select_i32_from_icmp_same_inputs( +; CHECK-VF1IC4-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) { +; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]: +; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-VF1IC4: [[VECTOR_PH]]: +; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = icmp eq i32 [[A]], 3 +; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-VF1IC4: [[VECTOR_BODY]]: +; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] 
] +; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = xor i1 [[TMP0]], true +; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = xor i1 [[TMP0]], true +; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = xor i1 [[TMP0]], true +; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = xor i1 [[TMP0]], true +; CHECK-VF1IC4-NEXT: [[TMP5]] = or i1 [[VEC_PHI]], [[TMP1]] +; CHECK-VF1IC4-NEXT: [[TMP6]] = or i1 [[VEC_PHI1]], [[TMP2]] +; CHECK-VF1IC4-NEXT: [[TMP7]] = or i1 [[VEC_PHI2]], [[TMP3]] +; CHECK-VF1IC4-NEXT: [[TMP8]] = or i1 [[VEC_PHI3]], [[TMP4]] +; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF1IC4-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-VF1IC4: [[MIDDLE_BLOCK]]: +; CHECK-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP6]], [[TMP5]] +; CHECK-VF1IC4-NEXT: [[BIN_RDX4:%.*]] = or i1 [[TMP7]], [[BIN_RDX]] +; CHECK-VF1IC4-NEXT: [[BIN_RDX5:%.*]] = or i1 [[TMP8]], [[BIN_RDX4]] +; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = freeze i1 [[BIN_RDX5]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP10]], i32 [[B]], i32 [[A]] +; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-VF1IC4: [[SCALAR_PH]]: +; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ] +; CHECK-VF1IC4-NEXT: br label %[[LOOP:.*]] +; CHECK-VF1IC4: [[LOOP]]: +; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-VF1IC4-NEXT: [[CMP_RDX_3:%.*]] = icmp eq i32 [[RDX]], 3 +; CHECK-VF1IC4-NEXT: [[SEL]] = select i1 [[CMP_RDX_3]], i32 [[RDX]], i32 [[B]] +; CHECK-VF1IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF1IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-VF1IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-VF1IC4: [[EXIT]]: +; CHECK-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]] +; entry: - br label %for.body + br label %loop -for.body: ; preds = %entry, %for.body - %0 = phi i64 [ 0, %entry ], [ %4, %for.body ] - %1 = phi i32 [ %a, %entry ], [ %3, %for.body ] - %2 = icmp eq i32 %1, 3 - %3 = select i1 %2, i32 %1, i32 %b - %4 = add nuw nsw i64 %0, 1 - %5 = icmp eq i64 %4, %n - br i1 %5, label %exit, label %for.body +loop: ; preds = %entry, %loop + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi i32 [ %a, %entry ], [ %sel, %loop ] + %cmp.rdx.3 = icmp eq i32 %rdx, 3 + %sel = select i1 %cmp.rdx.3, i32 %rdx, i32 %b + %iv.next = add nuw nsw i64 %iv, 1 + %exit.cond = icmp eq i64 %iv.next, %n + br i1 %exit.cond, label %exit, label %loop -exit: ; preds = %for.body - ret i32 %3 +exit: ; preds = %loop + ret i32 %sel } - -; Negative tests +;; Negative tests ; We don't support FP reduction variables at the moment. 
-define float @select_const_f32_from_icmp(ptr nocapture readonly %v, i64 %n) { -; CHECK: @select_const_f32_from_icmp -; CHECK-NOT: vector.body +define float @select_const_f32_from_icmp(ptr %v, i64 %n) { +; CHECK-LABEL: define float @select_const_f32_from_icmp( +; CHECK-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi fast float [ 3.000000e+00, %[[ENTRY]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]] +; CHECK-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4 +; CHECK-NEXT: [[CMP_V_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3 +; CHECK-NEXT: [[SEL]] = select fast i1 [[CMP_V_IV_3]], float [[RDX]], float 7.000000e+00 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[SEL_LCSSA:%.*]] = phi float [ [[SEL]], %[[LOOP]] ] +; CHECK-NEXT: ret float [[SEL_LCSSA]] +; entry: - br label %for.body + br label %loop -for.body: ; preds = %entry, %for.body - %0 = phi i64 [ 0, %entry ], [ %6, %for.body ] - %1 = phi fast float [ 3.0, %entry ], [ %5, %for.body ] - %2 = getelementptr inbounds i32, ptr %v, i64 %0 - %3 = load i32, ptr %2, align 4 - %4 = icmp eq i32 %3, 3 - %5 = select fast i1 %4, float %1, float 7.0 - %6 = add nuw nsw i64 %0, 1 - %7 = icmp eq i64 %6, %n - br i1 %7, label %exit, label %for.body +loop: ; preds = %entry, %loop + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi fast float [ 3.0, %entry ], [ %sel, %loop ] + %gep.v.iv = getelementptr inbounds i32, ptr %v, i64 %iv + %load.v.iv = load i32, ptr %gep.v.iv, align 4 + %cmp.v.iv.3 = icmp eq i32 %load.v.iv, 3 + %sel = select fast i1 %cmp.v.iv.3, float %rdx, float 7.0 + %iv.next = add nuw nsw i64 %iv, 1 + %exit.cond = icmp eq i64 %iv.next, %n + br i1 %exit.cond, label %exit, label %loop -exit: ; preds = %for.body - ret float %5 +exit: ; preds = %loop + ret float %sel } - ; We don't support selecting loop-variant values. 
-define i32 @select_variant_i32_from_icmp(ptr nocapture readonly %v1, ptr nocapture readonly %v2, i64 %n) { -; CHECK-LABEL: @select_variant_i32_from_icmp -; CHECK-NOT: vector.body +define i32 @select_variant_i32_from_icmp(ptr %v1, ptr %v2, i64 %n) { +; CHECK-LABEL: define i32 @select_variant_i32_from_icmp( +; CHECK-SAME: ptr [[V1:%.*]], ptr [[V2:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ 3, %[[ENTRY]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_V1_IV:%.*]] = getelementptr inbounds i32, ptr [[V1]], i64 [[IV]] +; CHECK-NEXT: [[LOAD_V1_IV:%.*]] = load i32, ptr [[GEP_V1_IV]], align 4 +; CHECK-NEXT: [[GEP_V2_IV:%.*]] = getelementptr inbounds i32, ptr [[V2]], i64 [[IV]] +; CHECK-NEXT: [[LOAD_V2_IV:%.*]] = load i32, ptr [[GEP_V2_IV]], align 4 +; CHECK-NEXT: [[CMP_V1_IV_3:%.*]] = icmp eq i32 [[LOAD_V1_IV]], 3 +; CHECK-NEXT: [[SEL]] = select i1 [[CMP_V1_IV_3]], i32 [[RDX]], i32 [[LOAD_V2_IV]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[SEL_LCSSA]] +; entry: - br label %for.body + br label %loop -for.body: ; preds = %entry, %for.body - %0 = phi i64 [ 0, %entry ], [ %8, %for.body ] - %1 = phi i32 [ 3, %entry ], [ %7, %for.body ] - %2 = getelementptr inbounds i32, ptr %v1, i64 %0 - %3 = load i32, ptr %2, align 4 - %4 = getelementptr inbounds i32, ptr %v2, i64 %0 - %5 = load i32, ptr %4, align 4 - %6 = icmp eq i32 %3, 3 - %7 = select i1 %6, i32 %1, i32 %5 - %8 = add nuw nsw i64 %0, 1 - %9 = icmp eq i64 %8, %n - br i1 %9, label %exit, label %for.body +loop: ; preds = %entry, %loop + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi i32 [ 3, %entry ], [ %sel, %loop ] + %gep.v1.iv = getelementptr inbounds i32, ptr %v1, i64 %iv + %load.v1.iv = load i32, ptr %gep.v1.iv, align 4 + %gep.v2.iv = getelementptr inbounds i32, ptr %v2, i64 %iv + %load.v2.iv = load i32, ptr %gep.v2.iv, align 4 + %cmp.v1.iv.3 = icmp eq i32 %load.v1.iv, 3 + %sel = select i1 %cmp.v1.iv.3, i32 %rdx, i32 %load.v2.iv + %iv.next = add nuw nsw i64 %iv, 1 + %exit.cond = icmp eq i64 %iv.next, %n + br i1 %exit.cond, label %exit, label %loop -exit: ; preds = %for.body - ret i32 %7 +exit: ; preds = %loop + ret i32 %sel } - ; We only support selects where the input comes from the same PHI as the ; reduction PHI. In the example below, the select uses the induction ; variable input and the icmp uses the reduction PHI. 
define i32 @select_i32_from_icmp_non_redux_phi(i32 %a, i32 %b, i32 %n) { -; CHECK-LABEL: @select_i32_from_icmp_non_redux_phi -; CHECK-NOT: vector.body +; CHECK-LABEL: define i32 @select_i32_from_icmp_non_redux_phi( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[A]], %[[ENTRY]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[CMP_RDX_3:%.*]] = icmp eq i32 [[RDX]], 3 +; CHECK-NEXT: [[SEL]] = select i1 [[CMP_RDX_3]], i32 [[IV]], i32 [[B]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[SEL_LCSSA]] +; entry: - br label %for.body + br label %loop -for.body: ; preds = %entry, %for.body - %0 = phi i32 [ 0, %entry ], [ %4, %for.body ] - %1 = phi i32 [ %a, %entry ], [ %3, %for.body ] - %2 = icmp eq i32 %1, 3 - %3 = select i1 %2, i32 %0, i32 %b - %4 = add nuw nsw i32 %0, 1 - %5 = icmp eq i32 %4, %n - br i1 %5, label %exit, label %for.body +loop: ; preds = %entry, %loop + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi i32 [ %a, %entry ], [ %sel, %loop ] + %cmp.rdx.3 = icmp eq i32 %rdx, 3 + %sel = select i1 %cmp.rdx.3, i32 %iv, i32 %b + %iv.next = add nuw nsw i32 %iv, 1 + %exit.cond = icmp eq i32 %iv.next, %n + br i1 %exit.cond, label %exit, label %loop -exit: ; preds = %for.body - ret i32 %3 +exit: ; preds = %loop + ret i32 %sel } +;. +; CHECK-VF4IC1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK-VF4IC1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-VF4IC1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK-VF4IC1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; CHECK-VF4IC1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK-VF4IC1: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +; CHECK-VF4IC1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; CHECK-VF4IC1: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} +; CHECK-VF4IC1: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} +; CHECK-VF4IC1: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} +; CHECK-VF4IC1: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} +; CHECK-VF4IC1: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} +; CHECK-VF4IC1: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} +; CHECK-VF4IC1: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]} +;. 
+; CHECK-VF4IC4: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK-VF4IC4: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-VF4IC4: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK-VF4IC4: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; CHECK-VF4IC4: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK-VF4IC4: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +; CHECK-VF4IC4: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; CHECK-VF4IC4: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} +; CHECK-VF4IC4: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} +; CHECK-VF4IC4: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} +; CHECK-VF4IC4: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} +; CHECK-VF4IC4: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} +; CHECK-VF4IC4: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} +; CHECK-VF4IC4: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]} +;. +; CHECK-VF1IC4: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK-VF1IC4: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-VF1IC4: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK-VF1IC4: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]} +; CHECK-VF1IC4: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK-VF1IC4: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]} +; CHECK-VF1IC4: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; CHECK-VF1IC4: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]} +; CHECK-VF1IC4: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} +; CHECK-VF1IC4: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]]} +; CHECK-VF1IC4: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} +; CHECK-VF1IC4: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]]} +; CHECK-VF1IC4: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} +; CHECK-VF1IC4: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]} +;. From 4849d593ab07c47f9f520bea636f62d159d57006 Mon Sep 17 00:00:00 2001 From: Maksim Ivanov Date: Tue, 3 Dec 2024 10:10:11 +0000 Subject: [PATCH 047/191] [clang] Warn [[clang::lifetimebound]] misusages on types (#118281) Emit the "cannot be applied to types" warning instead of silently ignoring the attribute when it's attempted to be used on a type (instead of a function argument or the function definition). Before this commit, the warning has been printed when the attribute was (mis)used on a decl-specifier, but not in other places in a declarator. Examples where the warning starts being emitted with this commit: ``` int * [[clang::lifetimebound]] x; void f(int * [[clang::lifetimebound]] x); void g(int * [[clang::lifetimebound]]); ``` Note that the last example is the case of an unnamed function parameter. While in theory Clang could've supported the `[[clang::lifetimebound]]` analysis for unnamed parameters, it doesn't currently, so the commit at least makes the situation better by highlighting this as a warning instead of a silent ignore - which was reported at #96034. 
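For contrast with the newly diagnosed cases above, placements where the attribute appertains to a named parameter or to the implicit object parameter remain accepted. The following is a minimal illustrative sketch (the declarations and names are examples chosen for this note, not code from this patch):

```
// Accepted: the attribute is attached to the named parameter declarations,
// so the returned reference is tied to the lifetimes of the arguments.
const int &min(const int &a [[clang::lifetimebound]],
               const int &b [[clang::lifetimebound]]);

// Accepted: the attribute is attached to the implicit object parameter,
// tying the returned pointer to the lifetime of the Holder object.
struct Holder {
  const char *data() const [[clang::lifetimebound]];
  const char *ptr;
};
```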
--- clang/docs/ReleaseNotes.rst | 17 ++++++++++++++++- clang/lib/Sema/SemaType.cpp | 4 ++++ clang/test/SemaCXX/attr-lifetimebound.cpp | 18 ++++++++++++++++-- 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 01c7899e36c93..4e4dcd83cc28e 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -139,7 +139,7 @@ C++ Specific Potentially Breaking Changes // Fixed version: unsigned operator""_udl_name(unsigned long long); -- Clang will now produce an error diagnostic when [[clang::lifetimebound]] is +- Clang will now produce an error diagnostic when ``[[clang::lifetimebound]]`` is applied on a parameter or an implicit object parameter of a function that returns void. This was previously ignored and had no effect. (#GH107556) @@ -148,6 +148,21 @@ C++ Specific Potentially Breaking Changes // Now diagnoses with an error. void f(int& i [[clang::lifetimebound]]); +- Clang will now produce an error diagnostic when ``[[clang::lifetimebound]]`` + is applied on a type (instead of a function parameter or an implicit object + parameter); this includes the case when the attribute is specified for an + unnamed function parameter. These were previously ignored and had no effect. + (#GH118281) + + .. code-block:: c++ + + // Now diagnoses with an error. + int* [[clang::lifetimebound]] x; + // Now diagnoses with an error. + void f(int* [[clang::lifetimebound]] i); + // Now diagnoses with an error. + void g(int* [[clang::lifetimebound]]); + - Clang now rejects all field accesses on null pointers in constant expressions. The following code used to work but will now be rejected: diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index f32edc5ac0644..75130436282fb 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -8612,7 +8612,11 @@ static void HandleLifetimeBoundAttr(TypeProcessingState &State, CurType = State.getAttributedType( createSimpleAttr(State.getSema().Context, Attr), CurType, CurType); + return; } + State.getSema().Diag(Attr.getLoc(), diag::err_attribute_wrong_decl_type_str) + << Attr << Attr.isRegularKeywordAttribute() + << "parameters and implicit object parameters"; } static void HandleLifetimeCaptureByAttr(TypeProcessingState &State, diff --git a/clang/test/SemaCXX/attr-lifetimebound.cpp b/clang/test/SemaCXX/attr-lifetimebound.cpp index f89b556f5bba0..c7abec61873ef 100644 --- a/clang/test/SemaCXX/attr-lifetimebound.cpp +++ b/clang/test/SemaCXX/attr-lifetimebound.cpp @@ -9,11 +9,25 @@ namespace usage_invalid { ~A() [[clang::lifetimebound]]; // expected-error {{cannot be applied to a destructor}} static int *static_class_member() [[clang::lifetimebound]]; // expected-error {{static member function has no implicit object parameter}} int *explicit_object(this A&) [[clang::lifetimebound]]; // expected-error {{explicit object member function has no implicit object parameter}} - int not_function [[clang::lifetimebound]]; // expected-error {{only applies to parameters and implicit object parameters}} - int [[clang::lifetimebound]] also_not_function; // expected-error {{cannot be applied to types}} + int attr_on_var [[clang::lifetimebound]]; // expected-error {{only applies to parameters and implicit object parameters}} + int [[clang::lifetimebound]] attr_on_int; // expected-error {{cannot be applied to types}} + int * [[clang::lifetimebound]] attr_on_int_ptr; // expected-error {{'lifetimebound' attribute only applies to parameters and implicit object parameters}} + 
int * [[clang::lifetimebound]] * attr_on_int_ptr_ptr; // expected-error {{'lifetimebound' attribute only applies to parameters and implicit object parameters}} + int (* [[clang::lifetimebound]] attr_on_func_ptr)(); // expected-error {{'lifetimebound' attribute only applies to parameters and implicit object parameters}} void void_return_member() [[clang::lifetimebound]]; // expected-error {{'lifetimebound' attribute cannot be applied to an implicit object parameter of a function that returns void; did you mean 'lifetime_capture_by(X)'}} }; int *attr_with_param(int ¶m [[clang::lifetimebound(42)]]); // expected-error {{takes no arguments}} + + void attr_on_ptr_arg(int * [[clang::lifetimebound]] ptr); // expected-error {{'lifetimebound' attribute only applies to parameters and implicit object parameters}} + static_assert((int [[clang::lifetimebound]]) 12); // expected-error {{cannot be applied to types}} + int* attr_on_unnamed_arg(const int& [[clang::lifetimebound]]); // expected-error {{'lifetimebound' attribute only applies to parameters and implicit object parameters}} + template + int* attr_on_template_ptr_arg(T * [[clang::lifetimebound]] ptr); // expected-error {{'lifetimebound' attribute only applies to parameters and implicit object parameters}} + + int (*func_ptr)(int) [[clang::lifetimebound]]; // expected-error {{'lifetimebound' attribute only applies to parameters and implicit object parameters}} + int (*(*func_ptr_ptr)(int) [[clang::lifetimebound]])(int); // expected-error {{'lifetimebound' attribute only applies to parameters and implicit object parameters}} + struct X {}; + int (X::*member_func_ptr)(int) [[clang::lifetimebound]]; // expected-error {{'lifetimebound' attribute only applies to parameters and implicit object parameters}} } namespace usage_ok { From 0f4dc4276f8dd5c5e33c22096612702ede3c81ed Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Tue, 3 Dec 2024 11:14:06 +0100 Subject: [PATCH 048/191] [clang][bytecode] Initialize elements in __builtin_elementwise_popcount (#118457) --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 1 + clang/test/AST/ByteCode/builtin-functions.cpp | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 2da16608e26c4..c5473322ecb28 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -1791,6 +1791,7 @@ static bool interp__builtin_elementwise_popcount(InterpState &S, CodePtr OpPC, INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.atIndex(I).deref() = T::from(Arg.atIndex(I).deref().toAPSInt().popcount()); + Dst.atIndex(I).initialize(); }); } diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp index d8c8d207fbc45..211ca6e164cbf 100644 --- a/clang/test/AST/ByteCode/builtin-functions.cpp +++ b/clang/test/AST/ByteCode/builtin-functions.cpp @@ -7,6 +7,14 @@ // RUN: %clang_cc1 -triple avr -std=c++20 -Wno-string-plus-int -fexperimental-new-constant-interpreter %s -verify=expected,both // RUN: %clang_cc1 -triple avr -std=c++20 -Wno-string-plus-int -verify=ref,both %s -Wno-constant-evaluated +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define LITTLE_END 1 +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define LITTLE_END 0 +#else +#error "huh?" 
+#endif + extern "C" { typedef decltype(sizeof(int)) size_t; extern size_t wcslen(const wchar_t *p); @@ -1140,6 +1148,10 @@ namespace ElementwisePopcount { static_assert(__builtin_elementwise_popcount(0L) == 0); static_assert(__builtin_elementwise_popcount(0xF0F0L) == 8); static_assert(__builtin_elementwise_popcount(~0LL) == 8 * sizeof(long long)); + +#if __INT_WIDTH__ == 32 + static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_popcount((vector4char){1, 2, 3, 4})) == (LITTLE_END ? 0x01020101 : 0x01010201)); +#endif } namespace BuiltinMemcpy { From 4dafb091a0336a6669e6369cb69ef1ea1b39578d Mon Sep 17 00:00:00 2001 From: Julian Nagele Date: Tue, 3 Dec 2024 10:28:12 +0000 Subject: [PATCH 049/191] Revert "Add symbol visibility macros to abi-breaking.h.cmake" (#118464) Reverts llvm/llvm-project#110898 This change has caused a cyclic module dependency `fatal error: cyclic dependency in module 'LLVM_Utils': LLVM_Utils -> LLVM_Config_ABI_Breaking -> LLVM_Utils`. Reverting for now until we have the right fix. --- llvm/include/llvm/Config/abi-breaking.h.cmake | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/Config/abi-breaking.h.cmake b/llvm/include/llvm/Config/abi-breaking.h.cmake index 81495f0569752..2d27e02b1d545 100644 --- a/llvm/include/llvm/Config/abi-breaking.h.cmake +++ b/llvm/include/llvm/Config/abi-breaking.h.cmake @@ -12,8 +12,6 @@ #ifndef LLVM_ABI_BREAKING_CHECKS_H #define LLVM_ABI_BREAKING_CHECKS_H -#include "llvm/Support/Compiler.h" - /* Define to enable checks that alter the LLVM C++ ABI */ #cmakedefine01 LLVM_ENABLE_ABI_BREAKING_CHECKS @@ -45,12 +43,12 @@ #endif namespace llvm { #if LLVM_ENABLE_ABI_BREAKING_CHECKS -LLVM_ABI extern int EnableABIBreakingChecks; +extern int EnableABIBreakingChecks; LLVM_HIDDEN_VISIBILITY __attribute__((weak)) int *VerifyEnableABIBreakingChecks = &EnableABIBreakingChecks; #else -LLVM_ABI extern int DisableABIBreakingChecks; +extern int DisableABIBreakingChecks; LLVM_HIDDEN_VISIBILITY __attribute__((weak)) int *VerifyDisableABIBreakingChecks = &DisableABIBreakingChecks; From 7d7252590916bcd540c7bd4158b9d52af7a0d547 Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Tue, 3 Dec 2024 11:32:52 +0100 Subject: [PATCH 050/191] [AArch64] Fix STG instruction being moved past memcpy (#117191) When merging STG instructions used for AArch64 stack tagging, we were stopping on reaching a load or store instruction, but not calls, so it was possible for an STG to be moved past a call to memcpy. This test case (reduced from fuzzer-generated C code) was the result of StackColoring merging allocas A and B into one stack slot, and StackSafetyAnalysis proving that B does not need tagging, so we end up with tagged and untagged objects in the same stack slot. The tagged object (A) is live first, so it is important that its memory is restored to the background tag before it gets reused to hold B.
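As a rough, hand-written sketch of the shape of the problem (the names and sizes here are assumptions chosen to mirror the reduced test IR, not the original fuzzer input), the scenario the MIR test below models looks roughly like this at the C level:

```
extern int glob[8];
void F56(int *p);
void F78(int *p);

void F55(void) {
  {
    int A[8];   // MTE-tagged object, live first
    F56(A);
  }             // A's slot must be reset to the background tag here (STG/ST2G)
  {
    int B[8];   // untagged object; StackColoring reuses A's stack slot
    __builtin_memcpy(B, glob, sizeof(B));  // lowered to a call to memcpy
    F78(B);
  }
}
```

The fix below simply makes the STG-merging scan stop at calls as well, so the retagging store cannot be reordered past the memcpy call that initializes B.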
--- .../Target/AArch64/AArch64FrameLowering.cpp | 2 +- .../stack-tagging-merge-past-memcpy.mir | 103 ++++++++++++++++++ 2 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AArch64/stack-tagging-merge-past-memcpy.mir diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 2d5ae1c1d14dc..51d22a893c39a 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -4585,7 +4585,7 @@ MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II, break; // Reject anything that may alias the collected instructions. - if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects()) + if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects() || MI.isCall()) break; } diff --git a/llvm/test/CodeGen/AArch64/stack-tagging-merge-past-memcpy.mir b/llvm/test/CodeGen/AArch64/stack-tagging-merge-past-memcpy.mir new file mode 100644 index 0000000000000..45f6bfe80ac2b --- /dev/null +++ b/llvm/test/CodeGen/AArch64/stack-tagging-merge-past-memcpy.mir @@ -0,0 +1,103 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=aarch64 -mattr=+mte -run-pass=prologepilog %s -o - | FileCheck %s +--- | + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" + target triple = "aarch64-unknown-none-elf" + + @glob = global [8 x i32] zeroinitializer, align 4 + + declare dso_local void @F78(ptr %B) + + define void @F55() sanitize_memtag "target-features"="+mte,+strict-align" { + entry: + %basetag = call ptr @llvm.aarch64.irg.sp(i64 0) + %A = alloca i32, i32 8, align 16 + %A.tag = call ptr @llvm.aarch64.tagp.p0(ptr %A, ptr %basetag, i64 0) + %B = alloca i32, i32 8, align 4 + %C = alloca i32, i32 8, align 16 + %C.tag = call ptr @llvm.aarch64.tagp.p0(ptr %C, ptr %basetag, i64 1) + call void @llvm.aarch64.settag(ptr %C.tag, i64 32) + call void @F56(ptr %C.tag) + call void @llvm.lifetime.start.p0(i64 32, ptr %A) + call void @llvm.aarch64.settag(ptr %A.tag, i64 32) + call void @F56(ptr %A.tag) + call void @llvm.aarch64.settag(ptr %A, i64 32) + call void @llvm.lifetime.end.p0(i64 32, ptr %A) + call void @llvm.lifetime.start.p0(i64 32, ptr %A) + call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr align 4 @glob, i64 32, i1 false) + call void @F78(ptr %A) + call void @llvm.lifetime.end.p0(i64 32, ptr %A) + call void @llvm.aarch64.settag(ptr %C, i64 32) + ret void + } + + declare void @F56(ptr) +... 
+--- +name: F55 +frameInfo: + adjustsStack: true +stack: + - { id: 0, name: A, type: default, offset: 0, size: 32, alignment: 16, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -32, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } + - { id: 2, name: C, type: default, offset: 0, size: 32, alignment: 16, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -64, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } +body: | + bb.0.entry: + ; CHECK-LABEL: name: F55 + ; CHECK: liveins: $x19, $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup EMITMTETAGGED + ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 80, 0 + ; CHECK-NEXT: frame-setup STPXi killed $lr, killed $x19, $sp, 8 :: (store (s64) into %stack.3), (store (s64) into %stack.2) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 80 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w19, -8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -16 + ; CHECK-NEXT: renamable $x0 = IRGstack $sp, $xzr + ; CHECK-NEXT: renamable $x19 = TAGPstack $x0, 2, renamable $x0, 1 + ; CHECK-NEXT: ST2Gi renamable $x0, renamable $x0, 0 :: (store (s256) into %ir.C.tag, align 16) + ; CHECK-NEXT: BL @F56, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp + ; CHECK-NEXT: ST2Gi renamable $x19, renamable $x19, 0 :: (store (s256) into %ir.A.tag, align 16) + ; CHECK-NEXT: $x0 = COPY killed renamable $x19 + ; CHECK-NEXT: BL @F56, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp + ; CHECK-NEXT: ST2Gi $sp, $sp, 2 :: (store (s256) into %ir.A, align 16) + ; CHECK-NEXT: renamable $x1 = LOADgot target-flags(aarch64-got) @glob + ; CHECK-NEXT: $x0 = ADDXri $sp, 32, 0 + ; CHECK-NEXT: dead $w2 = MOVi32imm 32, implicit-def $x2 + ; CHECK-NEXT: BL &memcpy, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit-def $sp, implicit-def dead $x0 + ; CHECK-NEXT: $x0 = ADDXri $sp, 32, 0 + ; CHECK-NEXT: BL @F78, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp + ; CHECK-NEXT: ST2Gi $sp, $sp, 0 :: (store (s256) into %ir.C, align 16) + ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 64, 0 + ; CHECK-NEXT: early-clobber $sp, $lr, $x19 = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.3), (load (s64) from %stack.2) + ; CHECK-NEXT: RET_ReallyLR + renamable $x0 = IRGstack $sp, $xzr + renamable $x19 = TAGPstack %stack.0.A, 0, renamable $x0, 1 + ST2Gi renamable $x0, renamable $x0, 0 :: (store (s256) into %ir.C.tag, align 16) + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @F56, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ST2Gi renamable $x19, renamable $x19, 0 :: (store (s256) into %ir.A.tag, align 16) + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + $x0 = COPY killed renamable $x19 + BL @F56, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ST2Gi $sp, %stack.0.A, 0 :: (store (s256) into %ir.A, align 16) + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + renamable $x1 = LOADgot target-flags(aarch64-got) @glob + $x0 = ADDXri %stack.0.A, 0, 0 + dead $w2 = MOVi32imm 32, implicit-def $x2 + BL &memcpy, csr_aarch64_aapcs, implicit-def dead 
$lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit-def $sp, implicit-def dead $x0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + $x0 = ADDXri %stack.0.A, 0, 0 + BL @F78, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ST2Gi $sp, %stack.2.C, 0 :: (store (s256) into %ir.C, align 16) + RET_ReallyLR + +... From 9c5a84b394518cb8087eb0b14e9b55244adb61e7 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 3 Dec 2024 11:40:57 +0100 Subject: [PATCH 051/191] [InstCombine] Support nusw in icmp of gep with base Proof: https://alive2.llvm.org/ce/z/omnQXt --- .../lib/Transforms/InstCombine/InstCombineCompares.cpp | 3 ++- llvm/test/Transforms/InstCombine/icmp-gep.ll | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 5871973776683..74684a81d8aac 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -691,7 +691,8 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, RHS = RHS->stripPointerCasts(); Value *PtrBase = GEPLHS->getOperand(0); - if (PtrBase == RHS && (GEPLHS->isInBounds() || ICmpInst::isEquality(Cond))) { + if (PtrBase == RHS && + (GEPLHS->hasNoUnsignedSignedWrap() || ICmpInst::isEquality(Cond))) { // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0). Value *Offset = EmitGEPOffset(GEPLHS); return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset, diff --git a/llvm/test/Transforms/InstCombine/icmp-gep.ll b/llvm/test/Transforms/InstCombine/icmp-gep.ll index 776716fe90873..aaa04ade7513e 100644 --- a/llvm/test/Transforms/InstCombine/icmp-gep.ll +++ b/llvm/test/Transforms/InstCombine/icmp-gep.ll @@ -133,6 +133,16 @@ define i1 @ult_base_inbounds(ptr %x, i64 %y) { ret i1 %r } +define i1 @ult_base_nusw(ptr %x, i64 %y) { +; CHECK-LABEL: @ult_base_nusw( +; CHECK-NEXT: [[R:%.*]] = icmp slt i64 [[Y:%.*]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %g = getelementptr nusw i8, ptr %x, i64 %y + %r = icmp ult ptr %g, %x + ret i1 %r +} + define i1 @ugt_base_inbounds_commute(i64 %y) { ; CHECK-LABEL: @ugt_base_inbounds_commute( ; CHECK-NEXT: [[X:%.*]] = call ptr @getptr() From bdc6faf775bc52b5225013312149dfba21f88aa2 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 3 Dec 2024 11:44:46 +0100 Subject: [PATCH 052/191] [InstCombine] Support nusw in icmp of two geps with same base Proof: https://alive2.llvm.org/ce/z/BYNQ7s --- .../InstCombine/InstCombineCompares.cpp | 6 ++--- llvm/test/Transforms/InstCombine/icmp-gep.ll | 24 +++++++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 74684a81d8aac..579214c28fc30 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -786,7 +786,7 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, return transformToIndexedCompare(GEPLHS, RHS, Cond, DL, *this); } - bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds(); + GEPNoWrapFlags NW = GEPLHS->getNoWrapFlags() & GEPRHS->getNoWrapFlags(); if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands() && GEPLHS->getSourceElementType() == 
GEPRHS->getSourceElementType()) { // If the GEPs only differ by one index, compare it. @@ -814,7 +814,7 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, return replaceInstUsesWith(I, // No comparison is needed here. ConstantInt::get(I.getType(), ICmpInst::isTrueWhenEqual(Cond))); - else if (NumDifferences == 1 && GEPsInBounds) { + else if (NumDifferences == 1 && NW.hasNoUnsignedSignedWrap()) { Value *LHSV = GEPLHS->getOperand(DiffOperand); Value *RHSV = GEPRHS->getOperand(DiffOperand); // Make sure we do a signed comparison here. @@ -822,7 +822,7 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, } } - if (GEPsInBounds || CmpInst::isEquality(Cond)) { + if (NW.hasNoUnsignedSignedWrap() || CmpInst::isEquality(Cond)) { // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2) Value *L = EmitGEPOffset(GEPLHS, /*RewriteGEP=*/true); Value *R = EmitGEPOffset(GEPRHS, /*RewriteGEP=*/true); diff --git a/llvm/test/Transforms/InstCombine/icmp-gep.ll b/llvm/test/Transforms/InstCombine/icmp-gep.ll index aaa04ade7513e..1545d034b2ac3 100644 --- a/llvm/test/Transforms/InstCombine/icmp-gep.ll +++ b/llvm/test/Transforms/InstCombine/icmp-gep.ll @@ -295,6 +295,30 @@ define i1 @test60(ptr %foo, i64 %i, i64 %j) { ret i1 %cmp } +define i1 @test60_nusw(ptr %foo, i64 %i, i64 %j) { +; CHECK-LABEL: @test60_nusw( +; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nsw i64 [[I:%.*]], 2 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[GEP1_IDX]], [[J:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %gep1 = getelementptr nusw i32, ptr %foo, i64 %i + %gep2 = getelementptr nusw i8, ptr %foo, i64 %j + %cmp = icmp ult ptr %gep1, %gep2 + ret i1 %cmp +} + +define i1 @test60_nusw_inbounds(ptr %foo, i64 %i, i64 %j) { +; CHECK-LABEL: @test60_nusw_inbounds( +; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nsw i64 [[I:%.*]], 2 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[GEP1_IDX]], [[J:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %gep1 = getelementptr nusw i32, ptr %foo, i64 %i + %gep2 = getelementptr inbounds i8, ptr %foo, i64 %j + %cmp = icmp ult ptr %gep1, %gep2 + ret i1 %cmp +} + define i1 @test_gep_ult_no_inbounds(ptr %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @test_gep_ult_no_inbounds( ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr [[FOO:%.*]], i64 [[I:%.*]] From 3f39c5df08d4ca1e7f852908e9fb255db24538da Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Tue, 3 Dec 2024 11:51:49 +0100 Subject: [PATCH 053/191] [clang][bytecode] Reject memcpy dummy pointers after null check (#118460) To match the diagnostic output of the current interpreter. --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 7 ++++--- clang/test/AST/ByteCode/builtin-functions.cpp | 4 ++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index c5473322ecb28..b788656f9484f 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -1813,9 +1813,6 @@ static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC, bool Move = (ID == Builtin::BI__builtin_memmove || ID == Builtin::BImemmove); - if (DestPtr.isDummy() || SrcPtr.isDummy()) - return false; - // If the size is zero, we treat this as always being a valid no-op. if (Size.isZero()) { S.Stk.push(DestPtr); @@ -1830,6 +1827,10 @@ static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC, return false; } + // As a last resort, reject dummy pointers. 
+ if (DestPtr.isDummy() || SrcPtr.isDummy()) + return false; + if (!DoBitCastPtr(S, OpPC, SrcPtr, DestPtr)) return false; diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp index 211ca6e164cbf..b951c04dde598 100644 --- a/clang/test/AST/ByteCode/builtin-functions.cpp +++ b/clang/test/AST/ByteCode/builtin-functions.cpp @@ -1169,6 +1169,10 @@ namespace BuiltinMemcpy { static_assert(__builtin_memcpy(null_incomplete, null_incomplete, sizeof(wchar_t))); // both-error {{not an integral constant expression}} \ // both-note {{source of 'memcpy' is nullptr}} + wchar_t global; + constexpr wchar_t *null = 0; + static_assert(__builtin_memcpy(&global, null, sizeof(wchar_t))); // both-error {{not an integral constant expression}} \ + // both-note {{source of 'memcpy' is nullptr}} constexpr int simpleMove() { int a = 12; From 51b74bb9f6457cbe53776a2a35296189c5db52f3 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Tue, 3 Dec 2024 10:29:50 +0100 Subject: [PATCH 054/191] Reapply "[lldb] Use the function block as a source for function ranges (#117996)" This reverts commit 2526d5b1689389da9b194b5ec2878cfb2f4aca93, reapplying ba14dac481564000339ba22ab867617590184f4c after fixing the conflict with #117532. The change is that Function::GetAddressRanges now recomputes the returned value instead of returning the member. This means it now returns a value instead of a reference type. --- lldb/include/lldb/Symbol/Function.h | 5 +- lldb/source/API/SBFunction.cpp | 2 +- .../Breakpad/SymbolFileBreakpad.cpp | 4 +- .../SymbolFile/DWARF/SymbolFileDWARF.cpp | 179 +++++++----------- .../SymbolFile/DWARF/SymbolFileDWARF.h | 3 +- .../NativePDB/SymbolFileNativePDB.cpp | 12 +- .../Plugins/SymbolFile/PDB/SymbolFilePDB.cpp | 52 ++--- lldb/source/Symbol/Function.cpp | 16 +- .../Python/sb_function_ranges.s | 2 +- .../DWARF/x86/discontinuous-function.s | 2 +- .../SymbolFile/PDB/function-nested-block.test | 1 - 11 files changed, 112 insertions(+), 166 deletions(-) diff --git a/lldb/include/lldb/Symbol/Function.h b/lldb/include/lldb/Symbol/Function.h index 855940a6415d7..e4118c1f9be86 100644 --- a/lldb/include/lldb/Symbol/Function.h +++ b/lldb/include/lldb/Symbol/Function.h @@ -447,7 +447,7 @@ class Function : public UserID, public SymbolContextScope { /// DEPRECATED: Use GetAddressRanges instead. const AddressRange &GetAddressRange() { return m_range; } - const AddressRanges &GetAddressRanges() const { return m_ranges; } + AddressRanges GetAddressRanges() { return m_block.GetRanges(); } lldb::LanguageType GetLanguage() const; /// Find the file and line number of the source location of the start of the @@ -653,9 +653,6 @@ class Function : public UserID, public SymbolContextScope { /// All lexical blocks contained in this function. Block m_block; - /// List of address ranges belonging to the function. - AddressRanges m_ranges; - /// The function address range that covers the widest range needed to contain /// all blocks. DEPRECATED: do not use this field in new code as the range may /// include addresses belonging to other functions. 
diff --git a/lldb/source/API/SBFunction.cpp b/lldb/source/API/SBFunction.cpp index 2ef62eea4d199..3f6b4eea98318 100644 --- a/lldb/source/API/SBFunction.cpp +++ b/lldb/source/API/SBFunction.cpp @@ -154,7 +154,7 @@ SBAddress SBFunction::GetEndAddress() { SBAddress addr; if (m_opaque_ptr) { - llvm::ArrayRef ranges = m_opaque_ptr->GetAddressRanges(); + AddressRanges ranges = m_opaque_ptr->GetAddressRanges(); if (!ranges.empty()) { // Return the end of the first range, use GetRanges to get all ranges. addr.SetAddress(ranges.front().GetBaseAddress()); diff --git a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp index df3bf157278da..bc886259d6fa5 100644 --- a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp +++ b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp @@ -299,9 +299,7 @@ size_t SymbolFileBreakpad::ParseBlocksRecursive(Function &func) { // "INLINE 0 ...", the current level is 0 and its parent block is the // function block at index 0. std::vector blocks; - Block &block = func.GetBlock(false); - block.AddRange(Block::Range(0, func.GetAddressRange().GetByteSize())); - blocks.push_back(&block); + blocks.push_back(&func.GetBlock(false)); size_t blocks_added = 0; addr_t func_base = func.GetAddressRange().GetBaseAddress().GetOffset(); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index fe711c56958c4..6f19b264eb3dd 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -1305,121 +1305,76 @@ bool SymbolFileDWARF::ParseDebugMacros(CompileUnit &comp_unit) { return true; } -size_t SymbolFileDWARF::ParseBlocksRecursive( - lldb_private::CompileUnit &comp_unit, Block *parent_block, - const DWARFDIE &orig_die, addr_t subprogram_low_pc, uint32_t depth) { +size_t SymbolFileDWARF::ParseBlocksRecursive(CompileUnit &comp_unit, + Block *parent_block, DWARFDIE die, + addr_t subprogram_low_pc) { size_t blocks_added = 0; - DWARFDIE die = orig_die; - while (die) { + for (; die; die = die.GetSibling()) { dw_tag_t tag = die.Tag(); - switch (tag) { - case DW_TAG_inlined_subroutine: - case DW_TAG_subprogram: - case DW_TAG_lexical_block: { - Block *block = nullptr; - if (tag == DW_TAG_subprogram) { - // Skip any DW_TAG_subprogram DIEs that are inside of a normal or - // inlined functions. These will be parsed on their own as separate - // entities. - - if (depth > 0) - break; + if (tag != DW_TAG_inlined_subroutine && tag != DW_TAG_lexical_block) + continue; - block = parent_block; - } else { - block = parent_block->CreateChild(die.GetID()).get(); - } - DWARFRangeList ranges; - const char *name = nullptr; - const char *mangled_name = nullptr; - - std::optional decl_file; - std::optional decl_line; - std::optional decl_column; - std::optional call_file; - std::optional call_line; - std::optional call_column; - if (die.GetDIENamesAndRanges(name, mangled_name, ranges, decl_file, - decl_line, decl_column, call_file, call_line, - call_column, nullptr)) { - if (tag == DW_TAG_subprogram) { - assert(subprogram_low_pc == LLDB_INVALID_ADDRESS); - subprogram_low_pc = ranges.GetMinRangeBase(0); - } else if (tag == DW_TAG_inlined_subroutine) { - // We get called here for inlined subroutines in two ways. The first - // time is when we are making the Function object for this inlined - // concrete instance. 
Since we're creating a top level block at - // here, the subprogram_low_pc will be LLDB_INVALID_ADDRESS. So we - // need to adjust the containing address. The second time is when we - // are parsing the blocks inside the function that contains the - // inlined concrete instance. Since these will be blocks inside the - // containing "real" function the offset will be for that function. - if (subprogram_low_pc == LLDB_INVALID_ADDRESS) { - subprogram_low_pc = ranges.GetMinRangeBase(0); - } - } - - const size_t num_ranges = ranges.GetSize(); - for (size_t i = 0; i < num_ranges; ++i) { - const DWARFRangeList::Entry &range = ranges.GetEntryRef(i); - const addr_t range_base = range.GetRangeBase(); - if (range_base >= subprogram_low_pc) - block->AddRange(Block::Range(range_base - subprogram_low_pc, - range.GetByteSize())); - else { - GetObjectFile()->GetModule()->ReportError( - "{0:x8}: adding range [{1:x16}-{2:x16}) which has a base " - "that is less than the function's low PC {3:x16}. Please file " - "a bug and attach the file at the " - "start of this error message", - block->GetID(), range_base, range.GetRangeEnd(), - subprogram_low_pc); - } - } - block->FinalizeRanges(); - - if (tag != DW_TAG_subprogram && - (name != nullptr || mangled_name != nullptr)) { - std::unique_ptr decl_up; - if (decl_file || decl_line || decl_column) - decl_up = std::make_unique( - comp_unit.GetSupportFiles().GetFileSpecAtIndex( - decl_file ? *decl_file : 0), - decl_line ? *decl_line : 0, decl_column ? *decl_column : 0); - - std::unique_ptr call_up; - if (call_file || call_line || call_column) - call_up = std::make_unique( - comp_unit.GetSupportFiles().GetFileSpecAtIndex( - call_file ? *call_file : 0), - call_line ? *call_line : 0, call_column ? *call_column : 0); - - block->SetInlinedFunctionInfo(name, mangled_name, decl_up.get(), - call_up.get()); + Block *block = parent_block->CreateChild(die.GetID()).get(); + DWARFRangeList ranges; + const char *name = nullptr; + const char *mangled_name = nullptr; + + std::optional decl_file; + std::optional decl_line; + std::optional decl_column; + std::optional call_file; + std::optional call_line; + std::optional call_column; + if (die.GetDIENamesAndRanges(name, mangled_name, ranges, decl_file, + decl_line, decl_column, call_file, call_line, + call_column, nullptr)) { + const size_t num_ranges = ranges.GetSize(); + for (size_t i = 0; i < num_ranges; ++i) { + const DWARFRangeList::Entry &range = ranges.GetEntryRef(i); + const addr_t range_base = range.GetRangeBase(); + if (range_base >= subprogram_low_pc) + block->AddRange(Block::Range(range_base - subprogram_low_pc, + range.GetByteSize())); + else { + GetObjectFile()->GetModule()->ReportError( + "{0:x8}: adding range [{1:x16}-{2:x16}) which has a base " + "that is less than the function's low PC {3:x16}. Please file " + "a bug and attach the file at the " + "start of this error message", + block->GetID(), range_base, range.GetRangeEnd(), + subprogram_low_pc); } + } + block->FinalizeRanges(); + + if (tag != DW_TAG_subprogram && + (name != nullptr || mangled_name != nullptr)) { + std::unique_ptr decl_up; + if (decl_file || decl_line || decl_column) + decl_up = std::make_unique( + comp_unit.GetSupportFiles().GetFileSpecAtIndex( + decl_file ? *decl_file : 0), + decl_line ? *decl_line : 0, decl_column ? *decl_column : 0); + + std::unique_ptr call_up; + if (call_file || call_line || call_column) + call_up = std::make_unique( + comp_unit.GetSupportFiles().GetFileSpecAtIndex( + call_file ? *call_file : 0), + call_line ? 
*call_line : 0, call_column ? *call_column : 0); + + block->SetInlinedFunctionInfo(name, mangled_name, decl_up.get(), + call_up.get()); + } - ++blocks_added; + ++blocks_added; - if (die.HasChildren()) { - blocks_added += - ParseBlocksRecursive(comp_unit, block, die.GetFirstChild(), - subprogram_low_pc, depth + 1); - } + if (die.HasChildren()) { + blocks_added += ParseBlocksRecursive( + comp_unit, block, die.GetFirstChild(), subprogram_low_pc); } - } break; - default: - break; } - - // Only parse siblings of the block if we are not at depth zero. A depth of - // zero indicates we are currently parsing the top level DW_TAG_subprogram - // DIE - - if (depth == 0) - die.Clear(); - else - die = die.GetSibling(); } return blocks_added; } @@ -3240,8 +3195,16 @@ size_t SymbolFileDWARF::ParseBlocksRecursive(Function &func) { DWARFDIE function_die = dwarf_cu->GetNonSkeletonUnit().GetDIE(function_die_offset); if (function_die) { - ParseBlocksRecursive(*comp_unit, &func.GetBlock(false), function_die, - LLDB_INVALID_ADDRESS, 0); + // We can't use the file address from the Function object as (in the OSO + // case) it will already be remapped to the main module. + DWARFRangeList ranges = function_die.GetDIE()->GetAttributeAddressRanges( + function_die.GetCU(), + /*check_hi_lo_pc=*/true); + lldb::addr_t function_file_addr = + ranges.GetMinRangeBase(LLDB_INVALID_ADDRESS); + if (function_file_addr != LLDB_INVALID_ADDRESS) + ParseBlocksRecursive(*comp_unit, &func.GetBlock(false), + function_die.GetFirstChild(), function_file_addr); } return functions_added; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h index ac25a0c48ee7d..76f4188fdf4af 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h @@ -395,8 +395,7 @@ class SymbolFileDWARF : public SymbolFileCommon { Function *ParseFunction(CompileUnit &comp_unit, const DWARFDIE &die); size_t ParseBlocksRecursive(CompileUnit &comp_unit, Block *parent_block, - const DWARFDIE &die, - lldb::addr_t subprogram_low_pc, uint32_t depth); + DWARFDIE die, lldb::addr_t subprogram_low_pc); size_t ParseTypes(const SymbolContext &sc, const DWARFDIE &die, bool parse_siblings, bool parse_children); diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp index d17fedf26b4c4..27d51bbd1cb56 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp @@ -394,18 +394,12 @@ Block *SymbolFileNativePDB::CreateBlock(PdbCompilandSymId block_id) { switch (sym.kind()) { case S_GPROC32: - case S_LPROC32: { + case S_LPROC32: // This is a function. It must be global. Creating the Function entry // for it automatically creates a block for it. - FunctionSP func = GetOrCreateFunction(block_id, *comp_unit); - if (func) { - Block &block = func->GetBlock(false); - if (block.GetNumRanges() == 0) - block.AddRange(Block::Range(0, func->GetAddressRange().GetByteSize())); - return █ - } + if (FunctionSP func = GetOrCreateFunction(block_id, *comp_unit)) + return &func->GetBlock(false); break; - } case S_BLOCK32: { // This is a block. Its parent is either a function or another block. In // either case, its parent can be viewed as a block (e.g. 
a function diff --git a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp index 4935b0fbdfd87..b7854c05d345a 100644 --- a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp @@ -402,44 +402,32 @@ static size_t ParseFunctionBlocksForPDBSymbol( assert(pdb_symbol && parent_block); size_t num_added = 0; - switch (pdb_symbol->getSymTag()) { - case PDB_SymType::Block: - case PDB_SymType::Function: { - Block *block = nullptr; - auto &raw_sym = pdb_symbol->getRawSymbol(); - if (auto *pdb_func = llvm::dyn_cast(pdb_symbol)) { - if (pdb_func->hasNoInlineAttribute()) - break; - if (is_top_parent) - block = parent_block; - else - break; - } else if (llvm::isa(pdb_symbol)) { - auto uid = pdb_symbol->getSymIndexId(); - if (parent_block->FindBlockByID(uid)) - break; - if (raw_sym.getVirtualAddress() < func_file_vm_addr) - break; - block = parent_block->CreateChild(pdb_symbol->getSymIndexId()).get(); - } else - llvm_unreachable("Unexpected PDB symbol!"); + if (!is_top_parent) { + // Ranges for the top block were parsed together with the function. + if (pdb_symbol->getSymTag() != PDB_SymType::Block) + return num_added; + auto &raw_sym = pdb_symbol->getRawSymbol(); + assert(llvm::isa(pdb_symbol)); + auto uid = pdb_symbol->getSymIndexId(); + if (parent_block->FindBlockByID(uid)) + return num_added; + if (raw_sym.getVirtualAddress() < func_file_vm_addr) + return num_added; + + Block *block = parent_block->CreateChild(pdb_symbol->getSymIndexId()).get(); block->AddRange(Block::Range( raw_sym.getVirtualAddress() - func_file_vm_addr, raw_sym.getLength())); block->FinalizeRanges(); - ++num_added; + } + auto results_up = pdb_symbol->findAllChildren(); + if (!results_up) + return num_added; - auto results_up = pdb_symbol->findAllChildren(); - if (!results_up) - break; - while (auto symbol_up = results_up->getNext()) { - num_added += ParseFunctionBlocksForPDBSymbol( - func_file_vm_addr, symbol_up.get(), block, false); - } - } break; - default: - break; + while (auto symbol_up = results_up->getNext()) { + num_added += ParseFunctionBlocksForPDBSymbol( + func_file_vm_addr, symbol_up.get(), parent_block, false); } return num_added; } diff --git a/lldb/source/Symbol/Function.cpp b/lldb/source/Symbol/Function.cpp index b346749ca06ec..4f07b946353a4 100644 --- a/lldb/source/Symbol/Function.cpp +++ b/lldb/source/Symbol/Function.cpp @@ -279,9 +279,14 @@ Function::Function(CompileUnit *comp_unit, lldb::user_id_t func_uid, AddressRanges ranges) : UserID(func_uid), m_comp_unit(comp_unit), m_type_uid(type_uid), m_type(type), m_mangled(mangled), m_block(*this, func_uid), - m_ranges(std::move(ranges)), m_range(CollapseRanges(m_ranges)), - m_frame_base(), m_flags(), m_prologue_byte_size(0) { + m_range(CollapseRanges(ranges)), m_prologue_byte_size(0) { assert(comp_unit != nullptr); + lldb::addr_t base_file_addr = m_range.GetBaseAddress().GetFileAddress(); + for (const AddressRange &range : ranges) + m_block.AddRange( + Block::Range(range.GetBaseAddress().GetFileAddress() - base_file_addr, + range.GetByteSize())); + m_block.FinalizeRanges(); } Function::~Function() = default; @@ -426,13 +431,16 @@ void Function::GetDescription(Stream *s, lldb::DescriptionLevel level, llvm::interleaveComma(decl_context, *s, [&](auto &ctx) { ctx.Dump(*s); }); *s << "}"; } - *s << ", range" << (m_ranges.size() > 1 ? "s" : "") << " = "; + *s << ", range" << (m_block.GetNumRanges() > 1 ? 
"s" : "") << " = "; Address::DumpStyle fallback_style = level == eDescriptionLevelVerbose ? Address::DumpStyleModuleWithFileAddress : Address::DumpStyleFileAddress; - for (const AddressRange &range : m_ranges) + for (unsigned idx = 0; idx < m_block.GetNumRanges(); ++idx) { + AddressRange range; + m_block.GetRangeAtIndex(idx, range); range.Dump(s, target, Address::DumpStyleLoadAddress, fallback_style); + } } void Function::Dump(Stream *s, bool show_context) const { diff --git a/lldb/test/Shell/ScriptInterpreter/Python/sb_function_ranges.s b/lldb/test/Shell/ScriptInterpreter/Python/sb_function_ranges.s index 09b41148c7068..a9e4104f2aaf7 100644 --- a/lldb/test/Shell/ScriptInterpreter/Python/sb_function_ranges.s +++ b/lldb/test/Shell/ScriptInterpreter/Python/sb_function_ranges.s @@ -5,7 +5,7 @@ # RUN: %lldb %t/input.o -o "command script import %t/script.py" -o exit | FileCheck %s # CHECK: Found 1 function(s). -# CHECK: foo: [input.o[0x0-0x7), input.o[0x7-0xe), input.o[0x14-0x1b), input.o[0x1b-0x1c)] +# CHECK: foo: [input.o[0x0-0xe), input.o[0x14-0x1c)] #--- script.py import lldb diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/discontinuous-function.s b/lldb/test/Shell/SymbolFile/DWARF/x86/discontinuous-function.s index 2584158207cc8..b03d5d12ad2a1 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/discontinuous-function.s +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/discontinuous-function.s @@ -10,7 +10,7 @@ # CHECK: 1 match found in {{.*}} # CHECK: Summary: {{.*}}`foo -# CHECK: Function: id = {{.*}}, name = "foo", ranges = [0x0000000000000000-0x0000000000000007)[0x0000000000000007-0x000000000000000e)[0x0000000000000014-0x000000000000001b)[0x000000000000001b-0x000000000000001c) +# CHECK: Function: id = {{.*}}, name = "foo", ranges = [0x0000000000000000-0x000000000000000e)[0x0000000000000014-0x000000000000001c) .text diff --git a/lldb/test/Shell/SymbolFile/PDB/function-nested-block.test b/lldb/test/Shell/SymbolFile/PDB/function-nested-block.test index 1cb20a4036382..9057d01c25840 100644 --- a/lldb/test/Shell/SymbolFile/PDB/function-nested-block.test +++ b/lldb/test/Shell/SymbolFile/PDB/function-nested-block.test @@ -2,7 +2,6 @@ REQUIRES: system-windows, lld RUN: %build --compiler=clang-cl --nodefaultlib --output=%t.exe %S/Inputs/FunctionNestedBlockTest.cpp RUN: lldb-test symbols -find=function -file FunctionNestedBlockTest.cpp -line 4 %t.exe | FileCheck --check-prefix=CHECK-FUNCTION %s RUN: lldb-test symbols -find=block -file FunctionNestedBlockTest.cpp -line 4 %t.exe | FileCheck --check-prefix=CHECK-BLOCK %s -XFAIL: * CHECK-FUNCTION: Found 1 functions: CHECK-FUNCTION: name = "{{.*}}", mangled = "{{_?}}main" From 9a4c5a59d4ec0c582f56b221a64889c077f68376 Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Tue, 3 Dec 2024 11:04:04 +0000 Subject: [PATCH 055/191] Revert "Re-apply [lldb] Do not use LC_FUNCTION_STARTS data to determine symbol size as symbols are created (#117079)" This reverts commit ba668eb99c5dc37d3c5cf2775079562460fd7619. Below test started failing again on x86_64 macOS CI. We're unsure if this patch is the exact cause, but since this patch has broken this test before, we speculatively revert it to see if it was indeed the root cause. 
``` FAIL: lldb-shell :: Unwind/trap_frame_sym_ctx.test (1692 of 2162) ******************** TEST 'lldb-shell :: Unwind/trap_frame_sym_ctx.test' FAILED ******************** Exit Code: 1 Command Output (stderr): -- RUN: at line 7: /Users/ec2-user/jenkins/workspace/llvm.org/lldb-cmake/lldb-build/bin/clang --target=specify-a-target-or-use-a-_host-substitution --target=x86_64-apple-darwin22.6.0 -isysroot /Applications/Xcode-beta.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk -fmodules-cache-path=/Users/ec2-user/jenkins/workspace/llvm.org/lldb-cmake/lldb-build/lldb-test-build.noindex/module-cache-clang/lldb-shell /Users/ec2-user/jenkins/workspace/llvm.org/lldb-cmake/llvm-project/lldb/test/Shell/Unwind/Inputs/call-asm.c /Users/ec2-user/jenkins/workspace/llvm.org/lldb-cmake/llvm-project/lldb/test/Shell/Unwind/Inputs/trap_frame_sym_ctx.s -o /Users/ec2-user/jenkins/workspace/llvm.org/lldb-cmake/lldb-build/tools/lldb/test/Shell/Unwind/Output/trap_frame_sym_ctx.test.tmp + /Users/ec2-user/jenkins/workspace/llvm.org/lldb-cmake/lldb-build/bin/clang --target=specify-a-target-or-use-a-_host-substitution --target=x86_64-apple-darwin22.6.0 -isysroot /Applications/Xcode-beta.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk -fmodules-cache-path=/Users/ec2-user/jenkins/workspace/llvm.org/lldb-cmake/lldb-build/lldb-test-build.noindex/module-cache-clang/lldb-shell /Users/ec2-user/jenkins/workspace/llvm.org/lldb-cmake/llvm-project/lldb/test/Shell/Unwind/Inputs/call-asm.c /Users/ec2-user/jenkins/workspace/llvm.org/lldb-cmake/llvm-project/lldb/test/Shell/Unwind/Inputs/trap_frame_sym_ctx.s -o /Users/ec2-user/jenkins/workspace/llvm.org/lldb-cmake/lldb-build/tools/lldb/test/Shell/Unwind/Output/trap_frame_sym_ctx.test.tmp clang: warning: argument unused during compilation: '-fmodules-cache-path=/Users/ec2-user/jenkins/workspace/llvm.org/lldb-cmake/lldb-build/lldb-test-build.noindex/module-cache-clang/lldb-shell' [-Wunused-command-line-argument] RUN: at line 8: /Users/ec2-user/jenkins/workspace/llvm.org/lldb-cmake/lldb-build/bin/lldb --no-lldbinit -S /Users/ec2-user/jenkins/workspace/llvm.org/lldb-cmake/lldb-build/tools/lldb/test/Shell/lit-lldb-init-quiet /Users/ec2-user/jenkins/workspace/llvm.org/lldb-cmake/lldb-build/tools/lldb/test/Shell/Unwind/Output/trap_frame_sym_ctx.test.tmp -s /Users/ec2-user/jenkins/workspace/llvm.org/lldb-cmake/llvm-project/lldb/test/Shell/Unwind/trap_frame_sym_ctx.test -o exit | /Users/ec2-user/jenkins/workspace/llvm.org/lldb-cmake/lldb-build/bin/FileCheck /Users/ec2-user/jenkins/workspace/llvm.org/lldb-cmake/llvm-project/lldb/test/Shell/Unwind/trap_frame_sym_ctx.test + /Users/ec2-user/jenkins/workspace/llvm.org/lldb-cmake/lldb-build/bin/lldb --no-lldbinit -S /Users/ec2-user/jenkins/workspace/llvm.org/lldb-cmake/lldb-build/tools/lldb/test/Shell/lit-lldb-init-quiet /Users/ec2-user/jenkins/workspace/llvm.org/lldb-cmake/lldb-build/tools/lldb/test/Shell/Unwind/Output/trap_frame_sym_ctx.test.tmp -s /Users/ec2-user/jenkins/workspace/llvm.org/lldb-cmake/llvm-project/lldb/test/Shell/Unwind/trap_frame_sym_ctx.test -o exit + /Users/ec2-user/jenkins/workspace/llvm.org/lldb-cmake/lldb-build/bin/FileCheck /Users/ec2-user/jenkins/workspace/llvm.org/lldb-cmake/llvm-project/lldb/test/Shell/Unwind/trap_frame_sym_ctx.test /Users/ec2-user/jenkins/workspace/llvm.org/lldb-cmake/llvm-project/lldb/test/Shell/Unwind/trap_frame_sym_ctx.test:21:10: error: CHECK: expected string not found in input ^ :26:64: note: scanning from here frame #1: 0x0000000100003ee9 
trap_frame_sym_ctx.test.tmp`tramp ^ :27:2: note: possible intended match here frame #2: 0x00007ff7bfeff6c0 ^ Input file: Check file: /Users/ec2-user/jenkins/workspace/llvm.org/lldb-cmake/llvm-project/lldb/test/Shell/Unwind/trap_frame_sym_ctx.test -dump-input=help explains the following input dump. Input was: <<<<<< . . . 21: 0x100003ed1 <+0>: pushq %rbp 22: 0x100003ed2 <+1>: movq %rsp, %rbp 23: (lldb) thread backtrace -u 24: * thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 1.1 25: * frame #0: 0x0000000100003ecc trap_frame_sym_ctx.test.tmp`bar 26: frame #1: 0x0000000100003ee9 trap_frame_sym_ctx.test.tmp`tramp check:21'0 X error: no match found 27: frame #2: 0x00007ff7bfeff6c0 check:21'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ check:21'1 ? possible intended match 28: frame #3: 0x0000000100003ec6 trap_frame_sym_ctx.test.tmp`main + 22 check:21'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 29: frame #4: 0x0000000100003ec6 trap_frame_sym_ctx.test.tmp`main + 22 check:21'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 30: frame #5: 0x00007ff8193cc41f dyld`start + 1903 check:21'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 31: (lldb) exit check:21'0 ~~~~~~~~~~~~ >>>>>> ``` --- .../ObjectFile/Mach-O/ObjectFileMachO.cpp | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index 4aa85a99edf01..daffa1379fe57 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -3775,6 +3775,7 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) { SymbolType type = eSymbolTypeInvalid; SectionSP symbol_section; + lldb::addr_t symbol_byte_size = 0; bool add_nlist = true; bool is_gsym = false; bool demangled_is_synthesized = false; @@ -4360,6 +4361,47 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) { if (symbol_section) { const addr_t section_file_addr = symbol_section->GetFileAddress(); + if (symbol_byte_size == 0 && function_starts_count > 0) { + addr_t symbol_lookup_file_addr = nlist.n_value; + // Do an exact address match for non-ARM addresses, else get the + // closest since the symbol might be a thumb symbol which has an + // address with bit zero set. + FunctionStarts::Entry *func_start_entry = + function_starts.FindEntry(symbol_lookup_file_addr, !is_arm); + if (is_arm && func_start_entry) { + // Verify that the function start address is the symbol address + // (ARM) or the symbol address + 1 (thumb). + if (func_start_entry->addr != symbol_lookup_file_addr && + func_start_entry->addr != (symbol_lookup_file_addr + 1)) { + // Not the right entry, NULL it out... 
+ func_start_entry = nullptr; + } + } + if (func_start_entry) { + func_start_entry->data = true; + + addr_t symbol_file_addr = func_start_entry->addr; + if (is_arm) + symbol_file_addr &= THUMB_ADDRESS_BIT_MASK; + + const FunctionStarts::Entry *next_func_start_entry = + function_starts.FindNextEntry(func_start_entry); + const addr_t section_end_file_addr = + section_file_addr + symbol_section->GetByteSize(); + if (next_func_start_entry) { + addr_t next_symbol_file_addr = next_func_start_entry->addr; + // Be sure the clear the Thumb address bit when we calculate the + // size from the current and next address + if (is_arm) + next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK; + symbol_byte_size = std::min( + next_symbol_file_addr - symbol_file_addr, + section_end_file_addr - symbol_file_addr); + } else { + symbol_byte_size = section_end_file_addr - symbol_file_addr; + } + } + } symbol_value -= section_file_addr; } @@ -4466,6 +4508,9 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) { if (nlist.n_desc & N_WEAK_REF) sym[sym_idx].SetIsWeak(true); + if (symbol_byte_size > 0) + sym[sym_idx].SetByteSize(symbol_byte_size); + if (demangled_is_synthesized) sym[sym_idx].SetDemangledNameIsSynthesized(true); @@ -4584,7 +4629,23 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) { Address symbol_addr; if (module_sp->ResolveFileAddress(symbol_file_addr, symbol_addr)) { SectionSP symbol_section(symbol_addr.GetSection()); + uint32_t symbol_byte_size = 0; if (symbol_section) { + const addr_t section_file_addr = symbol_section->GetFileAddress(); + const FunctionStarts::Entry *next_func_start_entry = + function_starts.FindNextEntry(func_start_entry); + const addr_t section_end_file_addr = + section_file_addr + symbol_section->GetByteSize(); + if (next_func_start_entry) { + addr_t next_symbol_file_addr = next_func_start_entry->addr; + if (is_arm) + next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK; + symbol_byte_size = std::min( + next_symbol_file_addr - symbol_file_addr, + section_end_file_addr - symbol_file_addr); + } else { + symbol_byte_size = section_end_file_addr - symbol_file_addr; + } sym[sym_idx].SetID(synthetic_sym_id++); // Don't set the name for any synthetic symbols, the Symbol // object will generate one if needed when the name is accessed @@ -4596,6 +4657,8 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) { add_symbol_addr(symbol_addr.GetFileAddress()); if (symbol_flags) sym[sym_idx].SetFlags(symbol_flags); + if (symbol_byte_size) + sym[sym_idx].SetByteSize(symbol_byte_size); ++sym_idx; } } From f4ce7e037828eaf3720c9169e94e85b7a106deba Mon Sep 17 00:00:00 2001 From: Jerry-Ge Date: Tue, 3 Dec 2024 03:12:43 -0800 Subject: [PATCH 056/191] [MLIR][TOSA] Update PowOp output name from z to output (#118407) To match the latest specification: https://www.mlplatform.org/tosa/tosa_spec.html#_pow Signed-off-by: Jerry Ge --- mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td index 9f57efff5d1fd..c786f396a24cd 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td @@ -828,7 +828,7 @@ def Tosa_PowOp : Tosa_ElementwiseOp<"pow", [SameOperandsAndResultElementType]> { ); let results = (outs - Tosa_Tensor:$z + Tosa_Tensor:$output ); } From 356df2dd72e8299b5de58e9390283110c19f7c76 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Tue, 3 Dec 2024 12:10:57 +0100 Subject: [PATCH 057/191] Revert "[clang] Warn 
[[clang::lifetimebound]] misusages on types (#118281)" Temporarily revert the patch to give downstream teams some time to clean up their codebases. This reverts commit 4849d593ab07c47f9f520bea636f62d159d57006. --- clang/docs/ReleaseNotes.rst | 17 +---------------- clang/lib/Sema/SemaType.cpp | 4 ---- clang/test/SemaCXX/attr-lifetimebound.cpp | 18 ++---------------- 3 files changed, 3 insertions(+), 36 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 4e4dcd83cc28e..01c7899e36c93 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -139,7 +139,7 @@ C++ Specific Potentially Breaking Changes // Fixed version: unsigned operator""_udl_name(unsigned long long); -- Clang will now produce an error diagnostic when ``[[clang::lifetimebound]]`` is +- Clang will now produce an error diagnostic when [[clang::lifetimebound]] is applied on a parameter or an implicit object parameter of a function that returns void. This was previously ignored and had no effect. (#GH107556) @@ -148,21 +148,6 @@ C++ Specific Potentially Breaking Changes // Now diagnoses with an error. void f(int& i [[clang::lifetimebound]]); -- Clang will now produce an error diagnostic when ``[[clang::lifetimebound]]`` - is applied on a type (instead of a function parameter or an implicit object - parameter); this includes the case when the attribute is specified for an - unnamed function parameter. These were previously ignored and had no effect. - (#GH118281) - - .. code-block:: c++ - - // Now diagnoses with an error. - int* [[clang::lifetimebound]] x; - // Now diagnoses with an error. - void f(int* [[clang::lifetimebound]] i); - // Now diagnoses with an error. - void g(int* [[clang::lifetimebound]]); - - Clang now rejects all field accesses on null pointers in constant expressions. 
The following code used to work but will now be rejected: diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 75130436282fb..f32edc5ac0644 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -8612,11 +8612,7 @@ static void HandleLifetimeBoundAttr(TypeProcessingState &State, CurType = State.getAttributedType( createSimpleAttr(State.getSema().Context, Attr), CurType, CurType); - return; } - State.getSema().Diag(Attr.getLoc(), diag::err_attribute_wrong_decl_type_str) - << Attr << Attr.isRegularKeywordAttribute() - << "parameters and implicit object parameters"; } static void HandleLifetimeCaptureByAttr(TypeProcessingState &State, diff --git a/clang/test/SemaCXX/attr-lifetimebound.cpp b/clang/test/SemaCXX/attr-lifetimebound.cpp index c7abec61873ef..f89b556f5bba0 100644 --- a/clang/test/SemaCXX/attr-lifetimebound.cpp +++ b/clang/test/SemaCXX/attr-lifetimebound.cpp @@ -9,25 +9,11 @@ namespace usage_invalid { ~A() [[clang::lifetimebound]]; // expected-error {{cannot be applied to a destructor}} static int *static_class_member() [[clang::lifetimebound]]; // expected-error {{static member function has no implicit object parameter}} int *explicit_object(this A&) [[clang::lifetimebound]]; // expected-error {{explicit object member function has no implicit object parameter}} - int attr_on_var [[clang::lifetimebound]]; // expected-error {{only applies to parameters and implicit object parameters}} - int [[clang::lifetimebound]] attr_on_int; // expected-error {{cannot be applied to types}} - int * [[clang::lifetimebound]] attr_on_int_ptr; // expected-error {{'lifetimebound' attribute only applies to parameters and implicit object parameters}} - int * [[clang::lifetimebound]] * attr_on_int_ptr_ptr; // expected-error {{'lifetimebound' attribute only applies to parameters and implicit object parameters}} - int (* [[clang::lifetimebound]] attr_on_func_ptr)(); // expected-error {{'lifetimebound' attribute only applies to parameters and implicit object parameters}} + int not_function [[clang::lifetimebound]]; // expected-error {{only applies to parameters and implicit object parameters}} + int [[clang::lifetimebound]] also_not_function; // expected-error {{cannot be applied to types}} void void_return_member() [[clang::lifetimebound]]; // expected-error {{'lifetimebound' attribute cannot be applied to an implicit object parameter of a function that returns void; did you mean 'lifetime_capture_by(X)'}} }; int *attr_with_param(int ¶m [[clang::lifetimebound(42)]]); // expected-error {{takes no arguments}} - - void attr_on_ptr_arg(int * [[clang::lifetimebound]] ptr); // expected-error {{'lifetimebound' attribute only applies to parameters and implicit object parameters}} - static_assert((int [[clang::lifetimebound]]) 12); // expected-error {{cannot be applied to types}} - int* attr_on_unnamed_arg(const int& [[clang::lifetimebound]]); // expected-error {{'lifetimebound' attribute only applies to parameters and implicit object parameters}} - template - int* attr_on_template_ptr_arg(T * [[clang::lifetimebound]] ptr); // expected-error {{'lifetimebound' attribute only applies to parameters and implicit object parameters}} - - int (*func_ptr)(int) [[clang::lifetimebound]]; // expected-error {{'lifetimebound' attribute only applies to parameters and implicit object parameters}} - int (*(*func_ptr_ptr)(int) [[clang::lifetimebound]])(int); // expected-error {{'lifetimebound' attribute only applies to parameters and implicit object parameters}} - struct X {}; - int 
(X::*member_func_ptr)(int) [[clang::lifetimebound]]; // expected-error {{'lifetimebound' attribute only applies to parameters and implicit object parameters}} } namespace usage_ok { From 62923275565e3a0f288146f2ce0569fe2ac26bf5 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Tue, 3 Dec 2024 11:44:54 +0000 Subject: [PATCH 058/191] [lld] Convert CODE_OWNERS.txt to Maintainers.md (#118084) To align with https://llvm.org/docs/DeveloperPolicy.html#maintainers I've moved the file and changed it to markdown to match `llvm/`'s file. --- lld/CODE_OWNERS.TXT | 26 -------------------------- lld/Maintainers.md | 40 ++++++++++++++++++++++++++++++++++++++++ llvm/Maintainers.md | 2 +- 3 files changed, 41 insertions(+), 27 deletions(-) delete mode 100644 lld/CODE_OWNERS.TXT create mode 100644 lld/Maintainers.md diff --git a/lld/CODE_OWNERS.TXT b/lld/CODE_OWNERS.TXT deleted file mode 100644 index 44972c0d345a8..0000000000000 --- a/lld/CODE_OWNERS.TXT +++ /dev/null @@ -1,26 +0,0 @@ -This file is a list of the people responsible for ensuring that patches for a -particular part of LLD are reviewed, either by themself or by someone else. -They are also the gatekeepers for their part of LLD, with the final word on -what goes in or not. - -The list is sorted by surname and formatted to allow easy grepping and -beautification by scripts. The fields are: name (N), email (E), web-address -(W), PGP key ID and fingerprint (P), description (D), and snail-mail address -(S). Each entry should contain at least the (N), (E) and (D) fields. - - -N: Rui Ueyama -E: ruiu@google.com -D: COFF, ELF backends (COFF/* ELF/*) - -N: Lang Hames, Nick Kledzik -E: lhames@gmail.com, kledzik@apple.com -D: Old Mach-O backend - -N: Sam Clegg -E: sbc@chromium.org -D: WebAssembly backend (wasm/*) - -N: Jez Ng, Greg McGary, Shoaib Meenai -E: jezng@fb.com, gkm@fb.com, smeenai@fb.com -D: New Mach-O backend diff --git a/lld/Maintainers.md b/lld/Maintainers.md new file mode 100644 index 0000000000000..d210246267656 --- /dev/null +++ b/lld/Maintainers.md @@ -0,0 +1,40 @@ +# LLD Maintainers + +This file is a list of the +[maintainers](https://llvm.org/docs/DeveloperPolicy.html#maintainers) for +LLD. + +## Current Maintainers + +The following people are the active maintainers for the project. Please reach +out to them for code reviews, questions about their area of expertise, or other +assistance. + +### COFF, ELF backends (COFF/* ELF/*) + +Rui Ueyama \ +ruiu@google.com (email) + +### Old Mach-O backend + +Lang Hames \ +lhames@gmail.com (email) + +Nick Kledzik \ +kledzik@apple.com (email) + +### WebAssembly backend (wasm/*) + +Sam Clegg \ +sbc@chromium.org (email) + +### New Mach-O backend + +Jez Ng \ +jezng@fb.com (email) + +Greg McGary \ +gkm@fb.com (email) + +Shoaib Meenai \ +smeenai@fb.com (email) diff --git a/llvm/Maintainers.md b/llvm/Maintainers.md index c5f98f76bda31..7f310d3762429 100644 --- a/llvm/Maintainers.md +++ b/llvm/Maintainers.md @@ -434,7 +434,7 @@ Others only have a lead maintainer listed here. 
[Flang maintainers](https://github.com/llvm/llvm-project/blob/main/flang/Maintainers.txt) -[LLD maintainers](https://github.com/llvm/llvm-project/blob/main/lld/CODE_OWNERS.TXT) +[LLD maintainers](https://github.com/llvm/llvm-project/blob/main/lld/Maintainers.md) [LLDB maintainers](https://github.com/llvm/llvm-project/blob/main/lldb/Maintainers.rst) From aec9ecbcca2a3337a7d52130ab8f61a2ff151429 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Tue, 3 Dec 2024 11:46:05 +0000 Subject: [PATCH 059/191] [github] Change "code owners" note to refer to "maintainers" (#118087) --- .github/CODEOWNERS | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 24f8704967d46..098d36f162205 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -7,10 +7,9 @@ # to receive an approval from a "code owner" in particular -- any LLVM project # member can approve pull requests. # -# Note that GitHub's concept of "code owner" is independent from LLVM's own -# "code owner" concept, they merely happen to share terminology. See -# https://llvm.org/docs/DeveloperPolicy.html#code-owners, as well as the -# CODE_OWNERS.txt files in the respective subproject directories. +# This is independent of LLVM's own "maintainer" concept. +# See https://llvm.org/docs/DeveloperPolicy.html#maintainers as well as the +# Maintainers.* files in the the respective subproject directories. /libcxx/ @llvm/reviewers-libcxx /libcxxabi/ @llvm/reviewers-libcxxabi From b2df0074134add80ba3a483a479601b00a9f9fc7 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 3 Dec 2024 12:54:26 +0100 Subject: [PATCH 060/191] [FastISel] Support unreachable with NoTrapAfterNoReturn (#118296) Currently FastISel triggers a fallback if there is an unreachable terminator and the TrapUnreachable option is enabled (the ISD::TRAP selection does not actually work). Add handling for NoTrapAfterNoReturn, in which case we don't actually need to emit a trap. The test is just there to make sure there is no FastISel fallback (which is why I'm not testing the case without noreturn). We have other tests that check the actual unreachable codegen variations. --- llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 16 ++++++++++++---- .../X86/no-trap-after-noreturn-fastisel.ll | 13 +++++++++++++ 2 files changed, 25 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/X86/no-trap-after-noreturn-fastisel.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index eede879e7e80d..d5551758c073e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1851,11 +1851,19 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) { return false; } - case Instruction::Unreachable: - if (TM.Options.TrapUnreachable) + case Instruction::Unreachable: { + if (TM.Options.TrapUnreachable) { + if (TM.Options.NoTrapAfterNoreturn) { + const auto *Call = + dyn_cast_or_null(cast(I)->getPrevNode()); + if (Call && Call->doesNotReturn()) + return true; + } + return fastEmit_(MVT::Other, MVT::Other, ISD::TRAP) != 0; - else - return true; + } + return true; + } case Instruction::Alloca: // FunctionLowering has the static-sized case covered. 
diff --git a/llvm/test/CodeGen/X86/no-trap-after-noreturn-fastisel.ll b/llvm/test/CodeGen/X86/no-trap-after-noreturn-fastisel.ll new file mode 100644 index 0000000000000..5149209f79d15 --- /dev/null +++ b/llvm/test/CodeGen/X86/no-trap-after-noreturn-fastisel.ll @@ -0,0 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -O0 -trap-unreachable -no-trap-after-noreturn -fast-isel-abort=3 < %s | FileCheck %s + +declare void @foo() + +define void @noreturn_unreachable() nounwind { +; CHECK-LABEL: noreturn_unreachable: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq foo@PLT + call void @foo() noreturn + unreachable +} From 61c2ac03d85f731d75cda23d1918f03d0cb962dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Tue, 3 Dec 2024 13:35:57 +0100 Subject: [PATCH 061/191] Revert "[clang][bytecode] Handle __builtin_wcslen (#118446)" This reverts commit 89a0ee89973c3d213c4bc11c26b41eab67e06da0. This breaks builders: https://lab.llvm.org/buildbot/#/builders/13/builds/3885 --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 4 +--- clang/test/AST/ByteCode/builtin-functions.cpp | 12 ------------ 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index b788656f9484f..85cffb0c4332d 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -243,7 +243,7 @@ static bool interp__builtin_strlen(InterpState &S, CodePtr OpPC, unsigned ID = Func->getBuiltinID(); const Pointer &StrPtr = getParam(Frame, 0); - if (ID == Builtin::BIstrlen || ID == Builtin::BIwcslen) + if (ID == Builtin::BIstrlen) diagnoseNonConstexprBuiltin(S, OpPC, ID); if (!CheckArray(S, OpPC, StrPtr)) @@ -1859,8 +1859,6 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F, break; case Builtin::BI__builtin_strlen: case Builtin::BIstrlen: - case Builtin::BI__builtin_wcslen: - case Builtin::BIwcslen: if (!interp__builtin_strlen(S, OpPC, Frame, F, Call)) return false; break; diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp index b951c04dde598..f70b77fe74636 100644 --- a/clang/test/AST/ByteCode/builtin-functions.cpp +++ b/clang/test/AST/ByteCode/builtin-functions.cpp @@ -15,10 +15,6 @@ #error "huh?" #endif -extern "C" { - typedef decltype(sizeof(int)) size_t; - extern size_t wcslen(const wchar_t *p); -} namespace strcmp { constexpr char kFoobar[6] = {'f','o','o','b','a','r'}; @@ -97,14 +93,6 @@ constexpr const char *a = "foo\0quux"; constexpr char d[] = { 'f', 'o', 'o' }; // no nul terminator. constexpr int bad = __builtin_strlen(d); // both-error {{constant expression}} \ // both-note {{one-past-the-end}} - - constexpr int wn = __builtin_wcslen(L"hello"); - static_assert(wn == 5); - constexpr int wm = wcslen(L"hello"); // both-error {{constant expression}} \ - // both-note {{non-constexpr function 'wcslen' cannot be used in a constant expression}} - - int arr[3]; // both-note {{here}} - int wk = arr[wcslen(L"hello")]; // both-warning {{array index 5}} } namespace nan { From 807544561310d49b51915f365b7521412d68c219 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Tue, 3 Dec 2024 12:41:30 +0000 Subject: [PATCH 062/191] [LoopVectorize] Add tests for dereferenceable loads in more loops (#118470) * Adds tests for strided accesses. * Adds tests for reverse loops. 
As part of this I've moved one of the negative tests from load-deref-pred-align.ll into a new file (load-deref-pred-neg-off.ll) because the pointer type had a size of 16 bits and I realised it's probably not sensible for allocas that are >16 bits in size! --- .../LoopVectorize/load-deref-pred-align.ll | 527 ++++++++++++++---- .../LoopVectorize/load-deref-pred-neg-off.ll | 104 ++++ 2 files changed, 532 insertions(+), 99 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/load-deref-pred-neg-off.ll diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll index 1ef01e3b793d5..bf22d63850835 100644 --- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll +++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck %s -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1-p:16:16:16:16" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1" declare void @init(ptr nocapture nofree) @@ -200,104 +200,6 @@ loop_exit: } -; Test where offset relative to alloca is negative and we shouldn't -; treat predicated loads as being always dereferenceable. -define i8 @test_negative_off(i16 %len, ptr %test_base) { -; CHECK-LABEL: @test_negative_off( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [64638 x i8], align 1 -; CHECK-NEXT: call void @init(ptr [[ALLOCA]]) -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE2:%.*]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i8> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[PRED_LOAD_CONTINUE2]] ] -; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 -1000, [[DOTCAST]] -; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i16 [[TMP0]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i16 [[TMP1]] -; CHECK-NEXT: [[TMP4:%.*]] = load i1, ptr [[TMP2]], align 1 -; CHECK-NEXT: [[TMP5:%.*]] = load i1, ptr [[TMP3]], align 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i1> poison, i1 [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i1> [[TMP6]], i1 [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] -; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[ALLOCA]], i16 [[TMP0]] -; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i8> poison, i8 [[TMP10]], i32 0 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] -; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP11]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2]] -; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[ALLOCA]], i16 [[TMP1]] -; 
CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP14]], align 1 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i8> [[TMP12]], i8 [[TMP15]], i32 1 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] -; CHECK: pred.load.continue2: -; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i8> [ [[TMP12]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], [[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP7]], <2 x i8> [[TMP17]], <2 x i8> zeroinitializer -; CHECK-NEXT: [[TMP18]] = add <2 x i8> [[VEC_PHI]], [[PREDPHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12 -; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: [[TMP20:%.*]] = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> [[TMP18]]) -; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ -988, [[MIDDLE_BLOCK]] ], [ -1000, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ [[TMP20]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i8 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i16 [[IV]] -; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i8, ptr [[ALLOCA]], i16 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[ADDR]], align 1 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i8 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i8 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i16 [[IV]], -990 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] -; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i8 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i8 [[ACCUM_NEXT_LCSSA]] -; -entry: - %alloca = alloca [64638 x i8] - call void @init(ptr %alloca) - br label %loop -loop: - %iv = phi i16 [ -1000, %entry ], [ %iv.next, %latch ] - %accum = phi i8 [ 0, %entry ], [ %accum.next, %latch ] - %iv.next = add i16 %iv, 1 - %test_addr = getelementptr inbounds i1, ptr %test_base, i16 %iv - %earlycnd = load i1, ptr %test_addr - br i1 %earlycnd, label %pred, label %latch -pred: - %addr = getelementptr i8, ptr %alloca, i16 %iv - %val = load i8, ptr %addr - br label %latch -latch: - %val.phi = phi i8 [ 0, %loop ], [ %val, %pred ] - %accum.next = add i8 %accum, %val.phi - %exit = icmp ugt i16 %iv, -990 - br i1 %exit, label %loop_exit, label %loop -loop_exit: - ret i8 %accum.next -} - - define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) { ; CHECK-LABEL: @loop_requires_scev_predicate( ; CHECK-NEXT: entry: @@ -423,3 +325,430 @@ for.inc: exit: ret i32 0 } + + +; Test reverse loops where we should be able to prove loads in predicated blocks +; are safe to load unconditionally. 
+define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) { +; CHECK-LABEL: @test_rev_loops_deref_loads( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOCAL_DEST:%.*]] = alloca [1024 x i32], align 4 +; CHECK-NEXT: [[LOCAL_SRC:%.*]] = alloca [1024 x i32], align 4 +; CHECK-NEXT: [[LOCAL_CMP:%.*]] = alloca [1024 x i32], align 4 +; CHECK-NEXT: call void @init(ptr [[LOCAL_SRC]]) +; CHECK-NEXT: call void @init(ptr [[LOCAL_CMP]]) +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 -1 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <2 x i32> [[REVERSE]], splat (i32 3) +; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0 +; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; CHECK: pred.store.if: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP12:%.*]] = shl nsw i32 [[TMP11]], 2 +; CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP10]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] +; CHECK: pred.store.continue: +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1 +; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]] +; CHECK: pred.store.if1: +; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], -1 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP21]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = shl nsw i32 [[TMP16]], 2 +; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] +; CHECK: pred.store.continue2: +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, [[MIDDLE_BLOCK]] ], [ 1023, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP19:%.*]] 
= load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[TMP19]], 3 +; CHECK-NEXT: br i1 [[CMP3_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP20]], 2 +; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[IV]] +; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX7]], align 4 +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 +; CHECK-NEXT: [[CMP2_NOT:%.*]] = icmp eq i64 [[IV]], 0 +; CHECK-NEXT: br i1 [[CMP2_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DEST:%.*]], ptr [[LOCAL_DEST]], i64 1024, i1 false) +; CHECK-NEXT: ret void +; +entry: + %local_dest = alloca [1024 x i32], align 4 + %local_src = alloca [1024 x i32], align 4 + %local_cmp = alloca [1024 x i32], align 4 + call void @init(ptr %local_src) + call void @init(ptr %local_cmp) + br label %for.body + +for.body: + %iv = phi i64 [ 1023, %entry ], [ %iv.next, %for.inc ] + %arrayidx = getelementptr inbounds [1024 x i32], ptr %local_cmp, i64 0, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %cmp3.not = icmp eq i32 %0, 3 + br i1 %cmp3.not, label %for.inc, label %if.then + +if.then: + %arrayidx5 = getelementptr inbounds [1024 x i32], ptr %local_src, i64 0, i64 %iv + %1 = load i32, ptr %arrayidx5, align 4 + %mul = shl nsw i32 %1, 2 + %arrayidx7 = getelementptr inbounds [1024 x i32], ptr %local_dest, i64 0, i64 %iv + store i32 %mul, ptr %arrayidx7, align 4 + br label %for.inc + +for.inc: + %iv.next = add nsw i64 %iv, -1 + %cmp2.not = icmp eq i64 %iv, 0 + br i1 %cmp2.not, label %exit, label %for.body + +exit: + call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %local_dest, i64 1024, i1 false) + ret void +} + + +; Test reverse loops where we *cannot* prove loads in predicated blocks are safe +; to load unconditionally. 
+define void @test_rev_loops_non_deref_loads(ptr nocapture noundef writeonly %dest) { +; CHECK-LABEL: @test_rev_loops_non_deref_loads( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOCAL_DEST:%.*]] = alloca [1024 x i32], align 4 +; CHECK-NEXT: [[LOCAL_SRC:%.*]] = alloca [1024 x i32], align 4 +; CHECK-NEXT: [[LOCAL_CMP:%.*]] = alloca [1024 x i32], align 4 +; CHECK-NEXT: call void @init(ptr [[LOCAL_SRC]]) +; CHECK-NEXT: call void @init(ptr [[LOCAL_CMP]]) +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 -1) +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 -1 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <2 x i32> [[REVERSE]], splat (i32 3) +; CHECK-NEXT: [[TMP6:%.*]] = xor <2 x i1> [[TMP5]], splat (i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0 +; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; CHECK: pred.store.if: +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = shl nsw i32 [[TMP10]], 2 +; CHECK-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] +; CHECK: pred.store.continue: +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP6]], i32 1 +; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] +; CHECK: pred.store.if1: +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = shl nsw i32 [[TMP17]], 2 +; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] +; CHECK: pred.store.continue2: +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 -2) +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label 
[[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, [[MIDDLE_BLOCK]] ], [ 1023, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[OFF:%.*]] = add i64 [[IV]], -1 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[OFF]] +; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[TMP22]], 3 +; CHECK-NEXT: br i1 [[CMP3_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[OFF]] +; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP23]], 2 +; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[OFF]] +; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX7]], align 4 +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 +; CHECK-NEXT: [[CMP2_NOT:%.*]] = icmp eq i64 [[IV]], 0 +; CHECK-NEXT: br i1 [[CMP2_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DEST:%.*]], ptr [[LOCAL_DEST]], i64 1024, i1 false) +; CHECK-NEXT: ret void +; +entry: + %local_dest = alloca [1024 x i32], align 4 + %local_src = alloca [1024 x i32], align 4 + %local_cmp = alloca [1024 x i32], align 4 + call void @init(ptr %local_src) + call void @init(ptr %local_cmp) + br label %for.body + +for.body: + %iv = phi i64 [ 1023, %entry ], [ %iv.next, %for.inc ] + %off = add i64 %iv, -1 + %arrayidx = getelementptr inbounds [1024 x i32], ptr %local_cmp, i64 0, i64 %off + %0 = load i32, ptr %arrayidx, align 4 + %cmp3.not = icmp eq i32 %0, 3 + br i1 %cmp3.not, label %for.inc, label %if.then + +if.then: + %arrayidx5 = getelementptr inbounds [1024 x i32], ptr %local_src, i64 0, i64 %off + %1 = load i32, ptr %arrayidx5, align 4 + %mul = shl nsw i32 %1, 2 + %arrayidx7 = getelementptr inbounds [1024 x i32], ptr %local_dest, i64 0, i64 %off + store i32 %mul, ptr %arrayidx7, align 4 + br label %for.inc + +for.inc: + %iv.next = add nsw i64 %iv, -1 + %cmp2.not = icmp eq i64 %iv, 0 + br i1 %cmp2.not, label %exit, label %for.body + +exit: + call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %local_dest, i64 1024, i1 false) + ret void +} + + +; Test a loop with a positive step recurrence that has a strided access +define i16 @test_strided_access(i64 %len, ptr %test_base) { +; CHECK-LABEL: @test_strided_access( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [163840 x i16], align 4 +; CHECK-NEXT: call void @init(ptr [[ALLOCA]]) +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: 
[[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i8> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = load i16, ptr [[TMP6]], align 2 +; CHECK-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP8]], align 2 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i16> poison, i16 [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i16> [[TMP11]], i16 [[TMP10]], i32 1 +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i16> [[TMP12]], <2 x i16> zeroinitializer +; CHECK-NEXT: [[TMP13]] = add <2 x i16> [[VEC_PHI]], [[PREDPHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP15:%.*]] = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> [[TMP13]]) +; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ [[TMP15]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[ACCUM:%.*]] = phi i16 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE]], i64 [[IV]] +; CHECK-NEXT: [[L_T:%.*]] = load i8, ptr [[TEST_ADDR]], align 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[L_T]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[PRED:%.*]], label [[LATCH]] +; CHECK: pred: +; CHECK-NEXT: [[IV_STRIDE:%.*]] = mul i64 [[IV]], 2 +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[IV_STRIDE]] +; CHECK-NEXT: [[VAL:%.*]] = load i16, ptr [[ADDR]], align 2 +; CHECK-NEXT: br label [[LATCH]] +; CHECK: latch: +; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i16 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] +; CHECK-NEXT: [[ACCUM_NEXT]] = add i16 [[ACCUM]], [[VAL_PHI]] +; CHECK-NEXT: [[EXIT:%.*]] = icmp eq i64 [[IV]], 4095 +; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK: loop_exit: +; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i16 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i16 [[ACCUM_NEXT_LCSSA]] +; +entry: + %alloca = alloca [163840 x i16], align 4 + call void @init(ptr %alloca) + br label %loop +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ] + %accum = phi i16 [ 0, %entry ], [ %accum.next, %latch ] + %iv.next = add i64 %iv, 1 + %test_addr = getelementptr inbounds i8, ptr %test_base, i64 %iv + %l.t = load i8, ptr %test_addr + %cmp = icmp sge i8 %l.t, 0 + br i1 %cmp, label %pred, label %latch +pred: + %iv.stride = 
mul i64 %iv, 2 + %addr = getelementptr inbounds i16, ptr %alloca, i64 %iv.stride + %val = load i16, ptr %addr, align 2 + br label %latch +latch: + %val.phi = phi i16 [0, %loop], [%val, %pred] + %accum.next = add i16 %accum, %val.phi + %exit = icmp eq i64 %iv, 4095 + br i1 %exit, label %loop_exit, label %loop + +loop_exit: + ret i16 %accum.next +} + + +; Test a loop with a negative step recurrence that has a strided access +define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly %dest) { +; CHECK-LABEL: @test_rev_loops_strided_deref_loads( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOCAL_DEST:%.*]] = alloca [1024 x i32], align 4 +; CHECK-NEXT: [[LOCAL_SRC:%.*]] = alloca [1024 x i32], align 4 +; CHECK-NEXT: [[LOCAL_CMP:%.*]] = alloca [1024 x i32], align 4 +; CHECK-NEXT: call void @init(ptr [[LOCAL_SRC]]) +; CHECK-NEXT: call void @init(ptr [[LOCAL_CMP]]) +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 511, [[INDEX]] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 -1 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <2 x i32> [[REVERSE]], splat (i32 3) +; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP6:%.*]] = mul <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0 +; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; CHECK: pred.store.if: +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP15:%.*]] = shl nsw i32 [[TMP11]], 2 +; CHECK-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] +; CHECK: pred.store.continue: +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1 +; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] +; CHECK: pred.store.if1: +; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], -1 +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1 +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP23]] +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP24]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = shl nsw i32 [[TMP12]], 2 +; CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] +; 
CHECK: pred.store.continue2: +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 -2) +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 +; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, [[MIDDLE_BLOCK]] ], [ 511, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[TMP21]], 3 +; CHECK-NEXT: br i1 [[CMP3_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[IV_STRIDED:%.*]] = mul i64 [[IV]], 2 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[IV_STRIDED]] +; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP22]], 2 +; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[IV]] +; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX7]], align 4 +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 +; CHECK-NEXT: [[CMP2_NOT:%.*]] = icmp eq i64 [[IV]], 0 +; CHECK-NEXT: br i1 [[CMP2_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DEST:%.*]], ptr [[LOCAL_DEST]], i64 1024, i1 false) +; CHECK-NEXT: ret void +; +entry: + %local_dest = alloca [1024 x i32], align 4 + %local_src = alloca [1024 x i32], align 4 + %local_cmp = alloca [1024 x i32], align 4 + call void @init(ptr %local_src) + call void @init(ptr %local_cmp) + br label %for.body + +for.body: + %iv = phi i64 [ 511, %entry ], [ %iv.next, %for.inc ] + %arrayidx = getelementptr inbounds [1024 x i32], ptr %local_cmp, i64 0, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %cmp3.not = icmp eq i32 %0, 3 + br i1 %cmp3.not, label %for.inc, label %if.then + +if.then: + %iv.strided = mul i64 %iv, 2 + %arrayidx5 = getelementptr inbounds [1024 x i32], ptr %local_src, i64 0, i64 %iv.strided + %1 = load i32, ptr %arrayidx5, align 4 + %mul = shl nsw i32 %1, 2 + %arrayidx7 = getelementptr inbounds [1024 x i32], ptr %local_dest, i64 0, i64 %iv + store i32 %mul, ptr %arrayidx7, align 4 + br label %for.inc + +for.inc: + %iv.next = add nsw i64 %iv, -1 + %cmp2.not = icmp eq i64 %iv, 0 + br i1 %cmp2.not, label %exit, label %for.body + +exit: + call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %local_dest, i64 1024, i1 false) + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-neg-off.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-neg-off.ll new file mode 100644 index 0000000000000..1dd526df503bd --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-neg-off.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck %s + +target datalayout = 
"e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1-p:16:16:16:16" + +declare void @init(ptr nocapture nofree) + + +; Test where offset relative to alloca is negative and we shouldn't +; treat predicated loads as being always dereferenceable. +define i8 @test_negative_off(i16 %len, ptr %test_base) { +; CHECK-LABEL: @test_negative_off( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [64638 x i8], align 1 +; CHECK-NEXT: call void @init(ptr [[ALLOCA]]) +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE2:%.*]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i8> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[PRED_LOAD_CONTINUE2]] ] +; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 -1000, [[DOTCAST]] +; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i16 [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i16 [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = load i1, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = load i1, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i1> poison, i1 [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i1> [[TMP6]], i1 [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 +; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK: pred.load.if: +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[ALLOCA]], i16 [[TMP0]] +; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i8> poison, i8 [[TMP10]], i32 0 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] +; CHECK: pred.load.continue: +; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP11]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 +; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2]] +; CHECK: pred.load.if1: +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[ALLOCA]], i16 [[TMP1]] +; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP14]], align 1 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i8> [[TMP12]], i8 [[TMP15]], i32 1 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] +; CHECK: pred.load.continue2: +; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i8> [ [[TMP12]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP7]], <2 x i8> [[TMP17]], <2 x i8> zeroinitializer +; CHECK-NEXT: [[TMP18]] = add <2 x i8> [[VEC_PHI]], [[PREDPHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12 +; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP20:%.*]] = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> [[TMP18]]) +; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ -988, [[MIDDLE_BLOCK]] ], [ -1000, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ [[TMP20]], [[MIDDLE_BLOCK]] ], [ 0, 
[[ENTRY]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[ACCUM:%.*]] = phi i8 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 +; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i16 [[IV]] +; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 +; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] +; CHECK: pred: +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i8, ptr [[ALLOCA]], i16 [[IV]] +; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[ADDR]], align 1 +; CHECK-NEXT: br label [[LATCH]] +; CHECK: latch: +; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i8 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] +; CHECK-NEXT: [[ACCUM_NEXT]] = add i8 [[ACCUM]], [[VAL_PHI]] +; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i16 [[IV]], -990 +; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: loop_exit: +; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i8 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i8 [[ACCUM_NEXT_LCSSA]] +; +entry: + %alloca = alloca [64638 x i8] + call void @init(ptr %alloca) + br label %loop +loop: + %iv = phi i16 [ -1000, %entry ], [ %iv.next, %latch ] + %accum = phi i8 [ 0, %entry ], [ %accum.next, %latch ] + %iv.next = add i16 %iv, 1 + %test_addr = getelementptr inbounds i1, ptr %test_base, i16 %iv + %earlycnd = load i1, ptr %test_addr + br i1 %earlycnd, label %pred, label %latch +pred: + %addr = getelementptr i8, ptr %alloca, i16 %iv + %val = load i8, ptr %addr + br label %latch +latch: + %val.phi = phi i8 [ 0, %loop ], [ %val, %pred ] + %accum.next = add i8 %accum, %val.phi + %exit = icmp ugt i16 %iv, -990 + br i1 %exit, label %loop_exit, label %loop +loop_exit: + ret i8 %accum.next +} From 5f99eb9b138d7059b2a1f53adea0a58314761f65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Tue, 3 Dec 2024 13:42:02 +0100 Subject: [PATCH 063/191] [SPIR-V] Fixup storage class for global private (#118318) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-land of #116636 Adds a new address spaces: hlsl_private. Variables with such address space will be emitted with a Private storage class. This is useful for variables global to a SPIR-V module, since up to now, they were still emitted with a Function storage class, which is wrong. 
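As a rough illustration (the names below are hypothetical and not taken from the patch's own tests), a module-scope variable placed in address space 10, the address space this patch maps to the Private storage class in SPIRVUtils.cpp, would be expected to come out as an OpVariable with Private rather than Function storage:

```
; Minimal sketch: a hypothetical module-scope counter in the new private
; address space (10). With this change it should be emitted as an OpVariable
; with the Private storage class instead of Function.
@counter = internal addrspace(10) global i32 0

define void @main() {
entry:
  %v = load i32, ptr addrspace(10) @counter, align 4
  %inc = add i32 %v, 1
  store i32 %inc, ptr addrspace(10) @counter, align 4
  ret void
}
```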
--------- Signed-off-by: Nathan Gauër --- .../Target/SPIRV/SPIRVInstructionSelector.cpp | 30 ++++++++++++------- llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp | 17 ++++++----- llvm/lib/Target/SPIRV/SPIRVUtils.cpp | 4 +++ llvm/lib/Target/SPIRV/SPIRVUtils.h | 4 +++ .../SPIRV/pointers/global-addrspacecast.ll | 17 +++++++++++ .../pointers/variables-storage-class-vk.ll | 15 ++++++++++ .../SPIRV/pointers/variables-storage-class.ll | 22 ++++++++++---- 7 files changed, 86 insertions(+), 23 deletions(-) create mode 100644 llvm/test/CodeGen/SPIRV/pointers/global-addrspacecast.ll create mode 100644 llvm/test/CodeGen/SPIRV/pointers/variables-storage-class-vk.ll diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index d0335117cbe12..3547ac66430a8 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -1460,6 +1460,16 @@ bool SPIRVInstructionSelector::selectAddrSpaceCast(Register ResVReg, .addUse(SrcPtr) .constrainAllUses(TII, TRI, RBI); + if ((SrcSC == SPIRV::StorageClass::Function && + DstSC == SPIRV::StorageClass::Private) || + (DstSC == SPIRV::StorageClass::Function && + SrcSC == SPIRV::StorageClass::Private)) { + return BuildMI(BB, I, DL, TII.get(TargetOpcode::COPY)) + .addDef(ResVReg) + .addUse(SrcPtr) + .constrainAllUses(TII, TRI, RBI); + } + // Casting from an eligible pointer to Generic. if (DstSC == SPIRV::StorageClass::Generic && isGenericCastablePtr(SrcSC)) return selectUnOp(ResVReg, ResType, I, SPIRV::OpPtrCastToGeneric); @@ -3461,11 +3471,7 @@ bool SPIRVInstructionSelector::selectGlobalValue( if (HasInit && !Init) return true; - unsigned AddrSpace = GV->getAddressSpace(); - SPIRV::StorageClass::StorageClass Storage = - addressSpaceToStorageClass(AddrSpace, STI); - bool HasLnkTy = GV->getLinkage() != GlobalValue::InternalLinkage && - Storage != SPIRV::StorageClass::Function; + bool HasLnkTy = GV->getLinkage() != GlobalValue::InternalLinkage; SPIRV::LinkageType::LinkageType LnkType = (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) ? SPIRV::LinkageType::Import @@ -3474,12 +3480,14 @@ bool SPIRVInstructionSelector::selectGlobalValue( ? 
SPIRV::LinkageType::LinkOnceODR : SPIRV::LinkageType::Export); - SPIRVType *ResType = GR.getOrCreateSPIRVPointerType( - PointerBaseType, I, TII, - addressSpaceToStorageClass(GV->getAddressSpace(), STI)); - Register Reg = GR.buildGlobalVariable(ResVReg, ResType, GlobalIdent, GV, - Storage, Init, GlobalVar->isConstant(), - HasLnkTy, LnkType, MIRBuilder, true); + const unsigned AddrSpace = GV->getAddressSpace(); + SPIRV::StorageClass::StorageClass StorageClass = + addressSpaceToStorageClass(AddrSpace, STI); + SPIRVType *ResType = + GR.getOrCreateSPIRVPointerType(PointerBaseType, I, TII, StorageClass); + Register Reg = GR.buildGlobalVariable( + ResVReg, ResType, GlobalIdent, GV, StorageClass, Init, + GlobalVar->isConstant(), HasLnkTy, LnkType, MIRBuilder, true); return Reg.isValid(); } diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp index 90898b8bd7250..7230e0e6b9fca 100644 --- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp @@ -112,13 +112,16 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) { const LLT p5 = LLT::pointer(5, PSize); // Input, SPV_INTEL_usm_storage_classes (Device) const LLT p6 = LLT::pointer(6, PSize); // SPV_INTEL_usm_storage_classes (Host) + const LLT p7 = LLT::pointer(7, PSize); // Input + const LLT p8 = LLT::pointer(8, PSize); // Output + const LLT p10 = LLT::pointer(10, PSize); // Private // TODO: remove copy-pasting here by using concatenation in some way. auto allPtrsScalarsAndVectors = { - p0, p1, p2, p3, p4, p5, p6, s1, s8, s16, - s32, s64, v2s1, v2s8, v2s16, v2s32, v2s64, v3s1, v3s8, v3s16, - v3s32, v3s64, v4s1, v4s8, v4s16, v4s32, v4s64, v8s1, v8s8, v8s16, - v8s32, v8s64, v16s1, v16s8, v16s16, v16s32, v16s64}; + p0, p1, p2, p3, p4, p5, p6, p7, p8, p10, + s1, s8, s16, s32, s64, v2s1, v2s8, v2s16, v2s32, v2s64, + v3s1, v3s8, v3s16, v3s32, v3s64, v4s1, v4s8, v4s16, v4s32, v4s64, + v8s1, v8s8, v8s16, v8s32, v8s64, v16s1, v16s8, v16s16, v16s32, v16s64}; auto allVectors = {v2s1, v2s8, v2s16, v2s32, v2s64, v3s1, v3s8, v3s16, v3s32, v3s64, v4s1, v4s8, v4s16, v4s32, @@ -145,10 +148,10 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) { s16, s32, s64, v2s16, v2s32, v2s64, v3s16, v3s32, v3s64, v4s16, v4s32, v4s64, v8s16, v8s32, v8s64, v16s16, v16s32, v16s64}; - auto allFloatAndIntScalarsAndPtrs = {s8, s16, s32, s64, p0, p1, - p2, p3, p4, p5, p6}; + auto allFloatAndIntScalarsAndPtrs = {s8, s16, s32, s64, p0, p1, p2, + p3, p4, p5, p6, p7, p8, p10}; - auto allPtrs = {p0, p1, p2, p3, p4, p5, p6}; + auto allPtrs = {p0, p1, p2, p3, p4, p5, p6, p7, p8, p10}; bool IsExtendedInts = ST.canUseExtension( diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp index 1ece3044aaa7b..50338f5df9028 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp @@ -207,8 +207,12 @@ addressSpaceToStorageClass(unsigned AddrSpace, const SPIRVSubtarget &STI) { : SPIRV::StorageClass::CrossWorkgroup; case 7: return SPIRV::StorageClass::Input; + case 8: + return SPIRV::StorageClass::Output; case 9: return SPIRV::StorageClass::CodeSectionINTEL; + case 10: + return SPIRV::StorageClass::Private; default: report_fatal_error("Unknown address space"); } diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h index c0569549039d5..6fefe63f44dec 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.h +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h @@ -170,8 +170,12 @@ 
storageClassToAddressSpace(SPIRV::StorageClass::StorageClass SC) { return 6; case SPIRV::StorageClass::Input: return 7; + case SPIRV::StorageClass::Output: + return 8; case SPIRV::StorageClass::CodeSectionINTEL: return 9; + case SPIRV::StorageClass::Private: + return 10; default: report_fatal_error("Unable to get address space id"); } diff --git a/llvm/test/CodeGen/SPIRV/pointers/global-addrspacecast.ll b/llvm/test/CodeGen/SPIRV/pointers/global-addrspacecast.ll new file mode 100644 index 0000000000000..544c657da8488 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/pointers/global-addrspacecast.ll @@ -0,0 +1,17 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +@PrivInternal = internal addrspace(10) global i32 456 +; CHECK-DAG: %[[#type:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#ptrty:]] = OpTypePointer Private %[[#type]] +; CHECK-DAG: %[[#value:]] = OpConstant %[[#type]] 456 +; CHECK-DAG: %[[#var:]] = OpVariable %[[#ptrty]] Private %[[#value]] + +define spir_kernel void @Foo() { + %p = addrspacecast ptr addrspace(10) @PrivInternal to ptr + %v = load i32, ptr %p, align 4 + ret void +; CHECK: OpLabel +; CHECK-NEXT: OpLoad %[[#type]] %[[#var]] Aligned 4 +; CHECK-Next: OpReturn +} diff --git a/llvm/test/CodeGen/SPIRV/pointers/variables-storage-class-vk.ll b/llvm/test/CodeGen/SPIRV/pointers/variables-storage-class-vk.ll new file mode 100644 index 0000000000000..e8b1dc263f150 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/pointers/variables-storage-class-vk.ll @@ -0,0 +1,15 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-vulkan1.3-compute %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: %[[#U32:]] = OpTypeInt 32 0 + +; CHECK-DAG: %[[#VAL:]] = OpConstant %[[#U32]] 456 +; CHECK-DAG: %[[#VTYPE:]] = OpTypePointer Private %[[#U32]] +; CHECK-DAG: %[[#VAR:]] = OpVariable %[[#VTYPE]] Private %[[#VAL]] +; CHECK-NOT: OpDecorate %[[#VAR]] LinkageAttributes +@PrivInternal = internal addrspace(10) global i32 456 + +define void @main() { + %l = load i32, ptr addrspace(10) @PrivInternal + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/pointers/variables-storage-class.ll b/llvm/test/CodeGen/SPIRV/pointers/variables-storage-class.ll index 2d4c805ac9df1..a1ded0569d67e 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/variables-storage-class.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/variables-storage-class.ll @@ -1,17 +1,29 @@ ; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} +; CHECK-DAG: %[[#U8:]] = OpTypeInt 8 0 +; CHECK-DAG: %[[#U32:]] = OpTypeInt 32 0 + +; CHECK-DAG: %[[#TYPE:]] = OpTypePointer CrossWorkgroup %[[#U8]] +; CHECK-DAG: %[[#VAL:]] = OpConstantNull %[[#TYPE]] +; CHECK-DAG: %[[#VTYPE:]] = OpTypePointer CrossWorkgroup %[[#TYPE]] +; CHECK-DAG: %[[#PTR:]] = OpVariable %[[#VTYPE]] CrossWorkgroup %[[#VAL]] @Ptr = addrspace(1) global ptr addrspace(1) null -@Init = private addrspace(2) constant i32 123 -; CHECK-DAG: %[[#PTR:]] = OpVariable %[[#]] UniformConstant %[[#]] -; CHECK-DAG: %[[#INIT:]] = OpVariable %[[#]] CrossWorkgroup %[[#]] +; CHECK-DAG: %[[#VAL:]] = OpConstant %[[#U32]] 123 +; CHECK-DAG: %[[#VTYPE:]] = OpTypePointer UniformConstant %[[#U32]] +; CHECK-DAG: %[[#INIT:]] = OpVariable %[[#VTYPE]] 
UniformConstant %[[#VAL]] +@Init = private addrspace(2) constant i32 123 -; CHECK: %[[#]] = OpLoad %[[#]] %[[#INIT]] Aligned 8 -; CHECK: OpCopyMemorySized %[[#]] %[[#PTR]] %[[#]] Aligned 4 +; CHECK-DAG: %[[#VAL:]] = OpConstant %[[#U32]] 456 +; CHECK-DAG: %[[#VTYPE:]] = OpTypePointer Private %[[#U32]] +; CHECK-DAG: %[[#]] = OpVariable %[[#VTYPE]] Private %[[#VAL]] +@PrivInternal = internal addrspace(10) global i32 456 define spir_kernel void @Foo() { + ; CHECK: %[[#]] = OpLoad %[[#]] %[[#PTR]] Aligned 8 %l = load ptr addrspace(1), ptr addrspace(1) @Ptr, align 8 + ; CHECK: OpCopyMemorySized %[[#]] %[[#INIT]] %[[#]] Aligned 4 call void @llvm.memcpy.p1.p2.i64(ptr addrspace(1) align 4 %l, ptr addrspace(2) align 1 @Init, i64 4, i1 false) ret void } From 4a6ecd38218219ede3df34eeea72f3ffccec3413 Mon Sep 17 00:00:00 2001 From: Viktoria Maximova Date: Tue, 3 Dec 2024 13:47:18 +0100 Subject: [PATCH 064/191] Add support for SPIR-V extension: SPV_INTEL_media_block_io (#118024) This change implements the SPV_INTEL_media_block_io extension in the SPIR-V backend. --- llvm/docs/SPIRVUsage.rst | 2 + llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp | 24 +++- llvm/lib/Target/SPIRV/SPIRVBuiltins.td | 24 +++- llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp | 2 + llvm/lib/Target/SPIRV/SPIRVInstrInfo.td | 6 + llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 7 ++ .../lib/Target/SPIRV/SPIRVSymbolicOperands.td | 2 +- .../builtin-op-wrappers.ll | 115 ++++++++++++++++++ .../cl_intel_media_block_io.ll | 115 ++++++++++++++++++ 9 files changed, 286 insertions(+), 11 deletions(-) create mode 100644 llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_media_block_io/builtin-op-wrappers.ll create mode 100644 llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_media_block_io/cl_intel_media_block_io.ll diff --git a/llvm/docs/SPIRVUsage.rst b/llvm/docs/SPIRVUsage.rst index ffb04dca00fc5..0a2f47c54dd97 100644 --- a/llvm/docs/SPIRVUsage.rst +++ b/llvm/docs/SPIRVUsage.rst @@ -165,6 +165,8 @@ list of supported SPIR-V extensions, sorted alphabetically by their extension na - Adds decorations that can be applied to global (module scope) variables. * - ``SPV_INTEL_global_variable_fpga_decorations`` - Adds decorations that can be applied to global (module scope) variables to help code generation for FPGA devices. + * - ``SPV_INTEL_media_block_io`` + - Adds additional subgroup block read and write functionality that allow applications to flexibly specify the width and height of the block to read from or write to a 2D image. * - ``SPV_INTEL_optnone`` - Adds OptNoneINTEL value for Function Control mask that indicates a request to not optimize the function. 
* - ``SPV_INTEL_split_barrier`` diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp index a1684b87722cb..45a49674d4ca2 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp @@ -91,6 +91,7 @@ struct IntelSubgroupsBuiltin { uint32_t Opcode; bool IsBlock; bool IsWrite; + bool IsMedia; }; #define GET_IntelSubgroupsBuiltins_DECL @@ -215,6 +216,8 @@ std::string lookupBuiltinNameHelper(StringRef DemangledCall) { // - "__spirv_ReadClockKHR" // - "__spirv_SubgroupBlockReadINTEL" // - "__spirv_SubgroupImageBlockReadINTEL" + // - "__spirv_SubgroupImageMediaBlockReadINTEL" + // - "__spirv_SubgroupImageMediaBlockWriteINTEL" // - "__spirv_Convert" // - "__spirv_UConvert" // - "__spirv_SConvert" @@ -225,7 +228,9 @@ std::string lookupBuiltinNameHelper(StringRef DemangledCall) { static const std::regex SpvWithR( "(__spirv_(ImageSampleExplicitLod|ImageRead|ImageQuerySizeLod|UDotKHR|" "SDotKHR|SUDotKHR|SDotAccSatKHR|UDotAccSatKHR|SUDotAccSatKHR|" - "ReadClockKHR|SubgroupBlockReadINTEL|SubgroupImageBlockReadINTEL|Convert|" + "ReadClockKHR|SubgroupBlockReadINTEL|SubgroupImageBlockReadINTEL|" + "SubgroupImageMediaBlockReadINTEL|SubgroupImageMediaBlockWriteINTEL|" + "Convert|" "UConvert|SConvert|FConvert|SatConvert).*)_R.*"); std::smatch Match; if (std::regex_match(BuiltinName, Match, SpvWithR) && Match.size() > 2) @@ -1192,19 +1197,28 @@ static bool generateIntelSubgroupsInst(const SPIRV::IncomingCall *Call, const SPIRV::DemangledBuiltin *Builtin = Call->Builtin; MachineFunction &MF = MIRBuilder.getMF(); const auto *ST = static_cast(&MF.getSubtarget()); - if (!ST->canUseExtension(SPIRV::Extension::SPV_INTEL_subgroups)) { + const SPIRV::IntelSubgroupsBuiltin *IntelSubgroups = + SPIRV::lookupIntelSubgroupsBuiltin(Builtin->Name); + + if (IntelSubgroups->IsMedia && + !ST->canUseExtension(SPIRV::Extension::SPV_INTEL_media_block_io)) { + std::string DiagMsg = std::string(Builtin->Name) + + ": the builtin requires the following SPIR-V " + "extension: SPV_INTEL_media_block_io"; + report_fatal_error(DiagMsg.c_str(), false); + } else if (!IntelSubgroups->IsMedia && + !ST->canUseExtension(SPIRV::Extension::SPV_INTEL_subgroups)) { std::string DiagMsg = std::string(Builtin->Name) + ": the builtin requires the following SPIR-V " "extension: SPV_INTEL_subgroups"; report_fatal_error(DiagMsg.c_str(), false); } - const SPIRV::IntelSubgroupsBuiltin *IntelSubgroups = - SPIRV::lookupIntelSubgroupsBuiltin(Builtin->Name); uint32_t OpCode = IntelSubgroups->Opcode; if (Call->isSpirvOp()) { bool IsSet = OpCode != SPIRV::OpSubgroupBlockWriteINTEL && - OpCode != SPIRV::OpSubgroupImageBlockWriteINTEL; + OpCode != SPIRV::OpSubgroupImageBlockWriteINTEL && + OpCode != SPIRV::OpSubgroupImageMediaBlockWriteINTEL; return buildOpFromWrapper(MIRBuilder, OpCode, Call, IsSet ? 
GR->getSPIRVTypeID(Call->ReturnType) : Register(0)); diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td index 43f4e78c8469c..dc2da4a3a5647 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td @@ -1156,14 +1156,19 @@ class IntelSubgroupsBuiltin { string Name = name; Op Opcode = operation; bit IsBlock = !or(!eq(operation, OpSubgroupBlockReadINTEL), - !eq(operation, OpSubgroupBlockWriteINTEL)); - bit IsWrite = !eq(operation, OpSubgroupBlockWriteINTEL); + !eq(operation, OpSubgroupBlockWriteINTEL), + !eq(operation, OpSubgroupImageMediaBlockReadINTEL), + !eq(operation, OpSubgroupImageMediaBlockWriteINTEL)); + bit IsWrite = !or(!eq(operation, OpSubgroupBlockWriteINTEL), + !eq(operation, OpSubgroupImageMediaBlockWriteINTEL)); + bit IsMedia = !or(!eq(operation, OpSubgroupImageMediaBlockReadINTEL), + !eq(operation, OpSubgroupImageMediaBlockWriteINTEL)); } // Table gathering all the Intel sub group builtins. def IntelSubgroupsBuiltins : GenericTable { let FilterClass = "IntelSubgroupsBuiltin"; - let Fields = ["Name", "Opcode", "IsBlock", "IsWrite"]; + let Fields = ["Name", "Opcode", "IsBlock", "IsWrite", "IsMedia"]; } // Function to lookup group builtins by their name and set. @@ -1191,17 +1196,24 @@ foreach i = ["", "2", "4", "8"] in { // cl_intel_subgroups_short defm : DemangledIntelSubgroupsBuiltin; defm : DemangledIntelSubgroupsBuiltin; + // cl_intel_media_block_io + defm : DemangledIntelSubgroupsBuiltin; + defm : DemangledIntelSubgroupsBuiltin; + defm : DemangledIntelSubgroupsBuiltin; + defm : DemangledIntelSubgroupsBuiltin; } -// cl_intel_subgroups_char, cl_intel_subgroups_short, cl_intel_subgroups_long +// cl_intel_subgroups_char, cl_intel_subgroups_short, cl_intel_subgroups_long, cl_intel_media_block_io foreach i = ["", "2", "4", "8", "16"] in { foreach j = ["c", "s", "l"] in { defm : DemangledIntelSubgroupsBuiltin; defm : DemangledIntelSubgroupsBuiltin; + defm : DemangledIntelSubgroupsBuiltin; + defm : DemangledIntelSubgroupsBuiltin; } } // OpSubgroupImageBlockReadINTEL and OpSubgroupImageBlockWriteINTEL are to be resolved later on (in code) -// Multiclass used to define builtin wrappers for the SPV_INTEL_subgroups extension. +// Multiclass used to define builtin wrappers for the SPV_INTEL_subgroups and the SPV_INTEL_media_block_io extensions. multiclass DemangledIntelSubgroupsBuiltinWrapper numArgs, Op operation> { def : DemangledBuiltin; def : IntelSubgroupsBuiltin; @@ -1215,6 +1227,8 @@ defm : DemangledIntelSubgroupsBuiltinWrapper<"SubgroupBlockReadINTEL", 1, OpSubg defm : DemangledIntelSubgroupsBuiltinWrapper<"SubgroupBlockWriteINTEL", 2, OpSubgroupBlockWriteINTEL>; defm : DemangledIntelSubgroupsBuiltinWrapper<"SubgroupImageBlockReadINTEL", 2, OpSubgroupImageBlockReadINTEL>; defm : DemangledIntelSubgroupsBuiltinWrapper<"SubgroupImageBlockWriteINTEL", 3, OpSubgroupImageBlockWriteINTEL>; +defm : DemangledIntelSubgroupsBuiltinWrapper<"SubgroupImageMediaBlockReadINTEL", 4, OpSubgroupImageMediaBlockReadINTEL>; +defm : DemangledIntelSubgroupsBuiltinWrapper<"SubgroupImageMediaBlockWriteINTEL", 5, OpSubgroupImageMediaBlockWriteINTEL>; //===----------------------------------------------------------------------===// // Class defining a builtin for group operations within uniform control flow. 
diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp index 186bccc481a8a..19a9afac7b3f7 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp @@ -48,6 +48,8 @@ static const std::map> SPIRV::Extension::Extension::SPV_INTEL_split_barrier}, {"SPV_INTEL_subgroups", SPIRV::Extension::Extension::SPV_INTEL_subgroups}, + {"SPV_INTEL_media_block_io", + SPIRV::Extension::Extension::SPV_INTEL_media_block_io}, {"SPV_KHR_uniform_group_instructions", SPIRV::Extension::Extension::SPV_KHR_uniform_group_instructions}, {"SPV_KHR_no_integer_wrap_decoration", diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td index fde23d9d0ca5f..53f1b644a9498 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td @@ -850,6 +850,12 @@ def OpSubgroupImageBlockReadINTEL: Op<5577, (outs ID:$res), (ins TYPE:$type, ID: def OpSubgroupImageBlockWriteINTEL: Op<5578, (outs), (ins ID:$image, ID:$coordinate, ID:$data), "OpSubgroupImageBlockWriteINTEL $image $coordinate $data">; +// SPV_INTEL_media_block_io +def OpSubgroupImageMediaBlockReadINTEL: Op<5580, (outs ID:$res), (ins TYPE:$type, ID:$image, ID:$coordinate, ID:$width, ID:$height), + "$res = OpSubgroupImageMediaBlockReadINTEL $type $image $coordinate $width $height">; +def OpSubgroupImageMediaBlockWriteINTEL: Op<5581, (outs), (ins ID:$image, ID:$coordinate, ID:$width, ID:$height, ID:$data), + "OpSubgroupImageMediaBlockWriteINTEL $image $coordinate $width $height $data">; + // - SPV_KHR_uniform_group_instructions def OpGroupIMulKHR: Op<6401, (outs ID:$res), (ins TYPE:$type, ID:$scope, i32imm:$groupOp, ID:$value), "$res = OpGroupIMulKHR $type $scope $groupOp $value">; diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index d9f928eb90640..d001f9451e739 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -1320,6 +1320,13 @@ void addInstrRequirements(const MachineInstr &MI, Reqs.addCapability(SPIRV::Capability::SubgroupImageBlockIOINTEL); } break; + case SPIRV::OpSubgroupImageMediaBlockReadINTEL: + case SPIRV::OpSubgroupImageMediaBlockWriteINTEL: + if (ST.canUseExtension(SPIRV::Extension::SPV_INTEL_media_block_io)) { + Reqs.addExtension(SPIRV::Extension::SPV_INTEL_media_block_io); + Reqs.addCapability(SPIRV::Capability::SubgroupImageMediaBlockIOINTEL); + } + break; case SPIRV::OpAssumeTrueKHR: case SPIRV::OpExpectKHR: if (ST.canUseExtension(SPIRV::Extension::SPV_KHR_expect_assume)) { diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td index b88f6f5766a05..35a74af6b03a3 100644 --- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td +++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td @@ -446,7 +446,7 @@ defm RayTracingNV : CapabilityOperand<5340, 0, 0, [], [Shader]>; defm SubgroupShuffleINTEL : CapabilityOperand<5568, 0, 0, [SPV_INTEL_subgroups], []>; defm SubgroupBufferBlockIOINTEL : CapabilityOperand<5569, 0, 0, [SPV_INTEL_subgroups], []>; defm SubgroupImageBlockIOINTEL : CapabilityOperand<5570, 0, 0, [SPV_INTEL_subgroups], []>; -defm SubgroupImageMediaBlockIOINTEL : CapabilityOperand<5579, 0, 0, [], []>; +defm SubgroupImageMediaBlockIOINTEL : CapabilityOperand<5579, 0, 0, [SPV_INTEL_media_block_io], []>; defm SubgroupAvcMotionEstimationINTEL : CapabilityOperand<5696, 0, 0, [], []>; defm SubgroupAvcMotionEstimationIntraINTEL : 
CapabilityOperand<5697, 0, 0, [], []>; defm SubgroupAvcMotionEstimationChromaINTEL : CapabilityOperand<5698, 0, 0, [], []>; diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_media_block_io/builtin-op-wrappers.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_media_block_io/builtin-op-wrappers.ll new file mode 100644 index 0000000000000..c30370c179516 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_media_block_io/builtin-op-wrappers.ll @@ -0,0 +1,115 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_media_block_io %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_media_block_io %s -o - -filetype=obj | spirv-val %} + +; CHECK: Capability SubgroupImageMediaBlockIOINTEL +; CHECK: Extension "SPV_INTEL_media_block_io" +; CHECK-COUNT-14: SubgroupImageMediaBlockReadINTEL +; CHECK-COUNT-14: SubgroupImageMediaBlockWriteINTEL + +define spir_kernel void @intel_media_block_test(<2 x i32> %edgeCoord, ptr addrspace(1) %image_in, ptr addrspace(1) %image_out) !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !8 !kernel_arg_base_type !8 { +entry: + %call = call spir_func i8 @_Z46__spirv_SubgroupImageMediaBlockReadINTEL_RcharPU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr addrspace(1) %image_in, <2 x i32> %edgeCoord, i32 1, i32 16) + %call1 = call spir_func <2 x i8> @_Z47__spirv_SubgroupImageMediaBlockReadINTEL_Rchar2PU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr addrspace(1) %image_in, <2 x i32> %edgeCoord, i32 1, i32 16) + %call2 = call spir_func <4 x i8> @_Z47__spirv_SubgroupImageMediaBlockReadINTEL_Rchar4PU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr addrspace(1) %image_in, <2 x i32> %edgeCoord, i32 1, i32 16) + %call3 = call spir_func <8 x i8> @_Z47__spirv_SubgroupImageMediaBlockReadINTEL_Rchar8PU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr addrspace(1) %image_in, <2 x i32> %edgeCoord, i32 1, i32 16) + %call4 = call spir_func <16 x i8> @_Z48__spirv_SubgroupImageMediaBlockReadINTEL_Rchar16PU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr addrspace(1) %image_in, <2 x i32> %edgeCoord, i32 1, i32 16) + %call5 = call spir_func i16 @_Z47__spirv_SubgroupImageMediaBlockReadINTEL_RshortPU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr addrspace(1) %image_in, <2 x i32> %edgeCoord, i32 1, i32 16) + %call6 = call spir_func <2 x i16> @_Z48__spirv_SubgroupImageMediaBlockReadINTEL_Rshort2PU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr addrspace(1) %image_in, <2 x i32> %edgeCoord, i32 1, i32 16) + %call7 = call spir_func <4 x i16> @_Z48__spirv_SubgroupImageMediaBlockReadINTEL_Rshort4PU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr addrspace(1) %image_in, <2 x i32> %edgeCoord, i32 1, i32 16) + %call8 = call spir_func <8 x i16> @_Z48__spirv_SubgroupImageMediaBlockReadINTEL_Rshort8PU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr addrspace(1) %image_in, <2 x i32> %edgeCoord, i32 1, i32 16) + %call9 = call spir_func <16 x i16> @_Z49__spirv_SubgroupImageMediaBlockReadINTEL_Rshort16PU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr addrspace(1) %image_in, <2 x i32> %edgeCoord, i32 1, i32 16) + %call10 = call spir_func i32 @_Z45__spirv_SubgroupImageMediaBlockReadINTEL_RintPU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr addrspace(1) %image_in, <2 x i32> %edgeCoord, i32 1, i32 16) + %call11 = call spir_func <2 x i32> @_Z46__spirv_SubgroupImageMediaBlockReadINTEL_Rint2PU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr 
addrspace(1) %image_in, <2 x i32> %edgeCoord, i32 1, i32 16) + %call12 = call spir_func <4 x i32> @_Z46__spirv_SubgroupImageMediaBlockReadINTEL_Rint4PU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr addrspace(1) %image_in, <2 x i32> %edgeCoord, i32 1, i32 16) + %call13 = call spir_func <8 x i32> @_Z46__spirv_SubgroupImageMediaBlockReadINTEL_Rint8PU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr addrspace(1) %image_in, <2 x i32> %edgeCoord, i32 1, i32 16) + call spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiic(ptr addrspace(1) %image_out, <2 x i32> %edgeCoord, i32 1, i32 16, i8 %call) + call spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiiDv2_c(ptr addrspace(1) %image_out, <2 x i32> %edgeCoord, i32 1, i32 16, <2 x i8> %call1) + call spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiiDv4_c(ptr addrspace(1) %image_out, <2 x i32> %edgeCoord, i32 1, i32 16, <4 x i8> %call2) + call spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiiDv8_c(ptr addrspace(1) %image_out, <2 x i32> %edgeCoord, i32 1, i32 16, <8 x i8> %call3) + call spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiiDv16_c(ptr addrspace(1) %image_out, <2 x i32> %edgeCoord, i32 1, i32 16, <16 x i8> %call4) + call spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiis(ptr addrspace(1) %image_out, <2 x i32> %edgeCoord, i32 1, i32 16, i16 %call5) + call spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiiDv2_s(ptr addrspace(1) %image_out, <2 x i32> %edgeCoord, i32 1, i32 16, <2 x i16> %call6) + call spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiiDv4_s(ptr addrspace(1) %image_out, <2 x i32> %edgeCoord, i32 1, i32 16, <4 x i16> %call7) + call spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiiDv8_s(ptr addrspace(1) %image_out, <2 x i32> %edgeCoord, i32 1, i32 16, <8 x i16> %call8) + call spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiiDv16_s(ptr addrspace(1) %image_out, <2 x i32> %edgeCoord, i32 1, i32 16, <16 x i16> %call9) + call spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiii(ptr addrspace(1) %image_out, <2 x i32> %edgeCoord, i32 1, i32 16, i32 %call10) + call spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiiS2_(ptr addrspace(1) %image_out, <2 x i32> %edgeCoord, i32 1, i32 16, <2 x i32> %call11) + call spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiiDv4_i(ptr addrspace(1) %image_out, <2 x i32> %edgeCoord, i32 1, i32 16, <4 x i32> %call12) + call spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiiDv8_i(ptr addrspace(1) %image_out, <2 x i32> %edgeCoord, i32 1, i32 16, <8 x i32> %call13) + ret void +} + +declare spir_func i8 @_Z46__spirv_SubgroupImageMediaBlockReadINTEL_RcharPU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr addrspace(1), <2 x i32>, i32, i32) + +declare spir_func <2 x i8> 
@_Z47__spirv_SubgroupImageMediaBlockReadINTEL_Rchar2PU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr addrspace(1), <2 x i32>, i32, i32) + +declare spir_func <4 x i8> @_Z47__spirv_SubgroupImageMediaBlockReadINTEL_Rchar4PU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr addrspace(1), <2 x i32>, i32, i32) + +declare spir_func <8 x i8> @_Z47__spirv_SubgroupImageMediaBlockReadINTEL_Rchar8PU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr addrspace(1), <2 x i32>, i32, i32) + +declare spir_func <16 x i8> @_Z48__spirv_SubgroupImageMediaBlockReadINTEL_Rchar16PU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr addrspace(1), <2 x i32>, i32, i32) + +declare spir_func i16 @_Z47__spirv_SubgroupImageMediaBlockReadINTEL_RshortPU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr addrspace(1), <2 x i32>, i32, i32) + +declare spir_func <2 x i16> @_Z48__spirv_SubgroupImageMediaBlockReadINTEL_Rshort2PU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr addrspace(1), <2 x i32>, i32, i32) + +declare spir_func <4 x i16> @_Z48__spirv_SubgroupImageMediaBlockReadINTEL_Rshort4PU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr addrspace(1), <2 x i32>, i32, i32) + +declare spir_func <8 x i16> @_Z48__spirv_SubgroupImageMediaBlockReadINTEL_Rshort8PU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr addrspace(1), <2 x i32>, i32, i32) + +declare spir_func <16 x i16> @_Z49__spirv_SubgroupImageMediaBlockReadINTEL_Rshort16PU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr addrspace(1), <2 x i32>, i32, i32) + +declare spir_func i32 @_Z45__spirv_SubgroupImageMediaBlockReadINTEL_RintPU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr addrspace(1), <2 x i32>, i32, i32) + +declare spir_func <2 x i32> @_Z46__spirv_SubgroupImageMediaBlockReadINTEL_Rint2PU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr addrspace(1), <2 x i32>, i32, i32) + +declare spir_func <4 x i32> @_Z46__spirv_SubgroupImageMediaBlockReadINTEL_Rint4PU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr addrspace(1), <2 x i32>, i32, i32) + +declare spir_func <8 x i32> @_Z46__spirv_SubgroupImageMediaBlockReadINTEL_Rint8PU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_iii(ptr addrspace(1), <2 x i32>, i32, i32) + +declare spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiic(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1), <2 x i32>, i32, i32, i8) + +declare spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiiDv2_c(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1), <2 x i32>, i32, i32, <2 x i8>) + +declare spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiiDv4_c(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1), <2 x i32>, i32, i32, <4 x i8>) + +declare spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiiDv8_c(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1), <2 x i32>, i32, i32, <8 x i8>) + +declare spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiiDv16_c(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1), <2 x i32>, i32, i32, <16 x i8>) + +declare spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiis(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1), <2 x i32>, i32, i32, i16) + +declare spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiiDv2_s(target("spirv.Image", 
void, 1, 0, 0, 0, 0, 0, 1), <2 x i32>, i32, i32, <2 x i16>) + +declare spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiiDv4_s(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1), <2 x i32>, i32, i32, <4 x i16>) + +declare spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiiDv8_s(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1), <2 x i32>, i32, i32, <8 x i16>) + +declare spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiiDv16_s(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1), <2 x i32>, i32, i32, <16 x i16>) + +declare spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiii(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1), <2 x i32>, i32, i32, i32) + +declare spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiiS2_(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1), <2 x i32>, i32, i32, <2 x i32>) + +declare spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiiDv4_i(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1), <2 x i32>, i32, i32, <4 x i32>) + +declare spir_func void @_Z41__spirv_SubgroupImageMediaBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iiiDv8_i(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1), <2 x i32>, i32, i32, <8 x i32>) + +!spirv.MemoryModel = !{!0} +!opencl.enable.FP_CONTRACT = !{} +!spirv.Source = !{!1} +!opencl.spir.version = !{!2} +!opencl.ocl.version = !{!2} +!opencl.used.extensions = !{!3} +!opencl.used.optional.core.features = !{!4} +!spirv.Generator = !{!5} + +!0 = !{i32 1, i32 2} +!1 = !{i32 3, i32 200000} +!2 = !{i32 2, i32 0} +!3 = !{} +!4 = !{!"cl_images"} +!5 = !{i16 6, i16 14} +!6 = !{i32 0, i32 1, i32 1} +!7 = !{!"none", !"read_only", !"write_only"} +!8 = !{!"int2", !"image2d_t", !"image2d_t"} diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_media_block_io/cl_intel_media_block_io.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_media_block_io/cl_intel_media_block_io.ll new file mode 100644 index 0000000000000..735094c7c8862 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_media_block_io/cl_intel_media_block_io.ll @@ -0,0 +1,115 @@ +; Compiled from https://github.com/KhronosGroup/SPIRV-LLVM-Translator/test/extensions/INTEL/SPV_INTEL_media_block_io/SPV_INTEL_media_block_io.cl + +; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR +; CHECK-ERROR: LLVM ERROR: intel_sub_group_media_block_read_uc: the builtin requires the following SPIR-V extension: SPV_INTEL_media_block_io + +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_media_block_io %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_media_block_io %s -o - -filetype=obj | spirv-val %} +; CHECK: Capability SubgroupImageMediaBlockIOINTEL +; CHECK: Extension "SPV_INTEL_media_block_io" + +; CHECK-COUNT-14: SubgroupImageMediaBlockReadINTEL +; CHECK-COUNT-14: SubgroupImageMediaBlockWriteINTEL + +define spir_kernel void @intel_media_block_test(<2 x i32> noundef %edgeCoord, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %src_luma_image, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) %dst_luma_image) !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 
!kernel_arg_base_type !6 { +entry: + %call = tail call spir_func zeroext i8 @_Z35intel_sub_group_media_block_read_ucDv2_iii14ocl_image2d_ro(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %src_luma_image) + %call1 = tail call spir_func <2 x i8> @_Z36intel_sub_group_media_block_read_uc2Dv2_iii14ocl_image2d_ro(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %src_luma_image) + %call2 = tail call spir_func <4 x i8> @_Z36intel_sub_group_media_block_read_uc4Dv2_iii14ocl_image2d_ro(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %src_luma_image) + %call3 = tail call spir_func <8 x i8> @_Z36intel_sub_group_media_block_read_uc8Dv2_iii14ocl_image2d_ro(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %src_luma_image) + %call4 = tail call spir_func <16 x i8> @_Z37intel_sub_group_media_block_read_uc16Dv2_iii14ocl_image2d_ro(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %src_luma_image) + %call5 = tail call spir_func zeroext i16 @_Z35intel_sub_group_media_block_read_usDv2_iii14ocl_image2d_ro(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %src_luma_image) + %call6 = tail call spir_func <2 x i16> @_Z36intel_sub_group_media_block_read_us2Dv2_iii14ocl_image2d_ro(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %src_luma_image) + %call7 = tail call spir_func <4 x i16> @_Z36intel_sub_group_media_block_read_us4Dv2_iii14ocl_image2d_ro(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %src_luma_image) + %call8 = tail call spir_func <8 x i16> @_Z36intel_sub_group_media_block_read_us8Dv2_iii14ocl_image2d_ro(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %src_luma_image) + %call9 = tail call spir_func <16 x i16> @_Z37intel_sub_group_media_block_read_us16Dv2_iii14ocl_image2d_ro(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %src_luma_image) + %call10 = tail call spir_func i32 @_Z35intel_sub_group_media_block_read_uiDv2_iii14ocl_image2d_ro(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %src_luma_image) + %call11 = tail call spir_func <2 x i32> @_Z36intel_sub_group_media_block_read_ui2Dv2_iii14ocl_image2d_ro(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %src_luma_image) + %call12 = tail call spir_func <4 x i32> @_Z36intel_sub_group_media_block_read_ui4Dv2_iii14ocl_image2d_ro(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %src_luma_image) + %call13 = tail call spir_func <8 x i32> @_Z36intel_sub_group_media_block_read_ui8Dv2_iii14ocl_image2d_ro(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %src_luma_image) + tail call spir_func void @_Z36intel_sub_group_media_block_write_ucDv2_iiih14ocl_image2d_wo(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, i8 noundef zeroext %call, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) %dst_luma_image) + tail call 
spir_func void @_Z37intel_sub_group_media_block_write_uc2Dv2_iiiDv2_h14ocl_image2d_wo(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, <2 x i8> noundef %call1, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) %dst_luma_image) + tail call spir_func void @_Z37intel_sub_group_media_block_write_uc4Dv2_iiiDv4_h14ocl_image2d_wo(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, <4 x i8> noundef %call2, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) %dst_luma_image) + tail call spir_func void @_Z37intel_sub_group_media_block_write_uc8Dv2_iiiDv8_h14ocl_image2d_wo(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, <8 x i8> noundef %call3, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) %dst_luma_image) + tail call spir_func void @_Z38intel_sub_group_media_block_write_uc16Dv2_iiiDv16_h14ocl_image2d_wo(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, <16 x i8> noundef %call4, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) %dst_luma_image) + tail call spir_func void @_Z36intel_sub_group_media_block_write_usDv2_iiit14ocl_image2d_wo(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, i16 noundef zeroext %call5, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) %dst_luma_image) + tail call spir_func void @_Z37intel_sub_group_media_block_write_us2Dv2_iiiDv2_t14ocl_image2d_wo(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, <2 x i16> noundef %call6, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) %dst_luma_image) + tail call spir_func void @_Z37intel_sub_group_media_block_write_us4Dv2_iiiDv4_t14ocl_image2d_wo(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, <4 x i16> noundef %call7, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) %dst_luma_image) + tail call spir_func void @_Z37intel_sub_group_media_block_write_us8Dv2_iiiDv8_t14ocl_image2d_wo(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, <8 x i16> noundef %call8, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) %dst_luma_image) + tail call spir_func void @_Z38intel_sub_group_media_block_write_us16Dv2_iiiDv16_t14ocl_image2d_wo(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, <16 x i16> noundef %call9, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) %dst_luma_image) + tail call spir_func void @_Z36intel_sub_group_media_block_write_uiDv2_iiij14ocl_image2d_wo(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, i32 noundef %call10, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) %dst_luma_image) + tail call spir_func void @_Z37intel_sub_group_media_block_write_ui2Dv2_iiiDv2_j14ocl_image2d_wo(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, <2 x i32> noundef %call11, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) %dst_luma_image) + tail call spir_func void @_Z37intel_sub_group_media_block_write_ui4Dv2_iiiDv4_j14ocl_image2d_wo(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, <4 x i32> noundef %call12, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) %dst_luma_image) + tail call spir_func void @_Z37intel_sub_group_media_block_write_ui8Dv2_iiiDv8_j14ocl_image2d_wo(<2 x i32> noundef %edgeCoord, i32 noundef 1, i32 noundef 16, <8 x i32> noundef %call13, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) %dst_luma_image) + ret void +} + +declare spir_func zeroext i8 @_Z35intel_sub_group_media_block_read_ucDv2_iii14ocl_image2d_ro(<2 x i32> noundef, i32 noundef, i32 noundef, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0)) + +declare spir_func <2 x i8> @_Z36intel_sub_group_media_block_read_uc2Dv2_iii14ocl_image2d_ro(<2 x i32> 
noundef, i32 noundef, i32 noundef, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0)) + +declare spir_func <4 x i8> @_Z36intel_sub_group_media_block_read_uc4Dv2_iii14ocl_image2d_ro(<2 x i32> noundef, i32 noundef, i32 noundef, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0)) + +declare spir_func <8 x i8> @_Z36intel_sub_group_media_block_read_uc8Dv2_iii14ocl_image2d_ro(<2 x i32> noundef, i32 noundef, i32 noundef, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0)) + +declare spir_func <16 x i8> @_Z37intel_sub_group_media_block_read_uc16Dv2_iii14ocl_image2d_ro(<2 x i32> noundef, i32 noundef, i32 noundef, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0)) + +declare spir_func zeroext i16 @_Z35intel_sub_group_media_block_read_usDv2_iii14ocl_image2d_ro(<2 x i32> noundef, i32 noundef, i32 noundef, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0)) + +declare spir_func <2 x i16> @_Z36intel_sub_group_media_block_read_us2Dv2_iii14ocl_image2d_ro(<2 x i32> noundef, i32 noundef, i32 noundef, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0)) + +declare spir_func <4 x i16> @_Z36intel_sub_group_media_block_read_us4Dv2_iii14ocl_image2d_ro(<2 x i32> noundef, i32 noundef, i32 noundef, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0)) + +declare spir_func <8 x i16> @_Z36intel_sub_group_media_block_read_us8Dv2_iii14ocl_image2d_ro(<2 x i32> noundef, i32 noundef, i32 noundef, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0)) + +declare spir_func <16 x i16> @_Z37intel_sub_group_media_block_read_us16Dv2_iii14ocl_image2d_ro(<2 x i32> noundef, i32 noundef, i32 noundef, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0)) + +declare spir_func i32 @_Z35intel_sub_group_media_block_read_uiDv2_iii14ocl_image2d_ro(<2 x i32> noundef, i32 noundef, i32 noundef, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0)) + +declare spir_func <2 x i32> @_Z36intel_sub_group_media_block_read_ui2Dv2_iii14ocl_image2d_ro(<2 x i32> noundef, i32 noundef, i32 noundef, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0)) + +declare spir_func <4 x i32> @_Z36intel_sub_group_media_block_read_ui4Dv2_iii14ocl_image2d_ro(<2 x i32> noundef, i32 noundef, i32 noundef, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0)) + +declare spir_func <8 x i32> @_Z36intel_sub_group_media_block_read_ui8Dv2_iii14ocl_image2d_ro(<2 x i32> noundef, i32 noundef, i32 noundef, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0)) + +declare spir_func void @_Z36intel_sub_group_media_block_write_ucDv2_iiih14ocl_image2d_wo(<2 x i32> noundef, i32 noundef, i32 noundef, i8 noundef zeroext, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1)) + +declare spir_func void @_Z37intel_sub_group_media_block_write_uc2Dv2_iiiDv2_h14ocl_image2d_wo(<2 x i32> noundef, i32 noundef, i32 noundef, <2 x i8> noundef, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1)) + +declare spir_func void @_Z37intel_sub_group_media_block_write_uc4Dv2_iiiDv4_h14ocl_image2d_wo(<2 x i32> noundef, i32 noundef, i32 noundef, <4 x i8> noundef, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1)) + +declare spir_func void @_Z37intel_sub_group_media_block_write_uc8Dv2_iiiDv8_h14ocl_image2d_wo(<2 x i32> noundef, i32 noundef, i32 noundef, <8 x i8> noundef, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1)) + +declare spir_func void @_Z38intel_sub_group_media_block_write_uc16Dv2_iiiDv16_h14ocl_image2d_wo(<2 x i32> noundef, i32 noundef, i32 noundef, <16 x i8> noundef, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1)) + +declare spir_func void @_Z36intel_sub_group_media_block_write_usDv2_iiit14ocl_image2d_wo(<2 x i32> noundef, i32 noundef, i32 
noundef, i16 noundef zeroext, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1)) + +declare spir_func void @_Z37intel_sub_group_media_block_write_us2Dv2_iiiDv2_t14ocl_image2d_wo(<2 x i32> noundef, i32 noundef, i32 noundef, <2 x i16> noundef, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1)) + +declare spir_func void @_Z37intel_sub_group_media_block_write_us4Dv2_iiiDv4_t14ocl_image2d_wo(<2 x i32> noundef, i32 noundef, i32 noundef, <4 x i16> noundef, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1)) + +declare spir_func void @_Z37intel_sub_group_media_block_write_us8Dv2_iiiDv8_t14ocl_image2d_wo(<2 x i32> noundef, i32 noundef, i32 noundef, <8 x i16> noundef, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1)) + +declare spir_func void @_Z38intel_sub_group_media_block_write_us16Dv2_iiiDv16_t14ocl_image2d_wo(<2 x i32> noundef, i32 noundef, i32 noundef, <16 x i16> noundef, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1)) + +declare spir_func void @_Z36intel_sub_group_media_block_write_uiDv2_iiij14ocl_image2d_wo(<2 x i32> noundef, i32 noundef, i32 noundef, i32 noundef, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1)) + +declare spir_func void @_Z37intel_sub_group_media_block_write_ui2Dv2_iiiDv2_j14ocl_image2d_wo(<2 x i32> noundef, i32 noundef, i32 noundef, <2 x i32> noundef, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1)) + +declare spir_func void @_Z37intel_sub_group_media_block_write_ui4Dv2_iiiDv4_j14ocl_image2d_wo(<2 x i32> noundef, i32 noundef, i32 noundef, <4 x i32> noundef, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1)) + +declare spir_func void @_Z37intel_sub_group_media_block_write_ui8Dv2_iiiDv8_j14ocl_image2d_wo(<2 x i32> noundef, i32 noundef, i32 noundef, <8 x i32> noundef, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1)) + + +!llvm.module.flags = !{!0} +!opencl.ocl.version = !{!1} +!opencl.spir.version = !{!1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 2, i32 0} +!2 = !{!"clang version 20.0.0git (https://github.com/llvm/llvm-project.git 32da1fd8c7d45d5209c6c781910c51940779ec52)"} +!3 = !{i32 0, i32 1, i32 1} +!4 = !{!"none", !"read_only", !"write_only"} +!5 = !{!"int2", !"image2d_t", !"image2d_t"} +!6 = !{!"int __attribute__((ext_vector_type(2)))", !"image2d_t", !"image2d_t"} From a63931292bad148a3498fdb30bbccb43844aee86 Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Tue, 3 Dec 2024 20:56:52 +0800 Subject: [PATCH 065/191] [X86] Fix typo of gracemont (#118486) --- compiler-rt/lib/builtins/cpu_model/x86.c | 2 +- llvm/lib/TargetParser/Host.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c index f9b95d2faf06a..606571d527501 100644 --- a/compiler-rt/lib/builtins/cpu_model/x86.c +++ b/compiler-rt/lib/builtins/cpu_model/x86.c @@ -485,7 +485,7 @@ static const char *getIntelProcessorTypeAndSubtype(unsigned Family, // Gracemont: case 0xbe: - CPU = "gracement"; + CPU = "gracemont"; *Type = INTEL_COREI7; *Subtype = INTEL_COREI7_ALDERLAKE; break; diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 4457e481def10..cd5a678d161b7 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -815,7 +815,7 @@ static StringRef getIntelProcessorTypeAndSubtype(unsigned Family, // Gracemont case 0xbe: - CPU = "gracement"; + CPU = "gracemont"; *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_ALDERLAKE; break; From 81d82cac8c4cbd006bf991fd7380de2d72858d1c Mon Sep 17 00:00:00 2001 From: macurtis-amd Date: Tue, 3 
Dec 2024 06:59:57 -0600 Subject: [PATCH 066/191] [flang] Treat pre-processed input as fixed (#117563) Fixes an issue introduced by 9fb2db1e1f42 [flang] Retain spaces when preprocessing fixed-form source Where flang -fc1 fails to parse preprocessor output because it now remains in fixed form. --- clang/lib/Driver/ToolChains/Flang.cpp | 7 +++++++ flang/test/Driver/pp-fixed-form.f90 | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 flang/test/Driver/pp-fixed-form.f90 diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 8c18c88fbde7f..72c0787d7df99 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -777,6 +777,13 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, addFortranDialectOptions(Args, CmdArgs); + // 'flang -E' always produces output that is suitable for use as fixed form + // Fortran. However it is only valid free form source if the original is also + // free form. + if (InputType == types::TY_PP_Fortran && + !Args.getLastArg(options::OPT_ffixed_form, options::OPT_ffree_form)) + CmdArgs.push_back("-ffixed-form"); + handleColorDiagnosticsArgs(D, Args, CmdArgs); // LTO mode is parsed by the Clang driver library. diff --git a/flang/test/Driver/pp-fixed-form.f90 b/flang/test/Driver/pp-fixed-form.f90 new file mode 100644 index 0000000000000..4695da78763ae --- /dev/null +++ b/flang/test/Driver/pp-fixed-form.f90 @@ -0,0 +1,19 @@ +!RUN: %flang -save-temps -### %S/Inputs/free-form-test.f90 2>&1 | FileCheck %s --check-prefix=FREE +FREE: "-fc1" {{.*}} "-o" "free-form-test.i" {{.*}} "-x" "f95-cpp-input" "{{.*}}/free-form-test.f90" +FREE-NEXT: "-fc1" {{.*}} "-ffixed-form" {{.*}} "-x" "f95" "free-form-test.i" + +!RUN: %flang -save-temps -### %S/Inputs/fixed-form-test.f 2>&1 | FileCheck %s --check-prefix=FIXED +FIXED: "-fc1" {{.*}} "-o" "fixed-form-test.i" {{.*}} "-x" "f95-cpp-input" "{{.*}}/fixed-form-test.f" +FIXED-NEXT: "-fc1" {{.*}} "-ffixed-form" {{.*}} "-x" "f95" "fixed-form-test.i" + +!RUN: %flang -save-temps -### -ffree-form %S/Inputs/free-form-test.f90 2>&1 | FileCheck %s --check-prefix=FREE-FLAG +FREE-FLAG: "-fc1" {{.*}} "-o" "free-form-test.i" {{.*}} "-x" "f95-cpp-input" "{{.*}}/free-form-test.f90" +FREE-FLAG-NEXT: "-fc1" {{.*}} "-emit-llvm-bc" "-ffree-form" +FREE-FLAG-NOT: "-ffixed-form" +FREE-FLAG-SAME: "-x" "f95" "free-form-test.i" + +!RUN: %flang -save-temps -### -ffixed-form %S/Inputs/fixed-form-test.f 2>&1 | FileCheck %s --check-prefix=FIXED-FLAG +FIXED-FLAG: "-fc1" {{.*}} "-o" "fixed-form-test.i" {{.*}} "-x" "f95-cpp-input" "{{.*}}/fixed-form-test.f" +FIXED-FLAG-NEXT: "-fc1" {{.*}} "-emit-llvm-bc" "-ffixed-form" +FIXED-FLAG-NOT: "-ffixed-form" +FIXED-FLAG-SAME: "-x" "f95" "fixed-form-test.i" From f33536468b7f05c05c8cf8088427b0b5b665eb65 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 3 Dec 2024 14:28:56 +0100 Subject: [PATCH 067/191] [InstCombine] Support gep nuw in icmp folds (#118472) Unsigned icmp of gep nuw folds to unsigned icmp of offsets. Unsigned icmp of gep nusw nuw folds to unsigned samesign icmp of offsets. 
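For example, a small before/after sketch drawn from the tests added below (@src and @tgt are placeholder names in the spirit of the Alive2 proofs linked underneath):

  define i1 @src(ptr %x, i64 %y) {
    ; gep carries nuw, so the offset cannot wrap the pointer upward
    %g = getelementptr nuw i8, ptr %x, i64 %y
    %r = icmp ugt ptr %g, %x
    ret i1 %r
  }

  ; folds to

  define i1 @tgt(ptr %x, i64 %y) {
    %r = icmp ne i64 %y, 0
    ret i1 %r
  }

When both operands are nusw nuw GEPs of the same base, the resulting unsigned compare of the offsets is additionally marked samesign (see test60_nusw_nuw below).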
Proofs: https://alive2.llvm.org/ce/z/VEwQY8 --- .../InstCombine/InstCombineCompares.cpp | 36 ++++++--- llvm/test/Transforms/InstCombine/icmp-gep.ll | 75 +++++++++++++++++++ 2 files changed, 102 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 579214c28fc30..ffc0b33171b8f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -690,13 +690,32 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, if (!isa(RHS)) RHS = RHS->stripPointerCasts(); + auto CanFold = [Cond](GEPNoWrapFlags NW) { + if (ICmpInst::isEquality(Cond)) + return true; + + // Unsigned predicates can be folded if the GEPs have *any* nowrap flags. + assert(ICmpInst::isUnsigned(Cond)); + return NW != GEPNoWrapFlags::none(); + }; + + auto NewICmp = [Cond](GEPNoWrapFlags NW, Value *Op1, Value *Op2) { + if (!NW.hasNoUnsignedWrap()) { + // Convert signed to unsigned comparison. + return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Op1, Op2); + } + + auto *I = new ICmpInst(Cond, Op1, Op2); + I->setSameSign(NW.hasNoUnsignedSignedWrap()); + return I; + }; + Value *PtrBase = GEPLHS->getOperand(0); - if (PtrBase == RHS && - (GEPLHS->hasNoUnsignedSignedWrap() || ICmpInst::isEquality(Cond))) { + if (PtrBase == RHS && CanFold(GEPLHS->getNoWrapFlags())) { // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0). Value *Offset = EmitGEPOffset(GEPLHS); - return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset, - Constant::getNullValue(Offset->getType())); + return NewICmp(GEPLHS->getNoWrapFlags(), Offset, + Constant::getNullValue(Offset->getType())); } if (GEPLHS->isInBounds() && ICmpInst::isEquality(Cond) && @@ -814,19 +833,18 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, return replaceInstUsesWith(I, // No comparison is needed here. ConstantInt::get(I.getType(), ICmpInst::isTrueWhenEqual(Cond))); - else if (NumDifferences == 1 && NW.hasNoUnsignedSignedWrap()) { + else if (NumDifferences == 1 && CanFold(NW)) { Value *LHSV = GEPLHS->getOperand(DiffOperand); Value *RHSV = GEPRHS->getOperand(DiffOperand); - // Make sure we do a signed comparison here. 
- return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV); + return NewICmp(NW, LHSV, RHSV); } } - if (NW.hasNoUnsignedSignedWrap() || CmpInst::isEquality(Cond)) { + if (CanFold(NW)) { // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2) Value *L = EmitGEPOffset(GEPLHS, /*RewriteGEP=*/true); Value *R = EmitGEPOffset(GEPRHS, /*RewriteGEP=*/true); - return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R); + return NewICmp(NW, L, R); } } diff --git a/llvm/test/Transforms/InstCombine/icmp-gep.ll b/llvm/test/Transforms/InstCombine/icmp-gep.ll index 1545d034b2ac3..1bc000cd6ebf1 100644 --- a/llvm/test/Transforms/InstCombine/icmp-gep.ll +++ b/llvm/test/Transforms/InstCombine/icmp-gep.ll @@ -143,6 +143,44 @@ define i1 @ult_base_nusw(ptr %x, i64 %y) { ret i1 %r } +define i1 @ugt_base_nuw(ptr %x, i64 %y) { +; CHECK-LABEL: @ugt_base_nuw( +; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[Y:%.*]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %g = getelementptr nuw i8, ptr %x, i64 %y + %r = icmp ugt ptr %g, %x + ret i1 %r +} + +define i1 @ugt_base_nusw_nuw(ptr %x, i64 %y) { +; CHECK-LABEL: @ugt_base_nusw_nuw( +; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[Y:%.*]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %g = getelementptr nusw nuw i8, ptr %x, i64 %y + %r = icmp ugt ptr %g, %x + ret i1 %r +} + +define i1 @uge_base_nuw(ptr %x, i64 %y) { +; CHECK-LABEL: @uge_base_nuw( +; CHECK-NEXT: ret i1 true +; + %g = getelementptr nuw i8, ptr %x, i64 %y + %r = icmp uge ptr %g, %x + ret i1 %r +} + +define i1 @uge_base_nusw_nuw(ptr %x, i64 %y) { +; CHECK-LABEL: @uge_base_nusw_nuw( +; CHECK-NEXT: ret i1 true +; + %g = getelementptr nusw nuw i8, ptr %x, i64 %y + %r = icmp uge ptr %g, %x + ret i1 %r +} + define i1 @ugt_base_inbounds_commute(i64 %y) { ; CHECK-LABEL: @ugt_base_inbounds_commute( ; CHECK-NEXT: [[X:%.*]] = call ptr @getptr() @@ -319,6 +357,43 @@ define i1 @test60_nusw_inbounds(ptr %foo, i64 %i, i64 %j) { ret i1 %cmp } +define i1 @test60_nuw(ptr %foo, i64 %i, i64 %j) { +; CHECK-LABEL: @test60_nuw( +; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw i64 [[I:%.*]], 2 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[GEP1_IDX]], [[J:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %gep1 = getelementptr nuw i32, ptr %foo, i64 %i + %gep2 = getelementptr nuw i8, ptr %foo, i64 %j + %cmp = icmp ult ptr %gep1, %gep2 + ret i1 %cmp +} + +define i1 @test60_nusw_nuw(ptr %foo, i64 %i, i64 %j) { +; CHECK-LABEL: @test60_nusw_nuw( +; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw nsw i64 [[I:%.*]], 2 +; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ult i64 [[GEP1_IDX]], [[J:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %gep1 = getelementptr nusw nuw i32, ptr %foo, i64 %i + %gep2 = getelementptr nusw nuw i8, ptr %foo, i64 %j + %cmp = icmp ult ptr %gep1, %gep2 + ret i1 %cmp +} + +define i1 @test60_nusw_nuw_mix(ptr %foo, i64 %i, i64 %j) { +; CHECK-LABEL: @test60_nusw_nuw_mix( +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr nuw i32, ptr [[FOO:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr nusw i8, ptr [[FOO]], i64 [[J:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult ptr [[GEP1]], [[GEP2]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %gep1 = getelementptr nuw i32, ptr %foo, i64 %i + %gep2 = getelementptr nusw i8, ptr %foo, i64 %j + %cmp = icmp ult ptr %gep1, %gep2 + ret i1 %cmp +} + define i1 @test_gep_ult_no_inbounds(ptr %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @test_gep_ult_no_inbounds( ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr [[FOO:%.*]], i64 [[I:%.*]] From 51a895aded890e90493be59f7af0fa5a3b9b85aa Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Tue, 
3 Dec 2024 13:31:04 +0000 Subject: [PATCH 068/191] IR: introduce struct with CmpInst::Predicate and samesign (#116867) Introduce llvm::CmpPredicate, an abstraction over a floating-point predicate, and a pack of an integer predicate with samesign information, in order to ease extending large portions of the codebase that take a CmpInst::Predicate to respect the samesign flag. We have chosen to demonstrate the utility of this new abstraction by migrating parts of ValueTracking, InstructionSimplify, and InstCombine from CmpInst::Predicate to llvm::CmpPredicate. There should be no functional changes, as we don't perform any extra optimizations with samesign in this patch, or use CmpPredicate::getMatching. The design approach taken by this patch allows for unaudited callers of APIs that take a llvm::CmpPredicate to silently drop the samesign information; it does not pose a correctness issue, and allows us to migrate the codebase piece-wise. --- .../llvm/Analysis/InstSimplifyFolder.h | 1 + .../llvm/Analysis/InstructionSimplify.h | 7 +- llvm/include/llvm/Analysis/ValueTracking.h | 7 +- llvm/include/llvm/IR/CmpPredicate.h | 62 +++++++++++ llvm/include/llvm/IR/Instructions.h | 39 ++++++- .../Transforms/InstCombine/InstCombiner.h | 9 +- llvm/lib/Analysis/InstructionSimplify.cpp | 102 ++++++++---------- llvm/lib/Analysis/ValueTracking.cpp | 10 +- llvm/lib/IR/Instructions.cpp | 22 +++- .../InstCombine/InstCombineCompares.cpp | 35 +++--- .../InstCombine/InstCombineInternal.h | 17 ++- .../InstCombine/InstructionCombining.cpp | 6 +- llvm/unittests/IR/InstructionsTest.cpp | 22 ++++ 13 files changed, 228 insertions(+), 111 deletions(-) create mode 100644 llvm/include/llvm/IR/CmpPredicate.h diff --git a/llvm/include/llvm/Analysis/InstSimplifyFolder.h b/llvm/include/llvm/Analysis/InstSimplifyFolder.h index 430c3edc2f0dc..d4ae4dcc918cf 100644 --- a/llvm/include/llvm/Analysis/InstSimplifyFolder.h +++ b/llvm/include/llvm/Analysis/InstSimplifyFolder.h @@ -22,6 +22,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetFolder.h" +#include "llvm/IR/CmpPredicate.h" #include "llvm/IR/IRBuilderFolder.h" #include "llvm/IR/Instruction.h" diff --git a/llvm/include/llvm/Analysis/InstructionSimplify.h b/llvm/include/llvm/Analysis/InstructionSimplify.h index cf7d3e044188a..fa291eeef198b 100644 --- a/llvm/include/llvm/Analysis/InstructionSimplify.h +++ b/llvm/include/llvm/Analysis/InstructionSimplify.h @@ -44,6 +44,7 @@ class DataLayout; class DominatorTree; class Function; class Instruction; +class CmpPredicate; class LoadInst; struct LoopStandardAnalysisResults; class Pass; @@ -152,11 +153,11 @@ Value *simplifyOrInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); Value *simplifyXorInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); /// Given operands for an ICmpInst, fold the result or return null. -Value *simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, +Value *simplifyICmpInst(CmpPredicate Pred, Value *LHS, Value *RHS, const SimplifyQuery &Q); /// Given operands for an FCmpInst, fold the result or return null. -Value *simplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, +Value *simplifyFCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q); /// Given operands for a SelectInst, fold the result or return null. @@ -200,7 +201,7 @@ Value *simplifyShuffleVectorInst(Value *Op0, Value *Op1, ArrayRef Mask, //=== Helper functions for higher up the class hierarchy. 
/// Given operands for a CmpInst, fold the result or return null. -Value *simplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, +Value *simplifyCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q); /// Given operand for a UnaryOperator, fold the result or return null. diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index c408e0a39cd18..8aa024a72afc8 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -1255,8 +1255,7 @@ std::optional isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, bool LHSIsTrue = true, unsigned Depth = 0); -std::optional isImpliedCondition(const Value *LHS, - CmpInst::Predicate RHSPred, +std::optional isImpliedCondition(const Value *LHS, CmpPredicate RHSPred, const Value *RHSOp0, const Value *RHSOp1, const DataLayout &DL, bool LHSIsTrue = true, @@ -1267,8 +1266,8 @@ std::optional isImpliedCondition(const Value *LHS, std::optional isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL); -std::optional isImpliedByDomCondition(CmpInst::Predicate Pred, - const Value *LHS, const Value *RHS, +std::optional isImpliedByDomCondition(CmpPredicate Pred, const Value *LHS, + const Value *RHS, const Instruction *ContextI, const DataLayout &DL); diff --git a/llvm/include/llvm/IR/CmpPredicate.h b/llvm/include/llvm/IR/CmpPredicate.h new file mode 100644 index 0000000000000..4b1be7beb2b66 --- /dev/null +++ b/llvm/include/llvm/IR/CmpPredicate.h @@ -0,0 +1,62 @@ +//===- CmpPredicate.h - CmpInst Predicate with samesign information -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A CmpInst::Predicate with any samesign information (applicable to ICmpInst). +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_CMPPREDICATE_H +#define LLVM_IR_CMPPREDICATE_H + +#include "llvm/IR/InstrTypes.h" + +namespace llvm { +/// An abstraction over a floating-point predicate, and a pack of an integer +/// predicate with samesign information. Some functions in ICmpInst construct +/// and return this type in place of a Predicate. +class CmpPredicate { + CmpInst::Predicate Pred; + bool HasSameSign; + +public: + /// Constructed implictly with a either Predicate and samesign information, or + /// just a Predicate, dropping samesign information. + CmpPredicate(CmpInst::Predicate Pred, bool HasSameSign = false) + : Pred(Pred), HasSameSign(HasSameSign) { + assert(!HasSameSign || CmpInst::isIntPredicate(Pred)); + } + + /// Implictly converts to the underlying Predicate, dropping samesign + /// information. + operator CmpInst::Predicate() const { return Pred; } + + /// Query samesign information, for optimizations. + bool hasSameSign() const { return HasSameSign; } + + /// Compares two CmpPredicates taking samesign into account and returns the + /// canonicalized CmpPredicate if they match. An alternative to operator==. 
+ /// + /// For example, + /// samesign ult + samesign ult -> samesign ult + /// samesign ult + ult -> ult + /// samesign ult + slt -> slt + /// ult + ult -> ult + /// ult + slt -> std::nullopt + static std::optional getMatching(CmpPredicate A, + CmpPredicate B); + + /// An operator== on the underlying Predicate. + bool operator==(CmpInst::Predicate P) const { return Pred == P; } + + /// There is no operator== defined on CmpPredicate. Use getMatching instead to + /// get the canonicalized matching CmpPredicate. + bool operator==(CmpPredicate) const = delete; +}; +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index 605964af5d676..a42bf6bca1b9f 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -24,6 +24,7 @@ #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/CmpPredicate.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GEPNoWrapFlags.h" @@ -1203,6 +1204,33 @@ class ICmpInst: public CmpInst { #endif } + /// @returns the predicate along with samesign information. + CmpPredicate getCmpPredicate() const { + return {getPredicate(), hasSameSign()}; + } + + /// @returns the inverse predicate along with samesign information: static + /// variant. + static CmpPredicate getInverseCmpPredicate(CmpPredicate Pred) { + return {getInversePredicate(Pred), Pred.hasSameSign()}; + } + + /// @returns the inverse predicate along with samesign information. + CmpPredicate getInverseCmpPredicate() const { + return getInverseCmpPredicate(getCmpPredicate()); + } + + /// @returns the swapped predicate along with samesign information: static + /// variant. + static CmpPredicate getSwappedCmpPredicate(CmpPredicate Pred) { + return {getSwappedPredicate(Pred), Pred.hasSameSign()}; + } + + /// @returns the swapped predicate. + Predicate getSwappedCmpPredicate() const { + return getSwappedPredicate(getCmpPredicate()); + } + /// For example, EQ->EQ, SLE->SLE, UGT->SGT, etc. /// @returns the predicate that would be the result if the operand were /// regarded as signed. @@ -1212,7 +1240,7 @@ class ICmpInst: public CmpInst { } /// Return the signed version of the predicate: static variant. - static Predicate getSignedPredicate(Predicate pred); + static Predicate getSignedPredicate(Predicate Pred); /// For example, EQ->EQ, SLE->ULE, UGT->UGT, etc. /// @returns the predicate that would be the result if the operand were @@ -1223,14 +1251,15 @@ class ICmpInst: public CmpInst { } /// Return the unsigned version of the predicate: static variant. - static Predicate getUnsignedPredicate(Predicate pred); + static Predicate getUnsignedPredicate(Predicate Pred); - /// For example, SLT->ULT, ULT->SLT, SLE->ULE, ULE->SLE, EQ->Failed assert + /// For example, SLT->ULT, ULT->SLT, SLE->ULE, ULE->SLE, EQ->EQ /// @returns the unsigned version of the signed predicate pred or /// the signed version of the signed predicate pred. - static Predicate getFlippedSignednessPredicate(Predicate pred); + /// Static variant. + static Predicate getFlippedSignednessPredicate(Predicate Pred); - /// For example, SLT->ULT, ULT->SLT, SLE->ULE, ULE->SLE, EQ->Failed assert + /// For example, SLT->ULT, ULT->SLT, SLE->ULE, ULE->SLE, EQ->EQ /// @returns the unsigned version of the signed predicate pred or /// the signed version of the signed predicate pred. 
Predicate getFlippedSignednessPredicate() const { diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h index 3075b7ebae59e..71592058e3456 100644 --- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h +++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h @@ -157,7 +157,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner { /// conditional branch or select to create a compare with a canonical /// (inverted) predicate which is then more likely to be matched with other /// values. - static bool isCanonicalPredicate(CmpInst::Predicate Pred) { + static bool isCanonicalPredicate(CmpPredicate Pred) { switch (Pred) { case CmpInst::ICMP_NE: case CmpInst::ICMP_ULE: @@ -185,10 +185,9 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner { } std::optional> static getFlippedStrictnessPredicateAndConstant(CmpInst:: - Predicate - Pred, + CmpPredicate, + Constant *>> static getFlippedStrictnessPredicateAndConstant(CmpPredicate + Pred, Constant *C); static bool shouldAvoidAbsorbingNotIntoSelect(const SelectInst &SI) { diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 1a5bbbc7dfceb..05e8f5761c13c 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -63,9 +63,9 @@ static Value *simplifyBinOp(unsigned, Value *, Value *, const SimplifyQuery &, unsigned); static Value *simplifyBinOp(unsigned, Value *, Value *, const FastMathFlags &, const SimplifyQuery &, unsigned); -static Value *simplifyCmpInst(unsigned, Value *, Value *, const SimplifyQuery &, - unsigned); -static Value *simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, +static Value *simplifyCmpInst(CmpPredicate, Value *, Value *, + const SimplifyQuery &, unsigned); +static Value *simplifyICmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse); static Value *simplifyOrInst(Value *, Value *, const SimplifyQuery &, unsigned); static Value *simplifyXorInst(Value *, Value *, const SimplifyQuery &, @@ -132,8 +132,7 @@ static Constant *getFalse(Type *Ty) { return ConstantInt::getFalse(Ty); } static Constant *getTrue(Type *Ty) { return ConstantInt::getTrue(Ty); } /// isSameCompare - Is V equivalent to the comparison "LHS Pred RHS"? -static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS, - Value *RHS) { +static bool isSameCompare(Value *V, CmpPredicate Pred, Value *LHS, Value *RHS) { CmpInst *Cmp = dyn_cast(V); if (!Cmp) return false; @@ -150,10 +149,9 @@ static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS, /// %cmp = icmp sle i32 %sel, %rhs /// Compose new comparison by substituting %sel with either %tv or %fv /// and see if it simplifies. -static Value *simplifyCmpSelCase(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, Value *Cond, - const SimplifyQuery &Q, unsigned MaxRecurse, - Constant *TrueOrFalse) { +static Value *simplifyCmpSelCase(CmpPredicate Pred, Value *LHS, Value *RHS, + Value *Cond, const SimplifyQuery &Q, + unsigned MaxRecurse, Constant *TrueOrFalse) { Value *SimplifiedCmp = simplifyCmpInst(Pred, LHS, RHS, Q, MaxRecurse); if (SimplifiedCmp == Cond) { // %cmp simplified to the select condition (%cond). 
@@ -167,18 +165,16 @@ static Value *simplifyCmpSelCase(CmpInst::Predicate Pred, Value *LHS, } /// Simplify comparison with true branch of select -static Value *simplifyCmpSelTrueCase(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, Value *Cond, - const SimplifyQuery &Q, +static Value *simplifyCmpSelTrueCase(CmpPredicate Pred, Value *LHS, Value *RHS, + Value *Cond, const SimplifyQuery &Q, unsigned MaxRecurse) { return simplifyCmpSelCase(Pred, LHS, RHS, Cond, Q, MaxRecurse, getTrue(Cond->getType())); } /// Simplify comparison with false branch of select -static Value *simplifyCmpSelFalseCase(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, Value *Cond, - const SimplifyQuery &Q, +static Value *simplifyCmpSelFalseCase(CmpPredicate Pred, Value *LHS, Value *RHS, + Value *Cond, const SimplifyQuery &Q, unsigned MaxRecurse) { return simplifyCmpSelCase(Pred, LHS, RHS, Cond, Q, MaxRecurse, getFalse(Cond->getType())); @@ -471,9 +467,8 @@ static Value *threadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS, /// We can simplify %cmp1 to true, because both branches of select are /// less than 3. We compose new comparison by substituting %tmp with both /// branches of select and see if it can be simplified. -static Value *threadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, const SimplifyQuery &Q, - unsigned MaxRecurse) { +static Value *threadCmpOverSelect(CmpPredicate Pred, Value *LHS, Value *RHS, + const SimplifyQuery &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) return nullptr; @@ -564,7 +559,7 @@ static Value *threadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS, /// comparison by seeing whether comparing with all of the incoming phi values /// yields the same result every time. If so returns the common result, /// otherwise returns null. -static Value *threadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, +static Value *threadCmpOverPHI(CmpPredicate Pred, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) @@ -1001,7 +996,7 @@ Value *llvm::simplifyMulInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW, /// Given a predicate and two operands, return true if the comparison is true. /// This is a helper for div/rem simplification where we return some other value /// when we can prove a relationship between the operands. -static bool isICmpTrue(ICmpInst::Predicate Pred, Value *LHS, Value *RHS, +static bool isICmpTrue(CmpPredicate Pred, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { Value *V = simplifyICmpInst(Pred, LHS, RHS, Q, MaxRecurse); Constant *C = dyn_cast_or_null(V); @@ -2601,7 +2596,7 @@ static Type *getCompareTy(Value *Op) { /// Rummage around inside V looking for something equivalent to the comparison /// "LHS Pred RHS". Return such a value if found, otherwise return null. /// Helper function for analyzing max/min idioms. -static Value *extractEquivalentCondition(Value *V, CmpInst::Predicate Pred, +static Value *extractEquivalentCondition(Value *V, CmpPredicate Pred, Value *LHS, Value *RHS) { SelectInst *SI = dyn_cast(V); if (!SI) @@ -2710,8 +2705,8 @@ static bool haveNonOverlappingStorage(const Value *V1, const Value *V2) { // If the C and C++ standards are ever made sufficiently restrictive in this // area, it may be possible to update LLVM's semantics accordingly and reinstate // this optimization. 
-static Constant *computePointerICmp(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, const SimplifyQuery &Q) { +static Constant *computePointerICmp(CmpPredicate Pred, Value *LHS, Value *RHS, + const SimplifyQuery &Q) { assert(LHS->getType() == RHS->getType() && "Must have same types"); const DataLayout &DL = Q.DL; const TargetLibraryInfo *TLI = Q.TLI; @@ -2859,8 +2854,8 @@ static Constant *computePointerICmp(CmpInst::Predicate Pred, Value *LHS, } /// Fold an icmp when its operands have i1 scalar type. -static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, const SimplifyQuery &Q) { +static Value *simplifyICmpOfBools(CmpPredicate Pred, Value *LHS, Value *RHS, + const SimplifyQuery &Q) { Type *ITy = getCompareTy(LHS); // The return type. Type *OpTy = LHS->getType(); // The operand type. if (!OpTy->isIntOrIntVectorTy(1)) @@ -2962,8 +2957,8 @@ static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS, } /// Try hard to fold icmp with zero RHS because this is a common case. -static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, const SimplifyQuery &Q) { +static Value *simplifyICmpWithZero(CmpPredicate Pred, Value *LHS, Value *RHS, + const SimplifyQuery &Q) { if (!match(RHS, m_Zero())) return nullptr; @@ -3022,7 +3017,7 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS, return nullptr; } -static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS, +static Value *simplifyICmpWithConstant(CmpPredicate Pred, Value *LHS, Value *RHS, const InstrInfoQuery &IIQ) { Type *ITy = getCompareTy(RHS); // The return type. @@ -3115,8 +3110,8 @@ static void getUnsignedMonotonicValues(SmallPtrSetImpl &Res, Value *V, } } -static Value *simplifyICmpUsingMonotonicValues(ICmpInst::Predicate Pred, - Value *LHS, Value *RHS) { +static Value *simplifyICmpUsingMonotonicValues(CmpPredicate Pred, Value *LHS, + Value *RHS) { if (Pred != ICmpInst::ICMP_UGE && Pred != ICmpInst::ICMP_ULT) return nullptr; @@ -3133,9 +3128,8 @@ static Value *simplifyICmpUsingMonotonicValues(ICmpInst::Predicate Pred, return nullptr; } -static Value *simplifyICmpWithBinOpOnLHS(CmpInst::Predicate Pred, - BinaryOperator *LBO, Value *RHS, - const SimplifyQuery &Q, +static Value *simplifyICmpWithBinOpOnLHS(CmpPredicate Pred, BinaryOperator *LBO, + Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { Type *ITy = getCompareTy(RHS); // The return type. @@ -3254,8 +3248,8 @@ static Value *simplifyICmpWithBinOpOnLHS(CmpInst::Predicate Pred, // *) C1 < C2 && C1 >= 0, or // *) C2 < C1 && C1 <= 0. // -static bool trySimplifyICmpWithAdds(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, const InstrInfoQuery &IIQ) { +static bool trySimplifyICmpWithAdds(CmpPredicate Pred, Value *LHS, Value *RHS, + const InstrInfoQuery &IIQ) { // TODO: only support icmp slt for now. if (Pred != CmpInst::ICMP_SLT || !IIQ.UseInstrInfo) return false; @@ -3279,8 +3273,8 @@ static bool trySimplifyICmpWithAdds(CmpInst::Predicate Pred, Value *LHS, /// TODO: A large part of this logic is duplicated in InstCombine's /// foldICmpBinOp(). We should be able to share that and avoid the code /// duplication. 
-static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, const SimplifyQuery &Q, +static Value *simplifyICmpWithBinOp(CmpPredicate Pred, Value *LHS, Value *RHS, + const SimplifyQuery &Q, unsigned MaxRecurse) { BinaryOperator *LBO = dyn_cast(LHS); BinaryOperator *RBO = dyn_cast(RHS); @@ -3513,8 +3507,8 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, /// simplify integer comparisons where at least one operand of the compare /// matches an integer min/max idiom. -static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, const SimplifyQuery &Q, +static Value *simplifyICmpWithMinMax(CmpPredicate Pred, Value *LHS, Value *RHS, + const SimplifyQuery &Q, unsigned MaxRecurse) { Type *ITy = getCompareTy(LHS); // The return type. Value *A, *B; @@ -3698,7 +3692,7 @@ static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS, return nullptr; } -static Value *simplifyICmpWithDominatingAssume(CmpInst::Predicate Predicate, +static Value *simplifyICmpWithDominatingAssume(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q) { // Gracefully handle instructions that have not been inserted yet. @@ -3721,8 +3715,8 @@ static Value *simplifyICmpWithDominatingAssume(CmpInst::Predicate Predicate, return nullptr; } -static Value *simplifyICmpWithIntrinsicOnLHS(CmpInst::Predicate Pred, - Value *LHS, Value *RHS) { +static Value *simplifyICmpWithIntrinsicOnLHS(CmpPredicate Pred, Value *LHS, + Value *RHS) { auto *II = dyn_cast(LHS); if (!II) return nullptr; @@ -3770,9 +3764,8 @@ static std::optional getRange(Value *V, /// Given operands for an ICmpInst, see if we can fold the result. /// If not, this returns null. -static Value *simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, +static Value *simplifyICmpInst(CmpPredicate Pred, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { - CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!"); if (Constant *CLHS = dyn_cast(LHS)) { @@ -4085,17 +4078,16 @@ static Value *simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return nullptr; } -Value *llvm::simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, +Value *llvm::simplifyICmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q) { return ::simplifyICmpInst(Predicate, LHS, RHS, Q, RecursionLimit); } /// Given operands for an FCmpInst, see if we can fold the result. /// If not, this returns null. 
-static Value *simplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, +static Value *simplifyFCmpInst(CmpPredicate Pred, Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, unsigned MaxRecurse) { - CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!"); if (Constant *CLHS = dyn_cast(LHS)) { @@ -4320,7 +4312,7 @@ static Value *simplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, return nullptr; } -Value *llvm::simplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, +Value *llvm::simplifyFCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q) { return ::simplifyFCmpInst(Predicate, LHS, RHS, FMF, Q, RecursionLimit); } @@ -4557,7 +4549,7 @@ static Value *simplifySelectBitTest(Value *TrueVal, Value *FalseVal, Value *X, } static Value *simplifyCmpSelOfMaxMin(Value *CmpLHS, Value *CmpRHS, - ICmpInst::Predicate Pred, Value *TVal, + CmpPredicate Pred, Value *TVal, Value *FVal) { // Canonicalize common cmp+sel operand as CmpLHS. if (CmpRHS == TVal || CmpRHS == FVal) { @@ -4631,8 +4623,8 @@ static Value *simplifyCmpSelOfMaxMin(Value *CmpLHS, Value *CmpRHS, /// An alternative way to test if a bit is set or not uses sgt/slt instead of /// eq/ne. static Value *simplifySelectWithFakeICmpEq(Value *CmpLHS, Value *CmpRHS, - ICmpInst::Predicate Pred, - Value *TrueVal, Value *FalseVal) { + CmpPredicate Pred, Value *TrueVal, + Value *FalseVal) { if (auto Res = decomposeBitTestICmp(CmpLHS, CmpRHS, Pred)) return simplifySelectBitTest(TrueVal, FalseVal, Res->X, &Res->Mask, Res->Pred == ICmpInst::ICMP_EQ); @@ -6142,14 +6134,14 @@ Value *llvm::simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, } /// Given operands for a CmpInst, see if we can fold the result. 
-static Value *simplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, +static Value *simplifyCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { - if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate)) + if (CmpInst::isIntPredicate(Predicate)) return simplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse); return simplifyFCmpInst(Predicate, LHS, RHS, FastMathFlags(), Q, MaxRecurse); } -Value *llvm::simplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, +Value *llvm::simplifyCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q) { return ::simplifyCmpInst(Predicate, LHS, RHS, Q, RecursionLimit); } @@ -7187,7 +7179,7 @@ static Value *simplifyInstructionWithOperands(Instruction *I, case Instruction::Xor: return simplifyXorInst(NewOps[0], NewOps[1], Q, MaxRecurse); case Instruction::ICmp: - return simplifyICmpInst(cast(I)->getPredicate(), NewOps[0], + return simplifyICmpInst(cast(I)->getCmpPredicate(), NewOps[0], NewOps[1], Q, MaxRecurse); case Instruction::FCmp: return simplifyFCmpInst(cast(I)->getPredicate(), NewOps[0], diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index d81546d0c9fed..f2c6949e535d2 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -9379,7 +9379,7 @@ static std::optional isImpliedCondICmps(const ICmpInst *LHS, (LPred == ICmpInst::ICMP_ULT || LPred == ICmpInst::ICMP_UGE) && (RPred == ICmpInst::ICMP_ULT || RPred == ICmpInst::ICMP_UGE) && match(L0, m_c_Add(m_Specific(L1), m_Specific(R1)))) - return LPred == RPred; + return CmpPredicate::getMatching(LPred, RPred).has_value(); if (LPred == RPred) return isImpliedCondOperands(LPred, L0, L1, R0, R1); @@ -9392,7 +9392,7 @@ static std::optional isImpliedCondICmps(const ICmpInst *LHS, /// expect the RHS to be an icmp and the LHS to be an 'and', 'or', or a 'select' /// instruction. static std::optional -isImpliedCondAndOr(const Instruction *LHS, CmpInst::Predicate RHSPred, +isImpliedCondAndOr(const Instruction *LHS, CmpPredicate RHSPred, const Value *RHSOp0, const Value *RHSOp1, const DataLayout &DL, bool LHSIsTrue, unsigned Depth) { // The LHS must be an 'or', 'and', or a 'select' instruction. @@ -9422,7 +9422,7 @@ isImpliedCondAndOr(const Instruction *LHS, CmpInst::Predicate RHSPred, } std::optional -llvm::isImpliedCondition(const Value *LHS, CmpInst::Predicate RHSPred, +llvm::isImpliedCondition(const Value *LHS, CmpPredicate RHSPred, const Value *RHSOp0, const Value *RHSOp1, const DataLayout &DL, bool LHSIsTrue, unsigned Depth) { // Bail out when we hit the limit. @@ -9476,7 +9476,7 @@ std::optional llvm::isImpliedCondition(const Value *LHS, const Value *RHS, if (const ICmpInst *RHSCmp = dyn_cast(RHS)) { if (auto Implied = isImpliedCondition( - LHS, RHSCmp->getPredicate(), RHSCmp->getOperand(0), + LHS, RHSCmp->getCmpPredicate(), RHSCmp->getOperand(0), RHSCmp->getOperand(1), DL, LHSIsTrue, Depth)) return InvertRHS ? 
!*Implied : *Implied; return std::nullopt; @@ -9553,7 +9553,7 @@ std::optional llvm::isImpliedByDomCondition(const Value *Cond, return std::nullopt; } -std::optional llvm::isImpliedByDomCondition(CmpInst::Predicate Pred, +std::optional llvm::isImpliedByDomCondition(CmpPredicate Pred, const Value *LHS, const Value *RHS, const Instruction *ContextI, diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index 065ce3a017283..4f07a4c4dd017 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -3842,9 +3842,8 @@ std::optional ICmpInst::compare(const KnownBits &LHS, } CmpInst::Predicate ICmpInst::getFlippedSignednessPredicate(Predicate pred) { - assert(CmpInst::isRelational(pred) && - "Call only with non-equality predicates!"); - + if (CmpInst::isEquality(pred)) + return pred; if (isSigned(pred)) return getUnsignedPredicate(pred); if (isUnsigned(pred)) @@ -3916,6 +3915,23 @@ bool CmpInst::isImpliedFalseByMatchingCmp(Predicate Pred1, Predicate Pred2) { return isImpliedTrueByMatchingCmp(Pred1, getInversePredicate(Pred2)); } +//===----------------------------------------------------------------------===// +// CmpPredicate Implementation +//===----------------------------------------------------------------------===// + +std::optional CmpPredicate::getMatching(CmpPredicate A, + CmpPredicate B) { + if (A.Pred == B.Pred) + return A.HasSameSign == B.HasSameSign ? A : CmpPredicate(A.Pred); + if (A.HasSameSign && + A.Pred == ICmpInst::getFlippedSignednessPredicate(B.Pred)) + return B.Pred; + if (B.HasSameSign && + B.Pred == ICmpInst::getFlippedSignednessPredicate(A.Pred)) + return A.Pred; + return {}; +} + //===----------------------------------------------------------------------===// // SwitchInst Implementation //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index ffc0b33171b8f..b486c5b0b6fad 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -631,7 +631,7 @@ static Value *rewriteGEPAsOffset(Value *Start, Value *Base, GEPNoWrapFlags NW, /// We can look through PHIs, GEPs and casts in order to determine a common base /// between GEPLHS and RHS. static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS, - ICmpInst::Predicate Cond, + CmpPredicate Cond, const DataLayout &DL, InstCombiner &IC) { // FIXME: Support vector of pointers. @@ -675,8 +675,7 @@ static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS, /// Fold comparisons between a GEP instruction and something else. At this point /// we know that the GEP is on the LHS of the comparison. Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, - ICmpInst::Predicate Cond, - Instruction &I) { + CmpPredicate Cond, Instruction &I) { // Don't transform signed compares of GEPs into index compares. Even if the // GEP is inbounds, the final add of the base pointer can have signed overflow // and would change the result of the icmp. @@ -931,7 +930,7 @@ bool InstCombinerImpl::foldAllocaCmp(AllocaInst *Alloca) { /// Fold "icmp pred (X+C), X". Instruction *InstCombinerImpl::foldICmpAddOpConst(Value *X, const APInt &C, - ICmpInst::Predicate Pred) { + CmpPredicate Pred) { // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0, // so the values can never be equal. 
Similarly for all other "or equals" // operators. @@ -3979,8 +3978,8 @@ Instruction *InstCombinerImpl::foldICmpBinOpWithConstant(ICmpInst &Cmp, } static Instruction * -foldICmpUSubSatOrUAddSatWithConstant(ICmpInst::Predicate Pred, - SaturatingInst *II, const APInt &C, +foldICmpUSubSatOrUAddSatWithConstant(CmpPredicate Pred, SaturatingInst *II, + const APInt &C, InstCombiner::BuilderTy &Builder) { // This transform may end up producing more than one instruction for the // intrinsic, so limit it to one user of the intrinsic. @@ -4064,7 +4063,7 @@ foldICmpUSubSatOrUAddSatWithConstant(ICmpInst::Predicate Pred, } static Instruction * -foldICmpOfCmpIntrinsicWithConstant(ICmpInst::Predicate Pred, IntrinsicInst *I, +foldICmpOfCmpIntrinsicWithConstant(CmpPredicate Pred, IntrinsicInst *I, const APInt &C, InstCombiner::BuilderTy &Builder) { std::optional NewPredicate = std::nullopt; @@ -4263,9 +4262,8 @@ Instruction *InstCombinerImpl::foldICmpInstWithConstantNotInt(ICmpInst &I) { return nullptr; } -Instruction *InstCombinerImpl::foldSelectICmp(ICmpInst::Predicate Pred, - SelectInst *SI, Value *RHS, - const ICmpInst &I) { +Instruction *InstCombinerImpl::foldSelectICmp(CmpPredicate Pred, SelectInst *SI, + Value *RHS, const ICmpInst &I) { // Try to fold the comparison into the select arms, which will cause the // select to be converted into a logical and/or. auto SimplifyOp = [&](Value *Op, bool SelectCondIsTrue) -> Value * { @@ -4434,7 +4432,7 @@ static bool isMaskOrZero(const Value *V, bool Not, const SimplifyQuery &Q, /// The Mask can be a constant, too. /// For some predicates, the operands are commutative. /// For others, x can only be on a specific side. -static Value *foldICmpWithLowBitMaskedVal(ICmpInst::Predicate Pred, Value *Op0, +static Value *foldICmpWithLowBitMaskedVal(CmpPredicate Pred, Value *Op0, Value *Op1, const SimplifyQuery &Q, InstCombiner &IC) { @@ -5545,8 +5543,7 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, /// Fold icmp Pred min|max(X, Y), Z. Instruction *InstCombinerImpl::foldICmpWithMinMax(Instruction &I, MinMaxIntrinsic *MinMax, - Value *Z, - ICmpInst::Predicate Pred) { + Value *Z, CmpPredicate Pred) { Value *X = MinMax->getLHS(); Value *Y = MinMax->getRHS(); if (ICmpInst::isSigned(Pred) && !MinMax->isSigned()) @@ -6899,8 +6896,8 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { return nullptr; } -std::optional> -InstCombiner::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred, +std::optional> +InstCombiner::getFlippedStrictnessPredicateAndConstant(CmpPredicate Pred, Constant *C) { assert(ICmpInst::isRelational(Pred) && ICmpInst::isIntPredicate(Pred) && "Only for relational integer predicates."); @@ -7306,7 +7303,7 @@ static Instruction *foldReductionIdiom(ICmpInst &I, } // This helper will be called with icmp operands in both orders. -Instruction *InstCombinerImpl::foldICmpCommutative(ICmpInst::Predicate Pred, +Instruction *InstCombinerImpl::foldICmpCommutative(CmpPredicate Pred, Value *Op0, Value *Op1, ICmpInst &CxtI) { // Try to optimize 'icmp GEP, P' or 'icmp P, GEP'. 
@@ -7434,7 +7431,7 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { Changed = true; } - if (Value *V = simplifyICmpInst(I.getPredicate(), Op0, Op1, Q)) + if (Value *V = simplifyICmpInst(I.getCmpPredicate(), Op0, Op1, Q)) return replaceInstUsesWith(I, V); // Comparing -val or val with non-zero is the same as just comparing val @@ -7541,10 +7538,10 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { if (Instruction *Res = foldICmpInstWithConstantNotInt(I)) return Res; - if (Instruction *Res = foldICmpCommutative(I.getPredicate(), Op0, Op1, I)) + if (Instruction *Res = foldICmpCommutative(I.getCmpPredicate(), Op0, Op1, I)) return Res; if (Instruction *Res = - foldICmpCommutative(I.getSwappedPredicate(), Op1, Op0, I)) + foldICmpCommutative(I.getSwappedCmpPredicate(), Op1, Op0, I)) return Res; if (I.isCommutative()) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 0508ed48fc19c..28474fec8238e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -652,10 +652,10 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final /// folded operation. void PHIArgMergedDebugLoc(Instruction *Inst, PHINode &PN); - Instruction *foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, - ICmpInst::Predicate Cond, Instruction &I); - Instruction *foldSelectICmp(ICmpInst::Predicate Pred, SelectInst *SI, - Value *RHS, const ICmpInst &I); + Instruction *foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, CmpPredicate Cond, + Instruction &I); + Instruction *foldSelectICmp(CmpPredicate Pred, SelectInst *SI, Value *RHS, + const ICmpInst &I); bool foldAllocaCmp(AllocaInst *Alloca); Instruction *foldCmpLoadFromIndexedGlobal(LoadInst *LI, GetElementPtrInst *GEP, @@ -663,8 +663,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final ConstantInt *AndCst = nullptr); Instruction *foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, Constant *RHSC); - Instruction *foldICmpAddOpConst(Value *X, const APInt &C, - ICmpInst::Predicate Pred); + Instruction *foldICmpAddOpConst(Value *X, const APInt &C, CmpPredicate Pred); Instruction *foldICmpWithCastOp(ICmpInst &ICmp); Instruction *foldICmpWithZextOrSext(ICmpInst &ICmp); @@ -678,7 +677,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final const APInt &C); Instruction *foldICmpBinOp(ICmpInst &Cmp, const SimplifyQuery &SQ); Instruction *foldICmpWithMinMax(Instruction &I, MinMaxIntrinsic *MinMax, - Value *Z, ICmpInst::Predicate Pred); + Value *Z, CmpPredicate Pred); Instruction *foldICmpEquality(ICmpInst &Cmp); Instruction *foldIRemByPowerOfTwoToBitTest(ICmpInst &I); Instruction *foldSignBitTest(ICmpInst &I); @@ -736,8 +735,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final const APInt &C); Instruction *foldICmpBitCast(ICmpInst &Cmp); Instruction *foldICmpWithTrunc(ICmpInst &Cmp); - Instruction *foldICmpCommutative(ICmpInst::Predicate Pred, Value *Op0, - Value *Op1, ICmpInst &CxtI); + Instruction *foldICmpCommutative(CmpPredicate Pred, Value *Op0, Value *Op1, + ICmpInst &CxtI); // Helpers of visitSelectInst(). 
Instruction *foldSelectOfBools(SelectInst &SI); diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 32f2a30afad48..3325a1868ebde 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1753,9 +1753,9 @@ static Value *simplifyInstructionWithPHI(Instruction &I, PHINode *PN, if (TerminatorBI && TerminatorBI->isConditional() && TerminatorBI->getSuccessor(0) != TerminatorBI->getSuccessor(1) && ICmp) { bool LHSIsTrue = TerminatorBI->getSuccessor(0) == PN->getParent(); - std::optional ImpliedCond = - isImpliedCondition(TerminatorBI->getCondition(), ICmp->getPredicate(), - Ops[0], Ops[1], DL, LHSIsTrue); + std::optional ImpliedCond = isImpliedCondition( + TerminatorBI->getCondition(), ICmp->getCmpPredicate(), Ops[0], Ops[1], + DL, LHSIsTrue); if (ImpliedCond) return ConstantInt::getBool(I.getType(), ImpliedCond.value()); } diff --git a/llvm/unittests/IR/InstructionsTest.cpp b/llvm/unittests/IR/InstructionsTest.cpp index 0af812564c026..b4dbc4ed435aa 100644 --- a/llvm/unittests/IR/InstructionsTest.cpp +++ b/llvm/unittests/IR/InstructionsTest.cpp @@ -1923,5 +1923,27 @@ TEST(InstructionsTest, AtomicSyncscope) { EXPECT_TRUE(LLVMIsAtomicSingleThread(CmpXchg)); } +TEST(InstructionsTest, CmpPredicate) { + CmpPredicate P0(CmpInst::ICMP_ULE, false), P1(CmpInst::ICMP_ULE, true), + P2(CmpInst::ICMP_SLE, false), P3(CmpInst::ICMP_SLT, false); + CmpPredicate Q0 = P0, Q1 = P1, Q2 = P2; + CmpInst::Predicate R0 = P0, R1 = P1, R2 = P2; + + EXPECT_EQ(*CmpPredicate::getMatching(P0, P1), CmpInst::ICMP_ULE); + EXPECT_EQ(CmpPredicate::getMatching(P0, P1)->hasSameSign(), false); + EXPECT_EQ(*CmpPredicate::getMatching(P1, P1), CmpInst::ICMP_ULE); + EXPECT_EQ(CmpPredicate::getMatching(P1, P1)->hasSameSign(), true); + EXPECT_EQ(CmpPredicate::getMatching(P0, P2), std::nullopt); + EXPECT_EQ(*CmpPredicate::getMatching(P1, P2), CmpInst::ICMP_SLE); + EXPECT_EQ(CmpPredicate::getMatching(P1, P2)->hasSameSign(), false); + EXPECT_EQ(CmpPredicate::getMatching(P1, P3), std::nullopt); + EXPECT_FALSE(Q0.hasSameSign()); + EXPECT_TRUE(Q1.hasSameSign()); + EXPECT_FALSE(Q2.hasSameSign()); + EXPECT_EQ(P0, R0); + EXPECT_EQ(P1, R1); + EXPECT_EQ(P2, R2); +} + } // end anonymous namespace } // end namespace llvm From ff0babc9172b42a3e9f552d6959f9d98ae450633 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Tue, 3 Dec 2024 14:34:32 +0100 Subject: [PATCH 069/191] [clang][bytecode] Fix discarded pointer subtractions (#118477) We need to pop the value. --- clang/lib/AST/ByteCode/Compiler.cpp | 5 ++++- clang/test/AST/ByteCode/literals.cpp | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index a95353fd2943c..705ab1ec8e8ab 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -1000,7 +1000,10 @@ bool Compiler::VisitPointerArithBinOp(const BinaryOperator *E) { if (!visitAsPointer(RHS, *RT) || !visitAsPointer(LHS, *LT)) return false; - return this->emitSubPtr(classifyPrim(E->getType()), E); + PrimType IntT = classifyPrim(E->getType()); + if (!this->emitSubPtr(IntT, E)) + return false; + return DiscardResult ? 
this->emitPop(IntT, E) : true; } PrimType OffsetType; diff --git a/clang/test/AST/ByteCode/literals.cpp b/clang/test/AST/ByteCode/literals.cpp index 13d6c4feb3500..662823c49cd4a 100644 --- a/clang/test/AST/ByteCode/literals.cpp +++ b/clang/test/AST/ByteCode/literals.cpp @@ -980,6 +980,8 @@ namespace DiscardExprs { __uuidof(number); // both-error {{cannot call operator __uuidof on a type with no GUID}} requires{false;}; + constexpr int *p = nullptr; + p - p; return 0; } From 2a0ee090dbb9af80222bc796ac34fd4b7cba421b Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Tue, 3 Dec 2024 13:40:51 +0000 Subject: [PATCH 070/191] IVDesc: strip redundant arg in getOpcode call (NFC) (#118476) --- llvm/lib/Analysis/IVDescriptors.cpp | 2 +- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index 23e11bdbeab4c..e1eb219cf977e 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -1063,7 +1063,7 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) { SmallVector RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const { SmallVector ReductionOperations; - unsigned RedOp = getOpcode(Kind); + unsigned RedOp = getOpcode(); // Search down from the Phi to the LoopExitInstr, looking for instructions // with a single user of the correct type for the reduction. diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 8a44b5b176c46..8903adbb738c5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -550,7 +550,7 @@ Value *VPInstruction::generate(VPTransformState &State) { } // Reduce all of the unrolled parts into a single vector. 
Value *ReducedPartRdx = RdxParts[0]; - unsigned Op = RecurrenceDescriptor::getOpcode(RK); + unsigned Op = RdxDesc.getOpcode(); if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) Op = Instruction::Or; @@ -2130,8 +2130,7 @@ void VPReductionRecipe::execute(VPTransformState &State) { createOrderedReduction(State.Builder, RdxDesc, NewVecOp, PrevInChain); else NewRed = State.Builder.CreateBinOp( - (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), PrevInChain, - NewVecOp); + (Instruction::BinaryOps)RdxDesc.getOpcode(), PrevInChain, NewVecOp); PrevInChain = NewRed; NextInChain = NewRed; } else { @@ -2142,7 +2141,7 @@ void VPReductionRecipe::execute(VPTransformState &State) { NewRed, PrevInChain); else NextInChain = State.Builder.CreateBinOp( - (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, PrevInChain); + (Instruction::BinaryOps)RdxDesc.getOpcode(), NewRed, PrevInChain); } State.set(this, NextInChain, /*IsScalar*/ true); } @@ -2179,8 +2178,8 @@ void VPReductionEVLRecipe::execute(VPTransformState &State) { if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) NewRed = createMinMaxOp(Builder, Kind, NewRed, Prev); else - NewRed = Builder.CreateBinOp( - (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, Prev); + NewRed = Builder.CreateBinOp((Instruction::BinaryOps)RdxDesc.getOpcode(), + NewRed, Prev); } State.set(this, NewRed, /*IsScalar*/ true); } From e1cb316cfd99208363b5eb9bf96430ca28020be0 Mon Sep 17 00:00:00 2001 From: Vladislav Belov Date: Tue, 3 Dec 2024 16:46:01 +0300 Subject: [PATCH 071/191] Reapply "[clang] Fix name lookup for dependent bases" (#118003) Unlike the previous version (https://github.com/llvm/llvm-project/pull/114978), this patch also removes an unnecessary assert that causes Clang to crash when compiling such tests. (clang/lib/AST/DeclCXX.cpp) https://lab.llvm.org/buildbot/#/builders/52/builds/4021 ```c++ template class X { public: X() = default; virtual ~X() = default; virtual int foo(int x, int y, T &entry) = 0; void bar() { struct Y : public X { Y() : X() {} int foo(int, int, T &) override { return 42; } }; } }; ``` the assertions: ```c++ llvm-project/clang/lib/AST/DeclCXX.cpp:2508: void clang::CXXMethodDecl::addOverriddenMethod(const CXXMethodDecl *): Assertion `!MD->getParent()->isDependentContext() && "Can't add an overridden method to a class template!"' failed. ``` I believe that this assert is unnecessary and contradicts the logic of this patch. After its removal, Clang was successfully built using itself, and all tests passed. --- clang/docs/ReleaseNotes.rst | 3 ++ clang/lib/AST/CXXInheritance.cpp | 18 ++++++++---- clang/lib/AST/DeclCXX.cpp | 2 -- clang/test/CXX/drs/cwg5xx.cpp | 48 ++++++++++++++++++++++++++++++-- clang/www/cxx_dr_status.html | 2 +- 5 files changed, 62 insertions(+), 11 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 01c7899e36c93..395da768f7c32 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -310,6 +310,9 @@ Resolutions to C++ Defect Reports by default. (`CWG2521: User-defined literals and reserved identifiers `_). +- Fix name lookup for a dependent base class that is the current instantiation. + (`CWG591: When a dependent base class is the current instantiation `_). 
+ C Language Changes ------------------ diff --git a/clang/lib/AST/CXXInheritance.cpp b/clang/lib/AST/CXXInheritance.cpp index aefc06e9197cf..10b8d524ff897 100644 --- a/clang/lib/AST/CXXInheritance.cpp +++ b/clang/lib/AST/CXXInheritance.cpp @@ -134,7 +134,7 @@ bool CXXRecordDecl::forallBases(ForallBasesCallback BaseMatches) const { return false; CXXRecordDecl *Base = - cast_or_null(Ty->getDecl()->getDefinition()); + cast_if_present(Ty->getDecl()->getDefinition()); if (!Base || (Base->isDependentContext() && !Base->isCurrentInstantiation(Record))) { @@ -169,13 +169,21 @@ bool CXXBasePaths::lookupInBases(ASTContext &Context, QualType BaseType = Context.getCanonicalType(BaseSpec.getType()).getUnqualifiedType(); + bool isCurrentInstantiation = isa(BaseType); + if (!isCurrentInstantiation) { + if (auto *BaseRecord = cast_if_present( + BaseSpec.getType()->getAsRecordDecl())) + isCurrentInstantiation = BaseRecord->isDependentContext() && + BaseRecord->isCurrentInstantiation(Record); + } // C++ [temp.dep]p3: // In the definition of a class template or a member of a class template, // if a base class of the class template depends on a template-parameter, // the base class scope is not examined during unqualified name lookup // either at the point of definition of the class template or member or // during an instantiation of the class tem- plate or member. - if (!LookupInDependent && BaseType->isDependentType()) + if (!LookupInDependent && + (BaseType->isDependentType() && !isCurrentInstantiation)) continue; // Determine whether we need to visit this base class at all, @@ -243,9 +251,8 @@ bool CXXBasePaths::lookupInBases(ASTContext &Context, return FoundPath; } } else if (VisitBase) { - CXXRecordDecl *BaseRecord; + CXXRecordDecl *BaseRecord = nullptr; if (LookupInDependent) { - BaseRecord = nullptr; const TemplateSpecializationType *TST = BaseSpec.getType()->getAs(); if (!TST) { @@ -264,8 +271,7 @@ bool CXXBasePaths::lookupInBases(ASTContext &Context, BaseRecord = nullptr; } } else { - BaseRecord = cast( - BaseSpec.getType()->castAs()->getDecl()); + BaseRecord = cast(BaseSpec.getType()->getAsRecordDecl()); } if (BaseRecord && lookupInBases(Context, BaseRecord, BaseMatches, LookupInDependent)) { diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp index f2f2835641245..af73c658d6a0c 100644 --- a/clang/lib/AST/DeclCXX.cpp +++ b/clang/lib/AST/DeclCXX.cpp @@ -2602,8 +2602,6 @@ bool CXXMethodDecl::isMoveAssignmentOperator() const { void CXXMethodDecl::addOverriddenMethod(const CXXMethodDecl *MD) { assert(MD->isCanonicalDecl() && "Method is not canonical!"); - assert(!MD->getParent()->isDependentContext() && - "Can't add an overridden method to a class template!"); assert(MD->isVirtual() && "Method is not virtual!"); getASTContext().addOverriddenMethod(this, MD); diff --git a/clang/test/CXX/drs/cwg5xx.cpp b/clang/test/CXX/drs/cwg5xx.cpp index ed0c7159dfc88..91a76fd2adbb6 100644 --- a/clang/test/CXX/drs/cwg5xx.cpp +++ b/clang/test/CXX/drs/cwg5xx.cpp @@ -1178,17 +1178,61 @@ namespace cwg590 { // cwg590: yes template typename A::B::C A::B::C::f(A::B::C) {} } -namespace cwg591 { // cwg591: no +namespace cwg591 { // cwg591: 20 template struct A { typedef int M; struct B { typedef void M; struct C; + struct D; + }; + }; + + template struct G { + struct B { + typedef int M; + struct C { + typedef void M; + struct D; + }; + }; + }; + + template struct H { + template struct B { + typedef int M; + template struct C { + typedef void M; + struct D; + struct P; + }; }; }; template struct A::B::C : A { - 
// FIXME: Should find member of non-dependent base class A. + M m; + }; + + template struct G::B::C::D : B { + M m; + }; + + template + template + template + struct H::B::C::D : B { + M m; + }; + + template struct A::B::D : A { + M m; + // expected-error@-1 {{field has incomplete type 'M' (aka 'void'}} + }; + + template + template + template + struct H::B::C::P : B { M m; // expected-error@-1 {{field has incomplete type 'M' (aka 'void'}} }; diff --git a/clang/www/cxx_dr_status.html b/clang/www/cxx_dr_status.html index 186f7cc0ace54..cdedbcbaa4072 100755 --- a/clang/www/cxx_dr_status.html +++ b/clang/www/cxx_dr_status.html @@ -3599,7 +3599,7 @@
C++ defect report implementation status
591 CD4 When a dependent base class is the current instantiation - No + Clang 20 592 From 7802fb5f514be327576b69569556ec9096e5fdd7 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Tue, 3 Dec 2024 14:48:55 +0100 Subject: [PATCH 072/191] [clang][bytecode] Fix `__extension__` handling for vector operators (#118482) Don't reject them, but delegate to the subexpression. --- clang/lib/AST/ByteCode/Compiler.cpp | 3 +++ clang/test/AST/ByteCode/vectors.cpp | 1 + 2 files changed, 4 insertions(+) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 705ab1ec8e8ab..eb102f1e5c7f2 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -5914,6 +5914,9 @@ bool Compiler::VisitVectorUnaryOperator(const UnaryOperator *E) { return this->discard(SubExpr); auto UnaryOp = E->getOpcode(); + if (UnaryOp == UO_Extension) + return this->delegate(SubExpr); + if (UnaryOp != UO_Plus && UnaryOp != UO_Minus && UnaryOp != UO_LNot && UnaryOp != UO_Not && UnaryOp != UO_AddrOf) return this->emitInvalid(E); diff --git a/clang/test/AST/ByteCode/vectors.cpp b/clang/test/AST/ByteCode/vectors.cpp index 08e2ca2adbf5c..a04b678a623a1 100644 --- a/clang/test/AST/ByteCode/vectors.cpp +++ b/clang/test/AST/ByteCode/vectors.cpp @@ -37,6 +37,7 @@ static_assert(arr4[1][0] == 0, ""); static_assert(arr4[1][0] == 0, ""); static_assert(arr4[1][0] == 0, ""); +constexpr VI4 B = __extension__(A); /// From constant-expression-cxx11.cpp namespace Vector { From 8c749ff8aa787049cea4d4f7331493ee17565344 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Tue, 3 Dec 2024 15:18:59 +0100 Subject: [PATCH 073/191] [lldb] Fix "exact match" debug_names type queries (#118465) .. in the global namespace The problem was the interaction of #116989 with an optimization in GetTypesWithQuery. The optimization was only correct for non-exact matches, but that didn't matter before this PR due to the "second layer of defense". After that was removed, the query started returning more types than it should. 
--- .../Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp | 2 +- lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-type.cpp | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp index 6f2cb455ec00e..c71c2dd47344a 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp @@ -527,7 +527,7 @@ void DebugNamesDWARFIndex::GetTypesWithQuery( ConstString name = query.GetTypeBasename(); std::vector query_context = query.GetContextRef(); - if (query_context.size() <= 1) + if (query_context.size() <= 1 && !query.GetExactMatch()) return GetTypes(name, callback); llvm::SmallVector parent_contexts = diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-type.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-type.cpp index 2ed7b219d8da3..af49206608723 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-type.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-type.cpp @@ -4,6 +4,8 @@ // RUN: ld.lld %t.o -o %t // RUN: lldb-test symbols --name=foo --find=type %t | \ // RUN: FileCheck --check-prefix=NAME %s +// RUN: lldb-test symbols --name=::foo --find=type %t | \ +// RUN: FileCheck --check-prefix=EXACT %s // RUN: lldb-test symbols --name=foo --context=context --find=type %t | \ // RUN: FileCheck --check-prefix=CONTEXT %s // RUN: lldb-test symbols --name=not_there --find=type %t | \ @@ -12,6 +14,8 @@ // RUN: %clang %s -g -c -o %t --target=x86_64-apple-macosx // RUN: lldb-test symbols --name=foo --find=type %t | \ // RUN: FileCheck --check-prefix=NAME %s +// RUN: lldb-test symbols --name=::foo --find=type %t | \ +// RUN: FileCheck --check-prefix=EXACT %s // RUN: lldb-test symbols --name=foo --context=context --find=type %t | \ // RUN: FileCheck --check-prefix=CONTEXT %s // RUN: lldb-test symbols --name=not_there --find=type %t | \ @@ -22,6 +26,8 @@ // RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix NAMES // RUN: lldb-test symbols --name=foo --find=type %t | \ // RUN: FileCheck --check-prefix=NAME %s +// RUN: lldb-test symbols --name=::foo --find=type %t | \ +// RUN: FileCheck --check-prefix=EXACT %s // RUN: lldb-test symbols --name=foo --context=context --find=type %t | \ // RUN: FileCheck --check-prefix=CONTEXT %s // RUN: lldb-test symbols --name=not_there --find=type %t | \ @@ -31,9 +37,11 @@ // EMPTY: Found 0 types: // NAME: Found 4 types: +// EXACT: Found 1 types: // CONTEXT: Found 1 types: struct foo { }; // NAME-DAG: name = "foo", {{.*}} decl = find-basic-type.cpp:[[@LINE-1]] +// EXACT-DAG: name = "foo", {{.*}} decl = find-basic-type.cpp:[[@LINE-2]] namespace bar { int context; From 57907c1a96e82c17b16111b919ab7c0f3d4370ab Mon Sep 17 00:00:00 2001 From: Congcong Cai Date: Tue, 3 Dec 2024 22:25:29 +0800 Subject: [PATCH 074/191] [clang-tidy] ignore `[[clang::lifetimebound]]` param in return-const-ref-from-parameter (#118315) Fixed #117696 --- .../ReturnConstRefFromParameterCheck.cpp | 12 +++++++- clang-tools-extra/docs/ReleaseNotes.rst | 3 +- .../return-const-ref-from-parameter.rst | 29 +++++++++++++------ .../return-const-ref-from-parameter.cpp | 6 ++++ 4 files changed, 39 insertions(+), 11 deletions(-) diff --git a/clang-tools-extra/clang-tidy/bugprone/ReturnConstRefFromParameterCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ReturnConstRefFromParameterCheck.cpp index 7da27c0474d51..1bd7abbad66d2 100644 --- 
a/clang-tools-extra/clang-tidy/bugprone/ReturnConstRefFromParameterCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ReturnConstRefFromParameterCheck.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "ReturnConstRefFromParameterCheck.h" +#include "clang/AST/Attrs.inc" #include "clang/AST/Expr.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" @@ -15,6 +16,14 @@ using namespace clang::ast_matchers; namespace clang::tidy::bugprone { +namespace { + +AST_MATCHER(ParmVarDecl, hasLifetimeBoundAttr) { + return Node.hasAttr(); +} + +} // namespace + void ReturnConstRefFromParameterCheck::registerMatchers(MatchFinder *Finder) { const auto DRef = ignoringParens( declRefExpr( @@ -22,7 +31,8 @@ void ReturnConstRefFromParameterCheck::registerMatchers(MatchFinder *Finder) { qualType(lValueReferenceType(pointee( qualType(isConstQualified())))) .bind("type"))), - hasDeclContext(functionDecl().bind("owner"))) + hasDeclContext(functionDecl().bind("owner")), + unless(hasLifetimeBoundAttr())) .bind("param"))) .bind("dref")); const auto Func = diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 453a91e3b504c..e00f86f7d0144 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -184,7 +184,8 @@ Changes in existing checks ` check to diagnose potential dangling references when returning a ``const &`` parameter by using the conditional operator ``cond ? var1 : var2`` and no longer giving - false positives for functions which contain lambda. + false positives for functions which contain lambda and ignore parameters + with ``[[clang::lifetimebound]]`` attribute. - Improved :doc:`bugprone-sizeof-expression ` check to find suspicious diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/return-const-ref-from-parameter.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/return-const-ref-from-parameter.rst index 2349e51477b7d..ba47399914de3 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/return-const-ref-from-parameter.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/return-const-ref-from-parameter.rst @@ -12,15 +12,6 @@ after the call. When the function returns such a parameter also as constant refe then the returned reference can be used after the object it refers to has been destroyed. -This issue can be resolved by declaring an overload of the problematic function -where the ``const &`` parameter is instead declared as ``&&``. The developer has -to ensure that the implementation of that function does not produce a -use-after-free, the exact error that this check is warning against. -Marking such an ``&&`` overload as ``deleted``, will silence the warning as -well. In the case of different ``const &`` parameters being returned depending -on the control flow of the function, an overload where all problematic -``const &`` parameters have been declared as ``&&`` will resolve the issue. - Example ------- @@ -38,3 +29,23 @@ Example const S& s = fn(S{1}); s.v; // use after free + + +This issue can be resolved by declaring an overload of the problematic function +where the ``const &`` parameter is instead declared as ``&&``. The developer has +to ensure that the implementation of that function does not produce a +use-after-free, the exact error that this check is warning against. +Marking such an ``&&`` overload as ``deleted``, will silence the warning as +well. 
In the case of different ``const &`` parameters being returned depending +on the control flow of the function, an overload where all problematic +``const &`` parameters have been declared as ``&&`` will resolve the issue. + +This issue can also be resolved by adding ``[[clang::lifetimebound]]``. Clang +enables the ``-Wdangling`` warning by default, which can detect mis-uses of the +annotated function. See `lifetimebound attribute `_ +for details. + +.. code-block:: c++ + + const int &f(const int &a [[clang::lifetimebound]]) { return a; } // no warning + const int &v = f(1); // warning: temporary bound to local reference 'v' will be destroyed at the end of the full-expression [-Wdangling] diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/return-const-ref-from-parameter.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/return-const-ref-from-parameter.cpp index 49aeb50155b15..46cb9063beda9 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/return-const-ref-from-parameter.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/return-const-ref-from-parameter.cpp @@ -197,3 +197,9 @@ int const &overload_params_difference3(int p1, int const &a, int p2) { return a; int const &overload_params_difference3(int p1, long &&a, int p2); } // namespace overload + +namespace gh117696 { +namespace use_lifetime_bound_attr { +int const &f(int const &a [[clang::lifetimebound]]) { return a; } +} // namespace use_lifetime_bound_attr +} // namespace gh117696 From c7e14689dd8bdf4b18dccb18895a9b80c225dc53 Mon Sep 17 00:00:00 2001 From: Vyacheslav Levytskyy Date: Tue, 3 Dec 2024 15:41:21 +0100 Subject: [PATCH 075/191] [SPIR-V] Add XFAIL to the broken test (#118487) The test case llvm/test/CodeGen/SPIRV/debug-info/debug-type-basic.ll fails due to https://github.com/llvm/llvm-project/issues/118011 --- llvm/test/CodeGen/SPIRV/debug-info/debug-type-basic.ll | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/test/CodeGen/SPIRV/debug-info/debug-type-basic.ll b/llvm/test/CodeGen/SPIRV/debug-info/debug-type-basic.ll index d12914d378542..03aedeb97dbb4 100644 --- a/llvm/test/CodeGen/SPIRV/debug-info/debug-type-basic.ll +++ b/llvm/test/CodeGen/SPIRV/debug-info/debug-type-basic.ll @@ -1,3 +1,6 @@ +; Issue #118011 +; XFAIL: * + ; RUN: llc --verify-machineinstrs --spv-emit-nonsemantic-debug-info --spirv-ext=+SPV_KHR_non_semantic_info --print-after=spirv-nonsemantic-debug-info -O0 -mtriple=spirv64-unknown-unknown %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-MIR ; RUN: llc --verify-machineinstrs --spv-emit-nonsemantic-debug-info --spirv-ext=+SPV_KHR_non_semantic_info -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV ; RUN: llc --verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_KHR_non_semantic_info %s -o - | FileCheck %s --check-prefix=CHECK-OPTION From a7fda0e1e4dfd17ccf4a1bb80024acca2da6424e Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 3 Dec 2024 14:53:51 +0000 Subject: [PATCH 076/191] [VPlan] Introduce VPScalarPHIRecipe, use for can & EVL IV codegen (NFC). (#114305) Introduce a general recipe to generate a scalar phi. Lower VPCanonicalIVPHIRecipe and VPEVLBasedIVPHIRecipe to VPScalarPHIRecipe before plan execution, avoiding the need for duplicated ::execute implementations. There are other cases that could benefit, including in-loop reduction phis and pointer induction phis. Builds on a similar idea as https://github.com/llvm/llvm-project/pull/82270.
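
As a rough sketch (not part of the diff itself; it paraphrases the prepareToExecute lowering added below and assumes the VPScalarPHIRecipe constructor introduced by this patch), the lowering walks the header phi recipes and swaps each abstract IV phi for the concrete scalar phi recipe:

    // Sketch: replace abstract IV header phis with VPScalarPHIRecipe.
    for (VPRecipeBase &R : make_early_inc_range(VPBB->phis())) {
      if (!isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(&R))
        continue;
      auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
      StringRef Name =
          isa<VPCanonicalIVPHIRecipe>(PhiR) ? "index" : "evl.based.iv";
      auto *ScalarR = new VPScalarPHIRecipe(PhiR->getStartValue(),
                                            PhiR->getBackedgeValue(),
                                            PhiR->getDebugLoc(), Name);
      ScalarR->insertBefore(PhiR);
      PhiR->replaceAllUsesWith(ScalarR); // users now reference the scalar phi
      PhiR->eraseFromParent();
    }

VPScalarPHIRecipe::execute then emits a single two-incoming scalar PHINode in the vector loop header, so the canonical IV and the EVL-based IV share one code-generation path.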
PR: https://github.com/llvm/llvm-project/pull/114305 --- .../Transforms/Vectorize/LoopVectorize.cpp | 1 + llvm/lib/Transforms/Vectorize/VPlan.cpp | 7 ++- llvm/lib/Transforms/Vectorize/VPlan.h | 52 +++++++++++++++++-- .../Transforms/Vectorize/VPlanAnalysis.cpp | 16 +++--- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 41 ++++++++------- .../Transforms/Vectorize/VPlanTransforms.cpp | 21 ++++++++ .../Transforms/Vectorize/VPlanTransforms.h | 3 ++ llvm/lib/Transforms/Vectorize/VPlanValue.h | 1 + 8 files changed, 106 insertions(+), 36 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 90312c1a28df3..3c7c044a04271 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7721,6 +7721,7 @@ DenseMap LoopVectorizationPlanner::executePlan( BestVPlan.prepareToExecute(ILV.getTripCount(), ILV.getOrCreateVectorTripCount(nullptr), CanonicalIVStartValue, State); + VPlanTransforms::prepareToExecute(BestVPlan); BestVPlan.execute(&State); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 529108a5aaa97..b801d1863e252 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1070,10 +1070,9 @@ void VPlan::execute(VPTransformState *State) { } auto *PhiR = cast(&R); - bool NeedsScalar = - isa(PhiR) || - (isa(PhiR) && - cast(PhiR)->isInLoop()); + bool NeedsScalar = isa(PhiR) || + (isa(PhiR) && + cast(PhiR)->isInLoop()); Value *Phi = State->get(PhiR, NeedsScalar); Value *Val = State->get(PhiR->getBackedgeValue(), NeedsScalar); cast(Phi)->addIncoming(Val, VectorLatchBB); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 1b1630ebc6c23..e1d828f038f9a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2239,6 +2239,45 @@ class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe, #endif }; +/// Recipe to generate a scalar PHI. Used to generate code for recipes that +/// produce scalar header phis, including VPCanonicalIVPHIRecipe and +/// VPEVLBasedIVPHIRecipe. +class VPScalarPHIRecipe : public VPHeaderPHIRecipe { + std::string Name; + +public: + VPScalarPHIRecipe(VPValue *Start, VPValue *BackedgeValue, DebugLoc DL, + StringRef Name) + : VPHeaderPHIRecipe(VPDef::VPScalarPHISC, nullptr, Start, DL), + Name(Name.str()) { + addOperand(BackedgeValue); + } + + ~VPScalarPHIRecipe() override = default; + + VPScalarPHIRecipe *clone() override { + llvm_unreachable("cloning not implemented yet"); + } + + VP_CLASSOF_IMPL(VPDef::VPScalarPHISC) + + /// Generate the phi/select nodes. + void execute(VPTransformState &State) override; + + /// Returns true if the recipe only uses the first lane of operand \p Op. + bool onlyFirstLaneUsed(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + return true; + } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif +}; + /// A recipe for handling phis that are widened in the vector loop. /// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are /// managed in the recipe directly. 
@@ -3134,8 +3173,10 @@ class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe { return D->getVPDefID() == VPDef::VPCanonicalIVPHISC; } - /// Generate the canonical scalar induction phi of the vector loop. - void execute(VPTransformState &State) override; + void execute(VPTransformState &State) override { + llvm_unreachable( + "cannot execute this recipe, should be replaced by VPScalarPHIRecipe"); + } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. @@ -3231,9 +3272,10 @@ class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe { return D->getVPDefID() == VPDef::VPEVLBasedIVPHISC; } - /// Generate phi for handling IV based on EVL over iterations correctly. - /// TODO: investigate if it can share the code with VPCanonicalIVPHIRecipe. - void execute(VPTransformState &State) override; + void execute(VPTransformState &State) override { + llvm_unreachable( + "cannot execute this recipe, should be replaced by VPScalarPHIRecipe"); + } /// Return the cost of this VPEVLBasedIVPHIRecipe. InstructionCost computeCost(ElementCount VF, diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index cb42cfe8159b0..969d07b229e46 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -213,14 +213,14 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) { TypeSwitch(V->getDefiningRecipe()) .Case( - [this](const auto *R) { - // Handle header phi recipes, except VPWidenIntOrFpInduction - // which needs special handling due it being possibly truncated. - // TODO: consider inferring/caching type of siblings, e.g., - // backedge value, here and in cases below. - return inferScalarType(R->getStartValue()); - }) + VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe, + VPScalarPHIRecipe>([this](const auto *R) { + // Handle header phi recipes, except VPWidenIntOrFpInduction + // which needs special handling due it being possibly truncated. + // TODO: consider inferring/caching type of siblings, e.g., + // backedge value, here and in cases below. 
+ return inferScalarType(R->getStartValue()); + }) .Case( [](const auto *R) { return R->getScalarType(); }) .CasegetLiveInIRValue(); - PHINode *Phi = PHINode::Create(Start->getType(), 2, "index"); - Phi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt()); - - BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); - Phi->addIncoming(Start, VectorPH); - Phi->setDebugLoc(getDebugLoc()); - State.set(this, Phi, /*IsScalar*/ true); -} - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { @@ -3153,8 +3142,6 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) { assert(!onlyScalarsGenerated(State.VF.isScalable()) && "Recipe should have been replaced"); - auto *IVR = getParent()->getPlan()->getCanonicalIV(); - PHINode *CanonicalIV = cast(State.get(IVR, /*IsScalar*/ true)); unsigned CurrentPart = getUnrollPart(*this); // Build a pointer phi @@ -3164,6 +3151,12 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) { BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); PHINode *NewPointerPhi = nullptr; if (CurrentPart == 0) { + auto *IVR = cast(&getParent() + ->getPlan() + ->getVectorLoopRegion() + ->getEntryBasicBlock() + ->front()); + PHINode *CanonicalIV = cast(State.get(IVR, /*IsScalar*/ true)); NewPointerPhi = PHINode::Create(ScStValueType, 2, "pointer.phi", CanonicalIV->getIterator()); NewPointerPhi->addIncoming(ScalarStartValue, VectorPH); @@ -3477,20 +3470,30 @@ void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent, } #endif -void VPEVLBasedIVPHIRecipe::execute(VPTransformState &State) { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI "; + + printAsOperand(O, SlotTracker); + O << " = phi "; + printOperands(O, SlotTracker); +} +#endif + +void VPScalarPHIRecipe::execute(VPTransformState &State) { BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); Value *Start = State.get(getOperand(0), VPLane(0)); - PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv"); + PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, Name); Phi->addIncoming(Start, VectorPH); Phi->setDebugLoc(getDebugLoc()); State.set(this, Phi, /*IsScalar=*/true); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI "; - +void VPScalarPHIRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "SCALAR-PHI"; printAsOperand(O, SlotTracker); O << " = phi "; printOperands(O, SlotTracker); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 1b333bdc30ff1..cee83d1015b53 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1819,3 +1819,24 @@ void VPlanTransforms::createInterleaveGroups( } } } + +void VPlanTransforms::prepareToExecute(VPlan &Plan) { + ReversePostOrderTraversal> RPOT( + Plan.getVectorLoopRegion()); + for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( + vp_depth_first_deep(Plan.getEntry()))) { + for (VPRecipeBase &R : make_early_inc_range(VPBB->phis())) { + if (!isa(&R)) + continue; + auto *PhiR = cast(&R); + 
StringRef Name = + isa(PhiR) ? "index" : "evl.based.iv"; + auto *ScalarR = + new VPScalarPHIRecipe(PhiR->getStartValue(), PhiR->getBackedgeValue(), + PhiR->getDebugLoc(), Name); + ScalarR->insertBefore(PhiR); + PhiR->replaceAllUsesWith(ScalarR); + PhiR->eraseFromParent(); + } + } +} diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 11e094db6294f..1491e0a8df04d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -123,6 +123,9 @@ struct VPlanTransforms { /// Remove dead recipes from \p Plan. static void removeDeadRecipes(VPlan &Plan); + + /// Lower abstract recipes to concrete ones, that can be codegen'd. + static void prepareToExecute(VPlan &Plan); }; } // namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 691b0d40823cf..957a602091c73 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -359,6 +359,7 @@ class VPDef { VPFirstOrderRecurrencePHISC, VPWidenIntOrFpInductionSC, VPWidenPointerInductionSC, + VPScalarPHISC, VPReductionPHISC, // END: SubclassID for recipes that inherit VPHeaderPHIRecipe // END: Phi-like recipes From 10223c72a9c2afc3ae86eb7995f0dc47f5f9ad44 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 3 Dec 2024 15:54:10 +0100 Subject: [PATCH 077/191] [ConstraintElim] Use nusw flag for GEP decomposition Check for nusw instead of inbounds when decomposing GEPs. In this particular case, we can also look through multiple nusw flags, because we will ultimately be working in the unsigned constraint system. --- .../Scalar/ConstraintElimination.cpp | 10 +-- .../ConstraintElimination/gep-arithmetic.ll | 65 +++++++++++++++++++ 2 files changed, 70 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index 4884c23f16e12..944be38cb94bc 100644 --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -387,12 +387,12 @@ struct OffsetResult { Value *BasePtr; APInt ConstantOffset; SmallMapVector VariableOffsets; - bool AllInbounds; + GEPNoWrapFlags NW; OffsetResult() : BasePtr(nullptr), ConstantOffset(0, uint64_t(0)) {} OffsetResult(GEPOperator &GEP, const DataLayout &DL) - : BasePtr(GEP.getPointerOperand()), AllInbounds(GEP.isInBounds()) { + : BasePtr(GEP.getPointerOperand()), NW(GEP.getNoWrapFlags()) { ConstantOffset = APInt(DL.getIndexTypeSizeInBits(BasePtr->getType()), 0); } }; @@ -426,7 +426,7 @@ static OffsetResult collectOffsets(GEPOperator &GEP, const DataLayout &DL) { Result.ConstantOffset += ConstantOffset2; if (Result.VariableOffsets.size() == 0 && VariableOffsets2.size() == 1) Result.VariableOffsets = VariableOffsets2; - Result.AllInbounds &= InnerGEP->isInBounds(); + Result.NW &= InnerGEP->getNoWrapFlags(); } return Result; } @@ -450,9 +450,9 @@ static Decomposition decomposeGEP(GEPOperator &GEP, assert(!IsSigned && "The logic below only supports decomposition for " "unsigned predicates at the moment."); - const auto &[BasePtr, ConstantOffset, VariableOffsets, AllInbounds] = + const auto &[BasePtr, ConstantOffset, VariableOffsets, NW] = collectOffsets(GEP, DL); - if (!BasePtr || !AllInbounds) + if (!BasePtr || !NW.hasNoUnsignedSignedWrap()) return &GEP; Decomposition Result(ConstantOffset.getSExtValue(), DecompEntry(1, BasePtr)); diff --git 
a/llvm/test/Transforms/ConstraintElimination/gep-arithmetic.ll b/llvm/test/Transforms/ConstraintElimination/gep-arithmetic.ll index a4d825b327969..5e2bab28807f5 100644 --- a/llvm/test/Transforms/ConstraintElimination/gep-arithmetic.ll +++ b/llvm/test/Transforms/ConstraintElimination/gep-arithmetic.ll @@ -630,3 +630,68 @@ ptr.check: exit: ret i4 3 } + +define i1 @test_nusw(ptr %p, i32 %x, i32 %y) { +; CHECK-LABEL: @test_nusw( +; CHECK-NEXT: [[X_EXT:%.*]] = zext i32 [[X:%.*]] to i64 +; CHECK-NEXT: [[Y_EXT:%.*]] = zext i32 [[Y:%.*]] to i64 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i64 [[X_EXT]], [[Y_EXT]] +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP1]]) +; CHECK-NEXT: [[GEP_X:%.*]] = getelementptr nusw i8, ptr [[P:%.*]], i64 [[X_EXT]] +; CHECK-NEXT: [[GEP_Y:%.*]] = getelementptr nusw i8, ptr [[P]], i64 [[Y_EXT]] +; CHECK-NEXT: ret i1 true +; + %x.ext = zext i32 %x to i64 + %y.ext = zext i32 %y to i64 + %cmp1 = icmp ugt i64 %x.ext, %y.ext + call void @llvm.assume(i1 %cmp1) + %gep.x = getelementptr nusw i8, ptr %p, i64 %x.ext + %gep.y = getelementptr nusw i8, ptr %p, i64 %y.ext + %cmp2 = icmp ugt ptr %gep.x, %gep.y + ret i1 %cmp2 +} + +define i1 @test_nusw_nested(ptr %p, i32 %x, i32 %y) { +; CHECK-LABEL: @test_nusw_nested( +; CHECK-NEXT: [[X_EXT:%.*]] = zext i32 [[X:%.*]] to i64 +; CHECK-NEXT: [[Y_EXT:%.*]] = zext i32 [[Y:%.*]] to i64 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i64 [[X_EXT]], [[Y_EXT]] +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP1]]) +; CHECK-NEXT: [[GEP_X:%.*]] = getelementptr nusw i8, ptr [[P:%.*]], i64 [[X_EXT]] +; CHECK-NEXT: [[GEP_X1:%.*]] = getelementptr nusw i8, ptr [[GEP_X]], i64 1 +; CHECK-NEXT: [[GEP_Y:%.*]] = getelementptr nusw i8, ptr [[P]], i64 [[Y_EXT]] +; CHECK-NEXT: ret i1 true +; + %x.ext = zext i32 %x to i64 + %y.ext = zext i32 %y to i64 + %cmp1 = icmp ugt i64 %x.ext, %y.ext + call void @llvm.assume(i1 %cmp1) + %gep.x = getelementptr nusw i8, ptr %p, i64 %x.ext + %gep.x1 = getelementptr nusw i8, ptr %gep.x, i64 1 + %gep.y = getelementptr nusw i8, ptr %p, i64 %y.ext + %cmp2 = icmp ugt ptr %gep.x1, %gep.y + ret i1 %cmp2 +} + +define i1 @test_missing_nusw(ptr %p, i32 %x, i32 %y) { +; CHECK-LABEL: @test_missing_nusw( +; CHECK-NEXT: [[X_EXT:%.*]] = zext i32 [[X:%.*]] to i64 +; CHECK-NEXT: [[Y_EXT:%.*]] = zext i32 [[Y:%.*]] to i64 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i64 [[X_EXT]], [[Y_EXT]] +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP1]]) +; CHECK-NEXT: [[GEP_X:%.*]] = getelementptr nusw i8, ptr [[P:%.*]], i64 [[X_EXT]] +; CHECK-NEXT: [[GEP_X1:%.*]] = getelementptr i8, ptr [[GEP_X]], i64 1 +; CHECK-NEXT: [[GEP_Y:%.*]] = getelementptr nusw i8, ptr [[P]], i64 [[Y_EXT]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt ptr [[GEP_X1]], [[GEP_Y]] +; CHECK-NEXT: ret i1 [[CMP2]] +; + %x.ext = zext i32 %x to i64 + %y.ext = zext i32 %y to i64 + %cmp1 = icmp ugt i64 %x.ext, %y.ext + call void @llvm.assume(i1 %cmp1) + %gep.x = getelementptr nusw i8, ptr %p, i64 %x.ext + %gep.x1 = getelementptr i8, ptr %gep.x, i64 1 + %gep.y = getelementptr nusw i8, ptr %p, i64 %y.ext + %cmp2 = icmp ugt ptr %gep.x1, %gep.y + ret i1 %cmp2 +} From db4cbe50696798e94585cafec1a24764e9dcf199 Mon Sep 17 00:00:00 2001 From: Vyacheslav Levytskyy Date: Tue, 3 Dec 2024 16:08:25 +0100 Subject: [PATCH 078/191] [SPIR-V] Fix generation of invalid SPIR-V in cases of of bitcasts between pointers and multiple null pointers used in the input LLVM IR (#118298) This PR resolved the following issues: (1) There are rare but possible cases when there are bitcasts between pointers intertwined in a sophisticated way with loads, 
stores, function calls and other instructions that are part of type deduction. In this case we must account for inserted bitcasts between pointers rather than just ignore them. (2) Null pointers have the same constant representation but different types. Type info from Intrinsic::spv_track_constant() refers to the opaque (untyped) pointer, so that each MF/v-reg pair would fall into the same Const record in Duplicate Tracker and would be represented by a single OpConstantNull instruction, unless we use precise pointee type info. We must be able to distinguish one constant (null) pointer from another to avoid generating invalid code with inconsistent types of operands. --- llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 63 +- .../CodeGen/SPIRV/pointers/tangled-ret.ll | 235 + .../OpPhi_ArgumentsPlaceholders.ll | 11 +- .../validate/sycl-tangle-group-algorithms.ll | 4673 +++++++++++++++++ 4 files changed, 4959 insertions(+), 23 deletions(-) create mode 100644 llvm/test/CodeGen/SPIRV/pointers/tangled-ret.ll create mode 100644 llvm/test/CodeGen/SPIRV/validate/sycl-tangle-group-algorithms.ll diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index e6f136cc81b4b..82d354a3e3a22 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -475,7 +475,7 @@ void SPIRVEmitIntrinsics::propagateElemType( DenseMap Ptrcasts; SmallVector Users(Op->users()); for (auto *U : Users) { - if (!isa(U) || isa(U) || isSpvIntrinsic(U)) + if (!isa(U) || isSpvIntrinsic(U)) continue; if (!VisitedSubst.insert(std::make_pair(U, Op)).second) continue; @@ -506,7 +506,7 @@ void SPIRVEmitIntrinsics::propagateElemTypeRec( return; SmallVector Users(Op->users()); for (auto *U : Users) { - if (!isa(U) || isa(U) || isSpvIntrinsic(U)) + if (!isa(U) || isSpvIntrinsic(U)) continue; if (!VisitedSubst.insert(std::make_pair(U, Op)).second) continue; @@ -958,6 +958,14 @@ void SPIRVEmitIntrinsics::deduceOperandElementType( return; Uncomplete = isTodoType(I); Ops.push_back(std::make_pair(Ref->getPointerOperand(), 0)); + } else if (auto *Ref = dyn_cast(I)) { + if (!isPointerTy(I->getType())) + return; + KnownElemTy = GR->findDeducedElementType(I); + if (!KnownElemTy) + return; + Uncomplete = isTodoType(I); + Ops.push_back(std::make_pair(Ref->getOperand(0), 0)); } else if (auto *Ref = dyn_cast(I)) { if (GR->findDeducedElementType(Ref->getPointerOperand())) return; @@ -1030,7 +1038,6 @@ void SPIRVEmitIntrinsics::deduceOperandElementType( } } } - TypeValidated.insert(I); // Non-recursive update of types in the function uncomplete returns. // This may happen just once per a function, the latch is a pair of // findDeducedElementType(F) / addDeducedElementType(F, ...). @@ -1043,6 +1050,7 @@ void SPIRVEmitIntrinsics::deduceOperandElementType( } else if (UncompleteRets) { UncompleteRets->insert(I); } + TypeValidated.insert(I); return; } Uncomplete = isTodoType(CurrF); @@ -1369,10 +1377,6 @@ void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast( Instruction *I, Value *Pointer, Type *ExpectedElementType, unsigned OperandToReplace, IRBuilder<> &B) { TypeValidated.insert(I); - // If Pointer is the result of nop BitCastInst (ptr -> ptr), use the source - // pointer instead. The BitCastInst should be later removed when visited. 
- while (BitCastInst *BC = dyn_cast(Pointer)) - Pointer = BC->getOperand(0); // Do not emit spv_ptrcast if Pointer's element type is ExpectedElementType Type *PointerElemTy = deduceElementTypeHelper(Pointer, false); @@ -1759,8 +1763,7 @@ bool SPIRVEmitIntrinsics::insertAssignPtrTypeIntrs(Instruction *I, IRBuilder<> &B, bool UnknownElemTypeI8) { reportFatalOnTokenType(I); - if (!isPointerTy(I->getType()) || !requireAssignType(I) || - isa(I)) + if (!isPointerTy(I->getType()) || !requireAssignType(I)) return false; setInsertPointAfterDef(B, I); @@ -1861,8 +1864,9 @@ void SPIRVEmitIntrinsics::insertSpirvDecorations(Instruction *I, void SPIRVEmitIntrinsics::processInstrAfterVisit(Instruction *I, IRBuilder<> &B) { auto *II = dyn_cast(I); - if (II && II->getIntrinsicID() == Intrinsic::spv_const_composite && - TrackConstants) { + bool IsConstComposite = + II && II->getIntrinsicID() == Intrinsic::spv_const_composite; + if (IsConstComposite && TrackConstants) { setInsertPointAfterDef(B, I); auto t = AggrConsts.find(I); assert(t != AggrConsts.end()); @@ -1886,12 +1890,27 @@ void SPIRVEmitIntrinsics::processInstrAfterVisit(Instruction *I, : B.SetInsertPoint(I); BPrepared = true; } + Type *OpTy = Op->getType(); Value *OpTyVal = Op; - if (Op->getType()->isTargetExtTy()) - OpTyVal = PoisonValue::get(Op->getType()); - auto *NewOp = buildIntrWithMD(Intrinsic::spv_track_constant, - {Op->getType(), OpTyVal->getType()}, Op, - OpTyVal, {}, B); + if (OpTy->isTargetExtTy()) + OpTyVal = PoisonValue::get(OpTy); + CallInst *NewOp = + buildIntrWithMD(Intrinsic::spv_track_constant, + {OpTy, OpTyVal->getType()}, Op, OpTyVal, {}, B); + Type *OpElemTy = nullptr; + if (!IsConstComposite && isPointerTy(OpTy) && + (OpElemTy = GR->findDeducedElementType(Op)) != nullptr && + OpElemTy != IntegerType::getInt8Ty(I->getContext())) { + buildAssignPtr(B, IntegerType::getInt8Ty(I->getContext()), NewOp); + SmallVector Types = {OpTy, OpTy}; + SmallVector Args = { + NewOp, buildMD(PoisonValue::get(OpElemTy)), + B.getInt32(getPointerAddressSpace(OpTy))}; + CallInst *PtrCasted = + B.CreateIntrinsic(Intrinsic::spv_ptrcast, {Types}, Args); + buildAssignPtr(B, OpElemTy, PtrCasted); + NewOp = PtrCasted; + } I->setOperand(OpNo, NewOp); } } @@ -2022,8 +2041,16 @@ void SPIRVEmitIntrinsics::processParamTypes(Function *F, IRBuilder<> &B) { if (!isUntypedPointerTy(Arg->getType())) continue; Type *ElemTy = GR->findDeducedElementType(Arg); - if (!ElemTy && (ElemTy = deduceFunParamElementType(F, OpIdx)) != nullptr) - buildAssignPtr(B, ElemTy, Arg); + if (!ElemTy && (ElemTy = deduceFunParamElementType(F, OpIdx)) != nullptr) { + if (CallInst *AssignCI = GR->findAssignPtrTypeInstr(Arg)) { + DenseSet> VisitedSubst; + updateAssignType(AssignCI, Arg, PoisonValue::get(ElemTy)); + propagateElemType(Arg, IntegerType::getInt8Ty(F->getContext()), + VisitedSubst); + } else { + buildAssignPtr(B, ElemTy, Arg); + } + } } } diff --git a/llvm/test/CodeGen/SPIRV/pointers/tangled-ret.ll b/llvm/test/CodeGen/SPIRV/pointers/tangled-ret.ll new file mode 100644 index 0000000000000..985893029db89 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/pointers/tangled-ret.ll @@ -0,0 +1,235 @@ +; The only pass criterion is that spirv-val considers output valid. 
+ +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +%subgr = type { i64, i64 } +%t_range = type { %t_arr } +%t_arr = type { [1 x i64] } +%t_arr2 = type { [4 x i32] } + +define internal spir_func noundef i32 @geti32() { +entry: + ret i32 100 +} + +define internal spir_func noundef i64 @geti64() { +entry: + ret i64 200 +} + +define internal spir_func void @enable_if(ptr addrspace(4) noundef align 8 dereferenceable_or_null(8) %this, i64 noundef %dim0) { +entry: + %this.addr = alloca ptr addrspace(4), align 8 + %dim0.addr = alloca i64, align 8 + store ptr addrspace(4) %this, ptr %this.addr, align 8 + store i64 %dim0, ptr %dim0.addr, align 8 + %this1 = load ptr addrspace(4), ptr %this.addr, align 8 + %0 = load i64, ptr %dim0.addr, align 8 + call spir_func void @enable_if_2(ptr addrspace(4) noundef align 8 dereferenceable_or_null(8) %this1, i64 noundef %0) + ret void +} + + +define internal spir_func void @test(ptr addrspace(4) noundef align 8 dereferenceable_or_null(16) %this, ptr addrspace(4) noundef align 4 dereferenceable(16) %bits, ptr noundef byval(%t_range) align 8 %pos) { +entry: + %this.addr = alloca ptr addrspace(4), align 8 + %bits.addr = alloca ptr addrspace(4), align 8 + %cur_pos = alloca i64, align 8 + %__range4 = alloca ptr addrspace(4), align 8 + %__begin0 = alloca ptr addrspace(4), align 8 + %__end0 = alloca ptr addrspace(4), align 8 + %cleanup.dest.slot = alloca i32, align 4 + %elem = alloca ptr addrspace(4), align 8 + %agg.tmp = alloca %t_range, align 8 + %agg.tmp.ascast = addrspacecast ptr %agg.tmp to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr, align 8 + store ptr addrspace(4) %bits, ptr %bits.addr, align 8 + %pos.ascast = addrspacecast ptr %pos to ptr addrspace(4) + %this1 = load ptr addrspace(4), ptr %this.addr, align 8 + %call = call spir_func noundef i64 @getp(ptr addrspace(4) noundef align 8 dereferenceable_or_null(8) %pos.ascast, i32 noundef 0) + store i64 %call, ptr %cur_pos, align 8 + %0 = load ptr addrspace(4), ptr %bits.addr, align 8 + store ptr addrspace(4) %0, ptr %__range4, align 8 + %1 = load ptr addrspace(4), ptr %__range4, align 8 + %call2 = call spir_func noundef ptr addrspace(4) @beginp(ptr addrspace(4) noundef align 4 dereferenceable_or_null(16) %1) + store ptr addrspace(4) %call2, ptr %__begin0, align 8 + %2 = load ptr addrspace(4), ptr %__range4, align 8 + %call3 = call spir_func noundef ptr addrspace(4) @endp(ptr addrspace(4) noundef align 4 dereferenceable_or_null(16) %2) + store ptr addrspace(4) %call3, ptr %__end0, align 8 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %3 = load ptr addrspace(4), ptr %__begin0, align 8 + %4 = load ptr addrspace(4), ptr %__end0, align 8 + %cmp = icmp ne ptr addrspace(4) %3, %4 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + br label %for.end + +for.body: ; preds = %for.cond + %5 = load ptr addrspace(4), ptr %__begin0, align 8 + store ptr addrspace(4) %5, ptr %elem, align 8 + %6 = load i64, ptr %cur_pos, align 8 + %call4 = call spir_func noundef i32 @maskp(ptr addrspace(4) noundef align 8 dereferenceable_or_null(16) %this1) + %conv = zext i32 %call4 to i64 + %cmp5 = icmp ult i64 %6, %conv + br i1 %cmp5, label %if.then, label %if.else + +if.then: ; preds = %for.body + %7 = load ptr addrspace(4), ptr %elem, align 8 + %8 = load i64, ptr %cur_pos, align 8 + call spir_func void @enable_if(ptr addrspace(4) noundef align 8 dereferenceable_or_null(8) %agg.tmp.ascast, i64 noundef %8) + 
call spir_func void @extract_bits(ptr addrspace(4) noundef align 8 dereferenceable_or_null(16) %this1, ptr addrspace(4) noundef align 4 dereferenceable(4) %7, ptr noundef byval(%t_range) align 8 %agg.tmp) + %9 = load i64, ptr %cur_pos, align 8 + %add = add i64 %9, 32 + store i64 %add, ptr %cur_pos, align 8 + br label %if.end + +if.else: ; preds = %for.body + %10 = load ptr addrspace(4), ptr %elem, align 8 + store i32 0, ptr addrspace(4) %10, align 4 + br label %if.end + +if.end: ; preds = %if.else, %if.then + br label %for.inc + +for.inc: ; preds = %if.end + %11 = load ptr addrspace(4), ptr %__begin0, align 8 + %incdec.ptr = getelementptr inbounds nuw i32, ptr addrspace(4) %11, i32 1 + store ptr addrspace(4) %incdec.ptr, ptr %__begin0, align 8 + br label %for.cond + +for.end: ; preds = %for.cond.cleanup + ret void +} + +define internal spir_func noundef i64 @getp(ptr addrspace(4) noundef align 8 dereferenceable_or_null(8) %this, i32 noundef %dimension) { +entry: + %this.addr.i = alloca ptr addrspace(4), align 8 + %dimension.addr.i = alloca i32, align 4 + %retval = alloca i64, align 8 + %this.addr = alloca ptr addrspace(4), align 8 + %dimension.addr = alloca i32, align 4 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr, align 8 + store i32 %dimension, ptr %dimension.addr, align 4 + %this1 = load ptr addrspace(4), ptr %this.addr, align 8 + %0 = load i32, ptr %dimension.addr, align 4 + store ptr addrspace(4) %this1, ptr %this.addr.i, align 8 + store i32 %0, ptr %dimension.addr.i, align 4 + %this1.i = load ptr addrspace(4), ptr %this.addr.i, align 8 + %common_array1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %1 = load i32, ptr %dimension.addr, align 4 + %idxprom = sext i32 %1 to i64 + %arrayidx = getelementptr inbounds [1 x i64], ptr addrspace(4) %common_array1, i64 0, i64 %idxprom + %2 = load i64, ptr addrspace(4) %arrayidx, align 8 + ret i64 %2 +} + +define internal spir_func noundef ptr addrspace(4) @beginp(ptr addrspace(4) noundef align 4 dereferenceable_or_null(16) %this) { +entry: + %retval = alloca ptr addrspace(4), align 8 + %this.addr = alloca ptr addrspace(4), align 8 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr, align 8 + %this1 = load ptr addrspace(4), ptr %this.addr, align 8 + %MData1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %arraydecay2 = bitcast ptr addrspace(4) %MData1 to ptr addrspace(4) + ret ptr addrspace(4) %arraydecay2 +} + +define internal spir_func noundef ptr addrspace(4) @endp(ptr addrspace(4) noundef align 4 dereferenceable_or_null(16) %this) { +entry: + %retval = alloca ptr addrspace(4), align 8 + %this.addr = alloca ptr addrspace(4), align 8 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr, align 8 + %this1 = load ptr addrspace(4), ptr %this.addr, align 8 + %MData1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %arraydecay2 = bitcast ptr addrspace(4) %MData1 to ptr addrspace(4) + %add.ptr = getelementptr inbounds nuw i32, ptr addrspace(4) %arraydecay2, i64 4 + ret ptr addrspace(4) %add.ptr +} + +define internal spir_func noundef i32 @maskp(ptr addrspace(4) noundef align 8 dereferenceable_or_null(16) %this) { +entry: + %retval = alloca i32, align 4 + %this.addr = alloca ptr addrspace(4), align 8 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr, align 8 + %this1 = load ptr 
addrspace(4), ptr %this.addr, align 8 + %bits_num = getelementptr inbounds nuw %subgr, ptr addrspace(4) %this1, i32 0, i32 1 + %0 = load i64, ptr addrspace(4) %bits_num, align 8 + %conv = trunc i64 %0 to i32 + ret i32 %conv +} + +define internal spir_func void @enable_if_2(ptr addrspace(4) noundef align 8 dereferenceable_or_null(8) %this, i64 noundef %dim0) { +entry: + %this.addr = alloca ptr addrspace(4), align 8 + %dim0.addr = alloca i64, align 8 + store ptr addrspace(4) %this, ptr %this.addr, align 8 + store i64 %dim0, ptr %dim0.addr, align 8 + %this1 = load ptr addrspace(4), ptr %this.addr, align 8 + %common_array1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %0 = load i64, ptr %dim0.addr, align 8 + store i64 %0, ptr addrspace(4) %common_array1, align 8 + ret void +} + +define internal spir_func void @extract_bits(ptr addrspace(4) noundef align 8 dereferenceable_or_null(16) %this, ptr addrspace(4) noundef align 4 dereferenceable(4) %bits, ptr noundef byval(%t_range) align 8 %pos) { +entry: + %this.addr = alloca ptr addrspace(4), align 8 + %bits.addr = alloca ptr addrspace(4), align 8 + %Res = alloca i64, align 8 + store ptr addrspace(4) %this, ptr %this.addr, align 8 + store ptr addrspace(4) %bits, ptr %bits.addr, align 8 + %pos.ascast = addrspacecast ptr %pos to ptr addrspace(4) + %this1 = load ptr addrspace(4), ptr %this.addr, align 8 + %Bits1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %0 = load i64, ptr addrspace(4) %Bits1, align 8 + store i64 %0, ptr %Res, align 8 + %bits_num = getelementptr inbounds nuw %subgr, ptr addrspace(4) %this1, i32 0, i32 1 + %1 = load i64, ptr addrspace(4) %bits_num, align 8 + %call = call spir_func noundef i64 @geti64() + %2 = load i64, ptr %Res, align 8 + %and = and i64 %2, %call + store i64 %and, ptr %Res, align 8 + %call2 = call spir_func noundef i64 @geti64() + %call3 = call spir_func noundef i32 @geti32() + %conv = zext i32 %call3 to i64 + %cmp = icmp ult i64 %call2, %conv + br i1 %cmp, label %if.then, label %if.else + +if.else: ; preds = %entry + %3 = load ptr addrspace(4), ptr %bits.addr, align 8 + store i32 0, ptr addrspace(4) %3, align 4 + br label %if.end11 + +if.then: ; preds = %entry + %call4 = call spir_func noundef i64 @geti64() + %cmp5 = icmp ugt i64 %call4, 0 + br i1 %cmp5, label %if.then6, label %if.end + +if.then6: ; preds = %if.then + %call7 = call spir_func noundef i64 @geti64() + %4 = load i64, ptr %Res, align 8 + %shr = lshr i64 %4, %call7 + store i64 %shr, ptr %Res, align 8 + br label %if.end + +if.end: ; preds = %if.then6, %if.then + %call8 = call spir_func noundef i64 @geti64() + %5 = load i64, ptr %Res, align 8 + %and9 = and i64 %5, %call8 + store i64 %and9, ptr %Res, align 8 + %6 = load i64, ptr %Res, align 8 + %conv10 = trunc i64 %6 to i32 + %7 = load ptr addrspace(4), ptr %bits.addr, align 8 + store i32 %conv10, ptr addrspace(4) %7, align 4 + br label %if.end11 + +if.end11: ; preds = %if.else, %if.end + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpPhi_ArgumentsPlaceholders.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpPhi_ArgumentsPlaceholders.ll index c98fef3631e04..ee5596ed38b1b 100644 --- a/llvm/test/CodeGen/SPIRV/transcoding/OpPhi_ArgumentsPlaceholders.ll +++ b/llvm/test/CodeGen/SPIRV/transcoding/OpPhi_ArgumentsPlaceholders.ll @@ -12,7 +12,8 @@ ;; } ;; } -; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 
-mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; XFAIL: * %struct.Node = type { %struct.Node.0 addrspace(1)* } @@ -25,8 +26,8 @@ entry: for.cond: ; preds = %for.inc, %entry %pNode.0 = phi %struct.Node addrspace(1)* [ %pNodes, %entry ], [ %1, %for.inc ] %j.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] -; CHECK-SPIRV: %[[#]] = OpPhi %[[#]] %[[#]] %[[#]] %[[#BitcastResultId:]] %[[#]] -; CHECK-SPIRV-NEXT: OpPhi +; CHECK: %[[#]] = OpPhi %[[#]] %[[#]] %[[#]] %[[#BitcastResultId:]] %[[#]] +; CHECK-NEXT: OpPhi %cmp = icmp slt i32 %j.0, 10 br i1 %cmp, label %for.body, label %for.end @@ -36,8 +37,8 @@ for.body: ; preds = %for.cond %0 = load %struct.Node.0 addrspace(1)*, %struct.Node.0 addrspace(1)* addrspace(1)* %pNext, align 4 %1 = bitcast %struct.Node.0 addrspace(1)* %0 to %struct.Node addrspace(1)* -; CHECK-SPIRV: %[[#LoadResultId:]] = OpLoad %[[#]] -; CHECK-SPIRV: %[[#BitcastResultId]] = OpBitcast %[[#]] %[[#LoadResultId]] +; CHECK: %[[#LoadResultId:]] = OpLoad %[[#]] +; CHECK: %[[#BitcastResultId]] = OpBitcast %[[#]] %[[#LoadResultId]] br label %for.inc diff --git a/llvm/test/CodeGen/SPIRV/validate/sycl-tangle-group-algorithms.ll b/llvm/test/CodeGen/SPIRV/validate/sycl-tangle-group-algorithms.ll new file mode 100644 index 0000000000000..b6b919f36d92c --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/validate/sycl-tangle-group-algorithms.ll @@ -0,0 +1,4673 @@ +; This is an excerpt from the SYCL end-to-end test suite, cleaned out from +; unrelevant details, that reproduced cases of invalid SPIR-V generation due +; to wrong types, deduced from the input LLVM IR. Namely, this test case covers +; cases of type mismatch when null pointer constant is used in different +; contexts and so with different pointee types, and intertwined +; load/store/function call LLVM IR input with bitcasts inserted between +; instruction uses. + +; The only pass criterion is that spirv-val considers output valid. 
+ +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64v1.5-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +%"nd_item" = type { i8 } +%struct.AssertHappened = type { i32, [257 x i8], [257 x i8], [129 x i8], i32, i64, i64, i64, i64, i64, i64 } +%"range" = type { %"detail::array" } +%"detail::array" = type { [1 x i64] } +%class.anon = type { %"accessor", %"accessor", %"accessor", %"accessor", %"accessor", %"accessor", %"accessor", %"accessor", %"accessor", %"accessor", %"accessor", %"accessor", %"accessor" } +%"accessor" = type { %"detail::AccessorImplDevice", %union.anon } +%"detail::AccessorImplDevice" = type { %"range", %"range", %"range" } +%union.anon = type { ptr addrspace(1) } +%class.anon.6 = type { ptr addrspace(4), ptr addrspace(4), ptr addrspace(4), ptr addrspace(4) } +%"group" = type { %"range", %"range", %"range", %"range" } +%"item" = type { %"detail::AccessorImplDevice" } +%"item.22" = type { %"sd_ItemBase.23" } +%"sd_ItemBase.23" = type { %"range", %"range" } +%"tangle_group" = type { %"ss_sub_group_mask" } +%"ss_sub_group_mask" = type { i64, i64 } +%class.anon.8 = type { %"accessor", %"accessor", [8 x i8], %"accessor", %"accessor", %"accessor", %"accessor", %"accessor", %"accessor", %"accessor", %"accessor", %"accessor", %"accessor", %"accessor" } +%"vec.16" = type { %"struct.std::array.20" } +%"struct.std::array.20" = type { [4 x i32] } +%class.anon.15 = type { ptr addrspace(4), ptr addrspace(4), ptr addrspace(4) } +%class.anon.7 = type { ptr addrspace(4), ptr addrspace(4) } + +@.str = private unnamed_addr addrspace(1) constant [21 x i8] c"bits_num <= max_bits\00", align 1 +@.str.1 = private unnamed_addr addrspace(1) constant [17 x i8] c"subgroupmask.hpp\00", align 1 +@__PRETTY_FUNCTION1 = private unnamed_addr addrspace(1) constant [32 x i8] c"subgroup_mask(BitsType, size_t)\00", align 1 +@.str.2 = private unnamed_addr addrspace(1) constant [15 x i8] c"bn <= max_bits\00", align 1 +@__PRETTY_FUNCTION2 = private unnamed_addr addrspace(1) constant [52 x i8] c"BitsType subgroup_mask::valuable_bits(size_t) const\00", align 1 +@__spirv_BuiltInSubgroupMaxSize = external dso_local addrspace(1) constant i32, align 4 +@__spirv_BuiltInSubgroupLocalInvocationId = external dso_local addrspace(1) constant i32, align 4 +@_ZSt6ignore = linkonce_odr dso_local addrspace(1) constant %"nd_item" undef, align 1 +@__spirv_BuiltInNumWorkgroups = external dso_local addrspace(1) constant <3 x i64>, align 32 +@__spirv_BuiltInGlobalOffset = external dso_local addrspace(1) constant <3 x i64>, align 32 +@__spirv_BuiltInGlobalInvocationId = external dso_local addrspace(1) constant <3 x i64>, align 32 +@__spirv_BuiltInGlobalSize = external dso_local addrspace(1) constant <3 x i64>, align 32 +@__spirv_BuiltInLocalInvocationId = external dso_local addrspace(1) constant <3 x i64>, align 32 +@SPIR_AssertHappenedMem = linkonce_odr dso_local addrspace(1) global %struct.AssertHappened zeroinitializer +@__spirv_BuiltInWorkgroupId = external dso_local addrspace(1) constant <3 x i64>, align 32 +@__spirv_BuiltInWorkgroupSize = external dso_local addrspace(1) constant <3 x i64>, align 32 + + +define weak_odr dso_local spir_kernel void @TestKernel(ptr addrspace(1) %_arg_TmpAcc, ptr byval(%"range") %_arg_TmpAcc1, ptr byval(%"range") %_arg_TmpAcc2, ptr byval(%"range") %_arg_TmpAcc3, ptr addrspace(1) align 1 %_arg_BarrierAcc, ptr byval(%"range") %_arg_BarrierAcc4, ptr byval(%"range") %_arg_BarrierAcc5, ptr byval(%"range") %_arg_BarrierAcc6, ptr addrspace(1) align 1 %_arg_BroadcastAcc, ptr byval(%"range") 
%_arg_BroadcastAcc7, ptr byval(%"range") %_arg_BroadcastAcc8, ptr byval(%"range") %_arg_BroadcastAcc9, ptr addrspace(1) align 1 %_arg_AnyAcc, ptr byval(%"range") %_arg_AnyAcc10, ptr byval(%"range") %_arg_AnyAcc11, ptr byval(%"range") %_arg_AnyAcc12, ptr addrspace(1) align 1 %_arg_AllAcc, ptr byval(%"range") %_arg_AllAcc13, ptr byval(%"range") %_arg_AllAcc14, ptr byval(%"range") %_arg_AllAcc15, ptr addrspace(1) align 1 %_arg_NoneAcc, ptr byval(%"range") %_arg_NoneAcc16, ptr byval(%"range") %_arg_NoneAcc17, ptr byval(%"range") %_arg_NoneAcc18, ptr addrspace(1) align 1 %_arg_ReduceAcc, ptr byval(%"range") %_arg_ReduceAcc19, ptr byval(%"range") %_arg_ReduceAcc20, ptr byval(%"range") %_arg_ReduceAcc21, ptr addrspace(1) align 1 %_arg_ExScanAcc, ptr byval(%"range") %_arg_ExScanAcc22, ptr byval(%"range") %_arg_ExScanAcc23, ptr byval(%"range") %_arg_ExScanAcc24, ptr addrspace(1) align 1 %_arg_IncScanAcc, ptr byval(%"range") %_arg_IncScanAcc25, ptr byval(%"range") %_arg_IncScanAcc26, ptr byval(%"range") %_arg_IncScanAcc27, ptr addrspace(1) align 1 %_arg_ShiftLeftAcc, ptr byval(%"range") %_arg_ShiftLeftAcc28, ptr byval(%"range") %_arg_ShiftLeftAcc29, ptr byval(%"range") %_arg_ShiftLeftAcc30, ptr addrspace(1) align 1 %_arg_ShiftRightAcc, ptr byval(%"range") %_arg_ShiftRightAcc31, ptr byval(%"range") %_arg_ShiftRightAcc32, ptr byval(%"range") %_arg_ShiftRightAcc33, ptr addrspace(1) align 1 %_arg_SelectAcc, ptr byval(%"range") %_arg_SelectAcc34, ptr byval(%"range") %_arg_SelectAcc35, ptr byval(%"range") %_arg_SelectAcc36, ptr addrspace(1) align 1 %_arg_PermuteXorAcc, ptr byval(%"range") %_arg_PermuteXorAcc37, ptr byval(%"range") %_arg_PermuteXorAcc38, ptr byval(%"range") %_arg_PermuteXorAcc39) { +entry: + %_arg_TmpAcc.addr = alloca ptr addrspace(1) + %_arg_BarrierAcc.addr = alloca ptr addrspace(1) + %_arg_BroadcastAcc.addr = alloca ptr addrspace(1) + %_arg_AnyAcc.addr = alloca ptr addrspace(1) + %_arg_AllAcc.addr = alloca ptr addrspace(1) + %_arg_NoneAcc.addr = alloca ptr addrspace(1) + %_arg_ReduceAcc.addr = alloca ptr addrspace(1) + %_arg_ExScanAcc.addr = alloca ptr addrspace(1) + %_arg_IncScanAcc.addr = alloca ptr addrspace(1) + %_arg_ShiftLeftAcc.addr = alloca ptr addrspace(1) + %_arg_ShiftRightAcc.addr = alloca ptr addrspace(1) + %_arg_SelectAcc.addr = alloca ptr addrspace(1) + %_arg_PermuteXorAcc.addr = alloca ptr addrspace(1) + %Kernel = alloca %class.anon + %agg.tmp = alloca %"range" + %agg.tmp41 = alloca %"range" + %agg.tmp42 = alloca %"range" + %agg.tmp44 = alloca %"range" + %agg.tmp45 = alloca %"range" + %agg.tmp46 = alloca %"range" + %agg.tmp48 = alloca %"range" + %agg.tmp49 = alloca %"range" + %agg.tmp50 = alloca %"range" + %agg.tmp52 = alloca %"range" + %agg.tmp53 = alloca %"range" + %agg.tmp54 = alloca %"range" + %agg.tmp56 = alloca %"range" + %agg.tmp57 = alloca %"range" + %agg.tmp58 = alloca %"range" + %agg.tmp60 = alloca %"range" + %agg.tmp61 = alloca %"range" + %agg.tmp62 = alloca %"range" + %agg.tmp64 = alloca %"range" + %agg.tmp65 = alloca %"range" + %agg.tmp66 = alloca %"range" + %agg.tmp68 = alloca %"range" + %agg.tmp69 = alloca %"range" + %agg.tmp70 = alloca %"range" + %agg.tmp72 = alloca %"range" + %agg.tmp73 = alloca %"range" + %agg.tmp74 = alloca %"range" + %agg.tmp76 = alloca %"range" + %agg.tmp77 = alloca %"range" + %agg.tmp78 = alloca %"range" + %agg.tmp80 = alloca %"range" + %agg.tmp81 = alloca %"range" + %agg.tmp82 = alloca %"range" + %agg.tmp84 = alloca %"range" + %agg.tmp85 = alloca %"range" + %agg.tmp86 = alloca %"range" + %agg.tmp88 = alloca %"range" + %agg.tmp89 = 
alloca %"range" + %agg.tmp90 = alloca %"range" + %agg.tmp91 = alloca %"nd_item", align 1 + %Kernel.ascast = addrspacecast ptr %Kernel to ptr addrspace(4) + %agg.tmp91.ascast = addrspacecast ptr %agg.tmp91 to ptr addrspace(4) + store ptr addrspace(1) %_arg_TmpAcc, ptr %_arg_TmpAcc.addr + store ptr addrspace(1) %_arg_BarrierAcc, ptr %_arg_BarrierAcc.addr + store ptr addrspace(1) %_arg_BroadcastAcc, ptr %_arg_BroadcastAcc.addr + store ptr addrspace(1) %_arg_AnyAcc, ptr %_arg_AnyAcc.addr + store ptr addrspace(1) %_arg_AllAcc, ptr %_arg_AllAcc.addr + store ptr addrspace(1) %_arg_NoneAcc, ptr %_arg_NoneAcc.addr + store ptr addrspace(1) %_arg_ReduceAcc, ptr %_arg_ReduceAcc.addr + store ptr addrspace(1) %_arg_ExScanAcc, ptr %_arg_ExScanAcc.addr + store ptr addrspace(1) %_arg_IncScanAcc, ptr %_arg_IncScanAcc.addr + store ptr addrspace(1) %_arg_ShiftLeftAcc, ptr %_arg_ShiftLeftAcc.addr + store ptr addrspace(1) %_arg_ShiftRightAcc, ptr %_arg_ShiftRightAcc.addr + store ptr addrspace(1) %_arg_SelectAcc, ptr %_arg_SelectAcc.addr + store ptr addrspace(1) %_arg_PermuteXorAcc, ptr %_arg_PermuteXorAcc.addr + %TmpAcc1 = bitcast ptr addrspace(4) %Kernel.ascast to ptr addrspace(4) + call spir_func void @Foo1(ptr addrspace(4) %TmpAcc1) + %BarrierAcc = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %Kernel.ascast, i32 0, i32 1 + call spir_func void @Foo2(ptr addrspace(4) %BarrierAcc) + %BroadcastAcc = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %Kernel.ascast, i32 0, i32 2 + call spir_func void @Foo2(ptr addrspace(4) %BroadcastAcc) + %AnyAcc = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %Kernel.ascast, i32 0, i32 3 + call spir_func void @Foo2(ptr addrspace(4) %AnyAcc) + %AllAcc = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %Kernel.ascast, i32 0, i32 4 + call spir_func void @Foo2(ptr addrspace(4) %AllAcc) + %NoneAcc = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %Kernel.ascast, i32 0, i32 5 + call spir_func void @Foo2(ptr addrspace(4) %NoneAcc) + %ReduceAcc = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %Kernel.ascast, i32 0, i32 6 + call spir_func void @Foo2(ptr addrspace(4) %ReduceAcc) + %ExScanAcc = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %Kernel.ascast, i32 0, i32 7 + call spir_func void @Foo2(ptr addrspace(4) %ExScanAcc) + %IncScanAcc = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %Kernel.ascast, i32 0, i32 8 + call spir_func void @Foo2(ptr addrspace(4) %IncScanAcc) + %ShiftLeftAcc = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %Kernel.ascast, i32 0, i32 9 + call spir_func void @Foo2(ptr addrspace(4) %ShiftLeftAcc) + %ShiftRightAcc = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %Kernel.ascast, i32 0, i32 10 + call spir_func void @Foo2(ptr addrspace(4) %ShiftRightAcc) + %SelectAcc = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %Kernel.ascast, i32 0, i32 11 + call spir_func void @Foo2(ptr addrspace(4) %SelectAcc) + %PermuteXorAcc = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %Kernel.ascast, i32 0, i32 12 + call spir_func void @Foo2(ptr addrspace(4) %PermuteXorAcc) + %TmpAcc402 = bitcast ptr addrspace(4) %Kernel.ascast to ptr addrspace(4) + %0 = load ptr addrspace(1), ptr %_arg_TmpAcc.addr + call spir_func void @Foo3(ptr addrspace(4) %TmpAcc402, ptr addrspace(1) %0, ptr byval(%"range") %agg.tmp, ptr byval(%"range") %agg.tmp41, ptr byval(%"range") %agg.tmp42) + %BarrierAcc43 = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %Kernel.ascast, i32 0, i32 1 + 
%1 = load ptr addrspace(1), ptr %_arg_BarrierAcc.addr + call spir_func void @Foo4(ptr addrspace(4) %BarrierAcc43, ptr addrspace(1) %1, ptr byval(%"range") %agg.tmp44, ptr byval(%"range") %agg.tmp45, ptr byval(%"range") %agg.tmp46) + %BroadcastAcc47 = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %Kernel.ascast, i32 0, i32 2 + %2 = load ptr addrspace(1), ptr %_arg_BroadcastAcc.addr + call spir_func void @Foo4(ptr addrspace(4) %BroadcastAcc47, ptr addrspace(1) %2, ptr byval(%"range") %agg.tmp48, ptr byval(%"range") %agg.tmp49, ptr byval(%"range") %agg.tmp50) + %AnyAcc51 = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %Kernel.ascast, i32 0, i32 3 + %3 = load ptr addrspace(1), ptr %_arg_AnyAcc.addr + call spir_func void @Foo4(ptr addrspace(4) %AnyAcc51, ptr addrspace(1) %3, ptr byval(%"range") %agg.tmp52, ptr byval(%"range") %agg.tmp53, ptr byval(%"range") %agg.tmp54) + %AllAcc55 = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %Kernel.ascast, i32 0, i32 4 + %4 = load ptr addrspace(1), ptr %_arg_AllAcc.addr + call spir_func void @Foo4(ptr addrspace(4) %AllAcc55, ptr addrspace(1) %4, ptr byval(%"range") %agg.tmp56, ptr byval(%"range") %agg.tmp57, ptr byval(%"range") %agg.tmp58) + %NoneAcc59 = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %Kernel.ascast, i32 0, i32 5 + %5 = load ptr addrspace(1), ptr %_arg_NoneAcc.addr + call spir_func void @Foo4(ptr addrspace(4) %NoneAcc59, ptr addrspace(1) %5, ptr byval(%"range") %agg.tmp60, ptr byval(%"range") %agg.tmp61, ptr byval(%"range") %agg.tmp62) + %ReduceAcc63 = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %Kernel.ascast, i32 0, i32 6 + %6 = load ptr addrspace(1), ptr %_arg_ReduceAcc.addr + call spir_func void @Foo4(ptr addrspace(4) %ReduceAcc63, ptr addrspace(1) %6, ptr byval(%"range") %agg.tmp64, ptr byval(%"range") %agg.tmp65, ptr byval(%"range") %agg.tmp66) + %ExScanAcc67 = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %Kernel.ascast, i32 0, i32 7 + %7 = load ptr addrspace(1), ptr %_arg_ExScanAcc.addr + call spir_func void @Foo4(ptr addrspace(4) %ExScanAcc67, ptr addrspace(1) %7, ptr byval(%"range") %agg.tmp68, ptr byval(%"range") %agg.tmp69, ptr byval(%"range") %agg.tmp70) + %IncScanAcc71 = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %Kernel.ascast, i32 0, i32 8 + %8 = load ptr addrspace(1), ptr %_arg_IncScanAcc.addr + call spir_func void @Foo4(ptr addrspace(4) %IncScanAcc71, ptr addrspace(1) %8, ptr byval(%"range") %agg.tmp72, ptr byval(%"range") %agg.tmp73, ptr byval(%"range") %agg.tmp74) + %ShiftLeftAcc75 = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %Kernel.ascast, i32 0, i32 9 + %9 = load ptr addrspace(1), ptr %_arg_ShiftLeftAcc.addr + call spir_func void @Foo4(ptr addrspace(4) %ShiftLeftAcc75, ptr addrspace(1) %9, ptr byval(%"range") %agg.tmp76, ptr byval(%"range") %agg.tmp77, ptr byval(%"range") %agg.tmp78) + %ShiftRightAcc79 = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %Kernel.ascast, i32 0, i32 10 + %10 = load ptr addrspace(1), ptr %_arg_ShiftRightAcc.addr + call spir_func void @Foo4(ptr addrspace(4) %ShiftRightAcc79, ptr addrspace(1) %10, ptr byval(%"range") %agg.tmp80, ptr byval(%"range") %agg.tmp81, ptr byval(%"range") %agg.tmp82) + %SelectAcc83 = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %Kernel.ascast, i32 0, i32 11 + %11 = load ptr addrspace(1), ptr %_arg_SelectAcc.addr + call spir_func void @Foo4(ptr addrspace(4) %SelectAcc83, ptr addrspace(1) %11, ptr byval(%"range") %agg.tmp84, ptr byval(%"range") %agg.tmp85, 
ptr byval(%"range") %agg.tmp86) + %PermuteXorAcc87 = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %Kernel.ascast, i32 0, i32 12 + %12 = load ptr addrspace(1), ptr %_arg_PermuteXorAcc.addr + call spir_func void @Foo4(ptr addrspace(4) %PermuteXorAcc87, ptr addrspace(1) %12, ptr byval(%"range") %agg.tmp88, ptr byval(%"range") %agg.tmp89, ptr byval(%"range") %agg.tmp90) + %call = call spir_func ptr addrspace(4) @Foo5() + call spir_func void @Foo6(ptr addrspace(4) dead_on_unwind writable sret(%"nd_item") align 1 %agg.tmp91.ascast, ptr addrspace(4) %call) + call spir_func void @Foo22(ptr addrspace(4) %Kernel.ascast, ptr byval(%"nd_item") align 1 %agg.tmp91) + ret void +} + +define internal spir_func void @Foo1(ptr addrspace(4) %this) { +entry: + %this.addr = alloca ptr addrspace(4) + %agg.tmp = alloca %"range" + %agg.tmp2 = alloca %"range" + %agg.tmp3 = alloca %"range" + %agg.tmp.ascast = addrspacecast ptr %agg.tmp to ptr addrspace(4) + %agg.tmp2.ascast = addrspacecast ptr %agg.tmp2 to ptr addrspace(4) + %agg.tmp3.ascast = addrspacecast ptr %agg.tmp3 to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %impl1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + call void @llvm.memset.p0.i64(ptr %agg.tmp, i8 0, i64 8, i1 false) + call spir_func void @Foo11(ptr addrspace(4) %agg.tmp.ascast) + call spir_func void @Foo12(ptr addrspace(4) dead_on_unwind writable sret(%"range") %agg.tmp2.ascast) + call spir_func void @Foo12(ptr addrspace(4) dead_on_unwind writable sret(%"range") %agg.tmp3.ascast) + call spir_func void @Foo10(ptr addrspace(4) %impl1, ptr byval(%"range") %agg.tmp, ptr byval(%"range") %agg.tmp2, ptr byval(%"range") %agg.tmp3) + ret void +} + + +define internal spir_func void @Foo2(ptr addrspace(4) %this) { +entry: + %this.addr = alloca ptr addrspace(4) + %agg.tmp = alloca %"range" + %agg.tmp2 = alloca %"range" + %agg.tmp3 = alloca %"range" + %agg.tmp.ascast = addrspacecast ptr %agg.tmp to ptr addrspace(4) + %agg.tmp2.ascast = addrspacecast ptr %agg.tmp2 to ptr addrspace(4) + %agg.tmp3.ascast = addrspacecast ptr %agg.tmp3 to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %impl1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + call void @llvm.memset.p0.i64(ptr %agg.tmp, i8 0, i64 8, i1 false) + call spir_func void @Foo11(ptr addrspace(4) %agg.tmp.ascast) + call spir_func void @Foo12(ptr addrspace(4) dead_on_unwind writable sret(%"range") %agg.tmp2.ascast) + call spir_func void @Foo12(ptr addrspace(4) dead_on_unwind writable sret(%"range") %agg.tmp3.ascast) + call spir_func void @Foo10(ptr addrspace(4) %impl1, ptr byval(%"range") %agg.tmp, ptr byval(%"range") %agg.tmp2, ptr byval(%"range") %agg.tmp3) + ret void +} + + + + +define internal spir_func void @Foo3(ptr addrspace(4) %this, ptr addrspace(1) %Ptr, ptr byval(%"range") %AccessRange, ptr byval(%"range") %MemRange, ptr byval(%"range") %Offset) { +entry: + %this.addr = alloca ptr addrspace(4) + %Ptr.addr = alloca ptr addrspace(1) + %ref.tmp = alloca %class.anon.6 + %ref.tmp.ascast = addrspacecast ptr %ref.tmp to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + store ptr addrspace(1) %Ptr, ptr %Ptr.addr + %AccessRange.ascast = addrspacecast ptr %AccessRange to ptr addrspace(4) + %MemRange.ascast = addrspacecast ptr %MemRange to ptr addrspace(4) + %Offset.ascast = addrspacecast ptr %Offset to ptr addrspace(4) + %this1 = load ptr addrspace(4), ptr %this.addr + %0 = load 
ptr addrspace(1), ptr %Ptr.addr + %1 = getelementptr inbounds nuw %"accessor", ptr addrspace(4) %this1, i32 0, i32 1 + store ptr addrspace(1) %0, ptr addrspace(4) %1 + %2 = bitcast ptr %ref.tmp to ptr + store ptr addrspace(4) %this1, ptr %2 + %Offset2 = getelementptr inbounds %class.anon.6, ptr %ref.tmp, i32 0, i32 1 + store ptr addrspace(4) %Offset.ascast, ptr %Offset2 + %AccessRange3 = getelementptr inbounds %class.anon.6, ptr %ref.tmp, i32 0, i32 2 + store ptr addrspace(4) %AccessRange.ascast, ptr %AccessRange3 + %MemRange4 = getelementptr inbounds %class.anon.6, ptr %ref.tmp, i32 0, i32 3 + store ptr addrspace(4) %MemRange.ascast, ptr %MemRange4 + call spir_func void @Foo13(ptr addrspace(4) %ref.tmp.ascast) + %call = call spir_func i64 @Foo21(ptr addrspace(4) %this1) + %3 = getelementptr inbounds nuw %"accessor", ptr addrspace(4) %this1, i32 0, i32 1 + %4 = load ptr addrspace(1), ptr addrspace(4) %3 + %add.ptr = getelementptr inbounds nuw i64, ptr addrspace(1) %4, i64 %call + store ptr addrspace(1) %add.ptr, ptr addrspace(4) %3 + ret void +} + + +define internal spir_func void @Foo4(ptr addrspace(4) %this, ptr addrspace(1) %Ptr, ptr byval(%"range") %AccessRange, ptr byval(%"range") %MemRange, ptr byval(%"range") %Offset) { +entry: + %this.addr = alloca ptr addrspace(4) + %Ptr.addr = alloca ptr addrspace(1) + %ref.tmp = alloca %class.anon.6 + %ref.tmp.ascast = addrspacecast ptr %ref.tmp to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + store ptr addrspace(1) %Ptr, ptr %Ptr.addr + %AccessRange.ascast = addrspacecast ptr %AccessRange to ptr addrspace(4) + %MemRange.ascast = addrspacecast ptr %MemRange to ptr addrspace(4) + %Offset.ascast = addrspacecast ptr %Offset to ptr addrspace(4) + %this1 = load ptr addrspace(4), ptr %this.addr + %0 = load ptr addrspace(1), ptr %Ptr.addr + %1 = getelementptr inbounds nuw %"accessor", ptr addrspace(4) %this1, i32 0, i32 1 + store ptr addrspace(1) %0, ptr addrspace(4) %1 + %2 = bitcast ptr %ref.tmp to ptr + store ptr addrspace(4) %this1, ptr %2 + %Offset2 = getelementptr inbounds %class.anon.6, ptr %ref.tmp, i32 0, i32 1 + store ptr addrspace(4) %Offset.ascast, ptr %Offset2 + %AccessRange3 = getelementptr inbounds %class.anon.6, ptr %ref.tmp, i32 0, i32 2 + store ptr addrspace(4) %AccessRange.ascast, ptr %AccessRange3 + %MemRange4 = getelementptr inbounds %class.anon.6, ptr %ref.tmp, i32 0, i32 3 + store ptr addrspace(4) %MemRange.ascast, ptr %MemRange4 + call spir_func void @Foo30(ptr addrspace(4) %ref.tmp.ascast) + %call = call spir_func i64 @Foo32(ptr addrspace(4) %this1) + %3 = getelementptr inbounds nuw %"accessor", ptr addrspace(4) %this1, i32 0, i32 1 + %4 = load ptr addrspace(1), ptr addrspace(4) %3 + %add.ptr = getelementptr inbounds nuw i8, ptr addrspace(1) %4, i64 %call + store ptr addrspace(1) %add.ptr, ptr addrspace(4) %3 + ret void +} + + +define internal spir_func ptr addrspace(4) @Foo5() { +entry: + %retval = alloca ptr addrspace(4) + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + ret ptr addrspace(4) null +} + + +define internal spir_func void @Foo6(ptr addrspace(4) dead_on_unwind noalias writable sret(%"nd_item") align 1 %agg.result, ptr addrspace(4) %0) { +entry: + %.addr = alloca ptr addrspace(4) + %GlobalSize = alloca %"range" + %LocalSize = alloca %"range" + %GroupRange = alloca %"range" + %GroupId = alloca %"range" + %GlobalId = alloca %"range" + %LocalId = alloca %"range" + %GlobalOffset = alloca %"range" + %Group = alloca %"group" + %GlobalItem = alloca %"item" + %LocalItem = alloca 
%"item.22" + %cleanup.dest.slot = alloca i32, align 4 + %GlobalSize.ascast = addrspacecast ptr %GlobalSize to ptr addrspace(4) + %LocalSize.ascast = addrspacecast ptr %LocalSize to ptr addrspace(4) + %GroupRange.ascast = addrspacecast ptr %GroupRange to ptr addrspace(4) + %GroupId.ascast = addrspacecast ptr %GroupId to ptr addrspace(4) + %GlobalId.ascast = addrspacecast ptr %GlobalId to ptr addrspace(4) + %LocalId.ascast = addrspacecast ptr %LocalId to ptr addrspace(4) + %GlobalOffset.ascast = addrspacecast ptr %GlobalOffset to ptr addrspace(4) + %Group.ascast = addrspacecast ptr %Group to ptr addrspace(4) + %GlobalItem.ascast = addrspacecast ptr %GlobalItem to ptr addrspace(4) + %LocalItem.ascast = addrspacecast ptr %LocalItem to ptr addrspace(4) + store ptr addrspace(4) %0, ptr %.addr + call spir_func void @Foo7(ptr addrspace(4) dead_on_unwind writable sret(%"range") %GlobalSize.ascast) + call spir_func void @Init1(ptr addrspace(4) dead_on_unwind writable sret(%"range") %LocalSize.ascast) + call spir_func void @Init2(ptr addrspace(4) dead_on_unwind writable sret(%"range") %GroupRange.ascast) + call spir_func void @Init3(ptr addrspace(4) dead_on_unwind writable sret(%"range") %GroupId.ascast) + call spir_func void @Init6(ptr addrspace(4) dead_on_unwind writable sret(%"range") %GlobalId.ascast) + call spir_func void @Init4(ptr addrspace(4) dead_on_unwind writable sret(%"range") %LocalId.ascast) + call spir_func void @Init5(ptr addrspace(4) dead_on_unwind writable sret(%"range") %GlobalOffset.ascast) + call spir_func void @Foo23(ptr addrspace(4) dead_on_unwind writable sret(%"group") %Group.ascast, ptr addrspace(4) %GlobalSize.ascast, ptr addrspace(4) %LocalSize.ascast, ptr addrspace(4) %GroupRange.ascast, ptr addrspace(4) %GroupId.ascast) + call spir_func void @Foo24(ptr addrspace(4) dead_on_unwind writable sret(%"item") %GlobalItem.ascast, ptr addrspace(4) %GlobalSize.ascast, ptr addrspace(4) %GlobalId.ascast, ptr addrspace(4) %GlobalOffset.ascast) + call spir_func void @Foo25(ptr addrspace(4) dead_on_unwind writable sret(%"item.22") %LocalItem.ascast, ptr addrspace(4) %LocalSize.ascast, ptr addrspace(4) %LocalId.ascast) + call spir_func void @Foo26(ptr addrspace(4) dead_on_unwind writable sret(%"nd_item") align 1 %agg.result, ptr addrspace(4) %GlobalItem.ascast, ptr addrspace(4) %LocalItem.ascast, ptr addrspace(4) %Group.ascast) + ret void +} + + +define internal spir_func void @Foo22(ptr addrspace(4) %this, ptr byval(%"nd_item") align 1 %item) { +entry: + %this.addr.i76 = alloca ptr addrspace(4) + %WI.addr.i = alloca i64 + %TangleLeader.addr.i = alloca i64 + %TangleSize.addr.i = alloca i64 + %agg.tmp.i = alloca %"range" + %agg.tmp2.i = alloca %"tangle_group" + %Visible.i = alloca i64 + %Other.i = alloca i64 + %agg.tmp5.i = alloca %"range" + %agg.tmp8.i = alloca %"range" + %OriginalLID.i = alloca i32, align 4 + %LID.i = alloca i32, align 4 + %BroadcastResult.i = alloca i32, align 4 + %agg.tmp12.i = alloca %"tangle_group" + %agg.tmp15.i = alloca %"range" + %AnyResult.i = alloca i8, align 1 + %agg.tmp18.i = alloca %"tangle_group" + %agg.tmp24.i = alloca %"range" + %AllResult.i = alloca i8, align 1 + %agg.tmp27.i = alloca %"tangle_group" + %agg.tmp35.i = alloca %"range" + %NoneResult.i = alloca i8, align 1 + %agg.tmp38.i = alloca %"tangle_group" + %agg.tmp46.i = alloca %"range" + %ReduceResult.i = alloca i32, align 4 + %agg.tmp49.i = alloca %"tangle_group" + %agg.tmp50.i = alloca %"nd_item", align 1 + %agg.tmp54.i = alloca %"range" + %ExScanResult.i = alloca i32, align 4 + %agg.tmp57.i = 
alloca %"tangle_group" + %agg.tmp58.i = alloca %"nd_item", align 1 + %agg.tmp61.i = alloca %"range" + %IncScanResult.i = alloca i32, align 4 + %agg.tmp64.i = alloca %"tangle_group" + %agg.tmp65.i = alloca %"nd_item", align 1 + %agg.tmp69.i = alloca %"range" + %ShiftLeftResult.i = alloca i32, align 4 + %agg.tmp72.i = alloca %"tangle_group" + %agg.tmp79.i = alloca %"range" + %ShiftRightResult.i = alloca i32, align 4 + %agg.tmp82.i = alloca %"tangle_group" + %agg.tmp88.i = alloca %"range" + %SelectResult.i = alloca i32, align 4 + %agg.tmp91.i = alloca %"tangle_group" + %agg.tmp92.i = alloca %"range" + %ref.tmp.i = alloca %"range" + %ref.tmp93.i = alloca %"range" + %ref.tmp94.i = alloca i32, align 4 + %agg.tmp100.i = alloca %"range" + %PermuteXorResult.i = alloca i32, align 4 + %agg.tmp103.i = alloca %"tangle_group" + %agg.tmp106.i = alloca %"range" + %agg.tmp18.ascast.ascast75 = alloca %"nd_item" + %agg.tmp17.ascast.ascast74 = alloca %"tangle_group" + %retval.i66 = alloca i64 + %this.addr.i67 = alloca ptr addrspace(4) + %Result.i68 = alloca i64 + %retval.i58 = alloca i64 + %this.addr.i59 = alloca ptr addrspace(4) + %Result.i60 = alloca i64 + %retval.i50 = alloca i64 + %this.addr.i51 = alloca ptr addrspace(4) + %Result.i52 = alloca i64 + %retval.i42 = alloca i64 + %this.addr.i43 = alloca ptr addrspace(4) + %Result.i44 = alloca i64 + %retval.i = alloca i64 + %this.addr.i = alloca ptr addrspace(4) + %Result.i = alloca i64 + %this.addr = alloca ptr addrspace(4) + %WI = alloca %"range" + %SG = alloca %"nd_item", align 1 + %BranchBody = alloca %class.anon.8 + %ref.tmp = alloca %"range" + %ref.tmp15 = alloca i32, align 4 + %Tangle = alloca %"tangle_group" + %agg.tmp = alloca %"nd_item", align 1 + %TangleLeader = alloca i64 + %TangleSize = alloca i64 + %IsMember = alloca %"nd_item", align 1 + %agg.tmp17 = alloca %"tangle_group" + %agg.tmp18 = alloca %"nd_item", align 1 + %ref.tmp19 = alloca %"range" + %ref.tmp20 = alloca i32, align 4 + %Tangle24 = alloca %"tangle_group" + %agg.tmp25 = alloca %"nd_item", align 1 + %TangleLeader26 = alloca i64 + %TangleSize27 = alloca i64 + %IsMember28 = alloca %"nd_item", align 1 + %agg.tmp30 = alloca %"tangle_group" + %agg.tmp31 = alloca %"nd_item", align 1 + %Tangle33 = alloca %"tangle_group" + %agg.tmp34 = alloca %"nd_item", align 1 + %TangleLeader35 = alloca i64 + %TangleSize36 = alloca i64 + %IsMember37 = alloca %"nd_item", align 1 + %agg.tmp39 = alloca %"tangle_group" + %agg.tmp40 = alloca %"nd_item", align 1 + %WI.ascast = addrspacecast ptr %WI to ptr addrspace(4) + %SG.ascast = addrspacecast ptr %SG to ptr addrspace(4) + %BranchBody.ascast = addrspacecast ptr %BranchBody to ptr addrspace(4) + %ref.tmp.ascast = addrspacecast ptr %ref.tmp to ptr addrspace(4) + %ref.tmp15.ascast = addrspacecast ptr %ref.tmp15 to ptr addrspace(4) + %Tangle.ascast = addrspacecast ptr %Tangle to ptr addrspace(4) + %IsMember.ascast = addrspacecast ptr %IsMember to ptr addrspace(4) + %ref.tmp19.ascast = addrspacecast ptr %ref.tmp19 to ptr addrspace(4) + %ref.tmp20.ascast = addrspacecast ptr %ref.tmp20 to ptr addrspace(4) + %Tangle24.ascast = addrspacecast ptr %Tangle24 to ptr addrspace(4) + %IsMember28.ascast = addrspacecast ptr %IsMember28 to ptr addrspace(4) + %Tangle33.ascast = addrspacecast ptr %Tangle33 to ptr addrspace(4) + %IsMember37.ascast = addrspacecast ptr %IsMember37 to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %item.ascast = addrspacecast ptr %item to ptr addrspace(4) + %this1 = load ptr addrspace(4), ptr %this.addr + call spir_func void 
@Foo40(ptr addrspace(4) dead_on_unwind writable sret(%"range") %WI.ascast, ptr addrspace(4) align 1 %item.ascast) + call spir_func void @Foo41(ptr addrspace(4) dead_on_unwind writable sret(%"nd_item") align 1 %SG.ascast, ptr addrspace(4) align 1 %item.ascast) + %TmpAcc1 = bitcast ptr %BranchBody to ptr + %TmpAcc22 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %BarrierAcc = getelementptr inbounds %class.anon.8, ptr %BranchBody, i32 0, i32 1 + %BarrierAcc3 = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %this1, i32 0, i32 1 + %0 = getelementptr inbounds i8, ptr addrspace(4) %BranchBody.ascast, i64 64 + %BroadcastAcc = getelementptr inbounds %class.anon.8, ptr %BranchBody, i32 0, i32 3 + %BroadcastAcc4 = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %this1, i32 0, i32 2 + %AnyAcc = getelementptr inbounds %class.anon.8, ptr %BranchBody, i32 0, i32 4 + %AnyAcc5 = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %this1, i32 0, i32 3 + %AllAcc = getelementptr inbounds %class.anon.8, ptr %BranchBody, i32 0, i32 5 + %AllAcc6 = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %this1, i32 0, i32 4 + %NoneAcc = getelementptr inbounds %class.anon.8, ptr %BranchBody, i32 0, i32 6 + %NoneAcc7 = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %this1, i32 0, i32 5 + %ReduceAcc = getelementptr inbounds %class.anon.8, ptr %BranchBody, i32 0, i32 7 + %ReduceAcc8 = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %this1, i32 0, i32 6 + %ExScanAcc = getelementptr inbounds %class.anon.8, ptr %BranchBody, i32 0, i32 8 + %ExScanAcc9 = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %this1, i32 0, i32 7 + %IncScanAcc = getelementptr inbounds %class.anon.8, ptr %BranchBody, i32 0, i32 9 + %IncScanAcc10 = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %this1, i32 0, i32 8 + %ShiftLeftAcc = getelementptr inbounds %class.anon.8, ptr %BranchBody, i32 0, i32 10 + %ShiftLeftAcc11 = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %this1, i32 0, i32 9 + %ShiftRightAcc = getelementptr inbounds %class.anon.8, ptr %BranchBody, i32 0, i32 11 + %ShiftRightAcc12 = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %this1, i32 0, i32 10 + %SelectAcc = getelementptr inbounds %class.anon.8, ptr %BranchBody, i32 0, i32 12 + %SelectAcc13 = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %this1, i32 0, i32 11 + %PermuteXorAcc = getelementptr inbounds %class.anon.8, ptr %BranchBody, i32 0, i32 13 + %PermuteXorAcc14 = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %this1, i32 0, i32 12 + store i32 4, ptr %ref.tmp15, align 4 + call spir_func void @Foo42(ptr addrspace(4) dead_on_unwind writable sret(%"range") %ref.tmp.ascast, ptr addrspace(4) %WI.ascast, ptr addrspace(4) align 4 %ref.tmp15.ascast) + %retval.ascast.i69 = addrspacecast ptr %retval.i66 to ptr addrspace(4) + store ptr addrspace(4) %ref.tmp.ascast, ptr %this.addr.i67 + %this1.i72 = load ptr addrspace(4), ptr %this.addr.i67 + %1 = load i64, ptr addrspace(4) %this1.i72 + store i64 %1, ptr %Result.i68 + %2 = load i64, ptr %Result.i68 + %tobool = icmp ne i64 %2, 0 + br i1 %tobool, label %if.then, label %if.else + +if.else: ; preds = %entry + store i32 24, ptr %ref.tmp20, align 4 + call spir_func void @Foo42(ptr addrspace(4) dead_on_unwind writable sret(%"range") %ref.tmp19.ascast, ptr addrspace(4) %WI.ascast, ptr addrspace(4) align 4 %ref.tmp20.ascast) + %retval.ascast.i53 = addrspacecast ptr %retval.i50 to ptr addrspace(4) + store ptr addrspace(4) 
%ref.tmp19.ascast, ptr %this.addr.i51 + %this1.i56 = load ptr addrspace(4), ptr %this.addr.i51 + %3 = load i64, ptr addrspace(4) %this1.i56 + store i64 %3, ptr %Result.i52 + %4 = load i64, ptr %Result.i52 + %tobool22 = icmp ne i64 %4, 0 + br i1 %tobool22, label %if.then23, label %if.else32 + +if.else32: ; preds = %if.else + call spir_func void @Foo43(ptr addrspace(4) dead_on_unwind writable sret(%"tangle_group") %Tangle33.ascast, ptr byval(%"nd_item") align 1 %agg.tmp34) + store i64 24, ptr %TangleLeader35 + store i64 8, ptr %TangleSize36 + %retval.ascast.i = addrspacecast ptr %retval.i to ptr addrspace(4) + store ptr addrspace(4) %WI.ascast, ptr %this.addr.i + %this1.i = load ptr addrspace(4), ptr %this.addr.i + %5 = load i64, ptr addrspace(4) %this1.i + store i64 %5, ptr %Result.i + %6 = load i64, ptr %Result.i + %7 = load i64, ptr %TangleLeader35 + %8 = load i64, ptr %TangleSize36 + call spir_func void @Foo69(ptr addrspace(4) %BranchBody.ascast, i64 %6, ptr byval(%"tangle_group") %agg.tmp39, i64 %7, i64 %8, ptr byval(%"nd_item") align 1 %agg.tmp40) + br label %if.end41 + +if.then23: ; preds = %if.else + call spir_func void @Foo43(ptr addrspace(4) dead_on_unwind writable sret(%"tangle_group") %Tangle24.ascast, ptr byval(%"nd_item") align 1 %agg.tmp25) + store i64 4, ptr %TangleLeader26 + store i64 20, ptr %TangleSize27 + %retval.ascast.i45 = addrspacecast ptr %retval.i42 to ptr addrspace(4) + store ptr addrspace(4) %WI.ascast, ptr %this.addr.i43 + %this1.i48 = load ptr addrspace(4), ptr %this.addr.i43 + %9 = load i64, ptr addrspace(4) %this1.i48 + store i64 %9, ptr %Result.i44 + %10 = load i64, ptr %Result.i44 + %11 = load i64, ptr %TangleLeader26 + %12 = load i64, ptr %TangleSize27 + call spir_func void @Foo68(ptr addrspace(4) %BranchBody.ascast, i64 %10, ptr byval(%"tangle_group") %agg.tmp30, i64 %11, i64 %12, ptr byval(%"nd_item") align 1 %agg.tmp31) + br label %if.end41 + +if.then: ; preds = %entry + call spir_func void @Foo43(ptr addrspace(4) dead_on_unwind writable sret(%"tangle_group") %Tangle.ascast, ptr byval(%"nd_item") align 1 %agg.tmp) + store i64 0, ptr %TangleLeader + store i64 4, ptr %TangleSize + %retval.ascast.i61 = addrspacecast ptr %retval.i58 to ptr addrspace(4) + store ptr addrspace(4) %WI.ascast, ptr %this.addr.i59 + %this1.i64 = load ptr addrspace(4), ptr %this.addr.i59 + %13 = load i64, ptr addrspace(4) %this1.i64 + store i64 %13, ptr %Result.i60 + %14 = load i64, ptr %Result.i60 + %15 = load i64, ptr %TangleLeader + %16 = load i64, ptr %TangleSize + %TangleSize.addr.ascast.i = addrspacecast ptr %TangleSize.addr.i to ptr addrspace(4) + %agg.tmp.ascast.i = addrspacecast ptr %agg.tmp.i to ptr addrspace(4) + %agg.tmp5.ascast.i = addrspacecast ptr %agg.tmp5.i to ptr addrspace(4) + %agg.tmp8.ascast.i = addrspacecast ptr %agg.tmp8.i to ptr addrspace(4) + %agg.tmp15.ascast.i = addrspacecast ptr %agg.tmp15.i to ptr addrspace(4) + %agg.tmp24.ascast.i = addrspacecast ptr %agg.tmp24.i to ptr addrspace(4) + %agg.tmp35.ascast.i = addrspacecast ptr %agg.tmp35.i to ptr addrspace(4) + %agg.tmp46.ascast.i = addrspacecast ptr %agg.tmp46.i to ptr addrspace(4) + %agg.tmp50.ascast.i = addrspacecast ptr %agg.tmp50.i to ptr addrspace(4) + %agg.tmp54.ascast.i = addrspacecast ptr %agg.tmp54.i to ptr addrspace(4) + %agg.tmp58.ascast.i = addrspacecast ptr %agg.tmp58.i to ptr addrspace(4) + %agg.tmp61.ascast.i = addrspacecast ptr %agg.tmp61.i to ptr addrspace(4) + %agg.tmp65.ascast.i = addrspacecast ptr %agg.tmp65.i to ptr addrspace(4) + %agg.tmp69.ascast.i = addrspacecast ptr %agg.tmp69.i 
to ptr addrspace(4) + %agg.tmp79.ascast.i = addrspacecast ptr %agg.tmp79.i to ptr addrspace(4) + %agg.tmp88.ascast.i = addrspacecast ptr %agg.tmp88.i to ptr addrspace(4) + %agg.tmp92.ascast.i = addrspacecast ptr %agg.tmp92.i to ptr addrspace(4) + %ref.tmp.ascast.i = addrspacecast ptr %ref.tmp.i to ptr addrspace(4) + %ref.tmp93.ascast.i = addrspacecast ptr %ref.tmp93.i to ptr addrspace(4) + %ref.tmp94.ascast.i = addrspacecast ptr %ref.tmp94.i to ptr addrspace(4) + %agg.tmp100.ascast.i = addrspacecast ptr %agg.tmp100.i to ptr addrspace(4) + %agg.tmp106.ascast.i = addrspacecast ptr %agg.tmp106.i to ptr addrspace(4) + store ptr addrspace(4) %BranchBody.ascast, ptr %this.addr.i76 + store i64 %14, ptr %WI.addr.i + %Tangle.ascast.i = addrspacecast ptr %agg.tmp17.ascast.ascast74 to ptr addrspace(4) + store i64 %15, ptr %TangleLeader.addr.i + store i64 %16, ptr %TangleSize.addr.i + %IsMember.ascast.i = addrspacecast ptr %agg.tmp18.ascast.ascast75 to ptr addrspace(4) + %this1.i78 = load ptr addrspace(4), ptr %this.addr.i76 + %17 = load i64, ptr %WI.addr.i + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp.ascast.i, i64 %17) + %call.i = call spir_func ptr addrspace(4) @Foo70(ptr addrspace(4) %this1.i78, ptr byval(%"range") %agg.tmp.i) + store i64 1, ptr addrspace(4) %call.i + call spir_func void @Foo75(ptr byval(%"tangle_group") %agg.tmp2.i, i32 1) + store i64 0, ptr %Visible.i + store i64 0, ptr %Other.i + br label %for.cond.i + +for.cond.i: ; preds = %if.end.i, %if.then + %18 = load i64, ptr %Other.i + %cmp.i79 = icmp ult i64 %18, 32 + br i1 %cmp.i79, label %for.body.i, label %for.cond.cleanup.i + +for.cond.cleanup.i: ; preds = %for.cond.i + %19 = load i64, ptr %Visible.i + %20 = load i64, ptr %TangleSize.addr.i + %cmp7.i = icmp eq i64 %19, %20 + %BarrierAcc.i = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1.i78, i32 0, i32 1 + %21 = load i64, ptr %WI.addr.i + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp8.ascast.i, i64 %21) + %call9.i = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %BarrierAcc.i, ptr byval(%"range") %agg.tmp8.i) + %storedv.i = zext i1 %cmp7.i to i8 + store i8 %storedv.i, ptr addrspace(4) %call9.i, align 1 + %22 = getelementptr inbounds i8, ptr addrspace(4) %this1.i78, i64 64 + %call10.i = call spir_func i32 @Foo76(ptr addrspace(4) align 1 %22) + store i32 %call10.i, ptr %OriginalLID.i, align 4 + %call11.i = call spir_func i32 @Foo90(ptr addrspace(4) %Tangle.ascast.i) + store i32 %call11.i, ptr %LID.i, align 4 + %23 = load i32, ptr %OriginalLID.i, align 4 + %call13.i = call spir_func i32 @Foo91(ptr byval(%"tangle_group") %agg.tmp12.i, i32 %23, i32 0) + store i32 %call13.i, ptr %BroadcastResult.i, align 4 + %24 = load i32, ptr %BroadcastResult.i, align 4 + %conv.i = zext i32 %24 to i64 + %25 = load i64, ptr %TangleLeader.addr.i + %cmp14.i = icmp eq i64 %conv.i, %25 + %BroadcastAcc.i = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1.i78, i32 0, i32 3 + %26 = load i64, ptr %WI.addr.i + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp15.ascast.i, i64 %26) + %call16.i = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %BroadcastAcc.i, ptr byval(%"range") %agg.tmp15.i) + %storedv17.i = zext i1 %cmp14.i to i8 + store i8 %storedv17.i, ptr addrspace(4) %call16.i, align 1 + %27 = load i32, ptr %LID.i, align 4 + %cmp19.i = icmp eq i32 %27, 0 + %call20.i = call spir_func zeroext i1 @Foo92(ptr byval(%"tangle_group") %agg.tmp18.i, i1 zeroext %cmp19.i) + %storedv21.i = zext i1 %call20.i to i8 + store i8 
%storedv21.i, ptr %AnyResult.i, align 1 + %28 = load i8, ptr %AnyResult.i, align 1 + %loadedv.i = trunc i8 %28 to i1 + %conv22.i = zext i1 %loadedv.i to i32 + %AnyAcc.i = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1.i78, i32 0, i32 4 + %29 = load i64, ptr %WI.addr.i + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp24.ascast.i, i64 %29) + %call25.i = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %AnyAcc.i, ptr byval(%"range") %agg.tmp24.i) + %storedv26.i = zext i1 %loadedv.i to i8 + store i8 %storedv26.i, ptr addrspace(4) %call25.i, align 1 + %30 = load i32, ptr %LID.i, align 4 + %conv28.i = zext i32 %30 to i64 + %31 = load i64, ptr %TangleSize.addr.i + %cmp29.i = icmp ult i64 %conv28.i, %31 + %call30.i = call spir_func zeroext i1 @Foo67(ptr byval(%"tangle_group") %agg.tmp27.i, i1 zeroext %cmp29.i) + %storedv31.i = zext i1 %call30.i to i8 + store i8 %storedv31.i, ptr %AllResult.i, align 1 + %32 = load i8, ptr %AllResult.i, align 1 + %loadedv32.i = trunc i8 %32 to i1 + %conv33.i = zext i1 %loadedv32.i to i32 + %AllAcc.i = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1.i78, i32 0, i32 5 + %33 = load i64, ptr %WI.addr.i + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp35.ascast.i, i64 %33) + %call36.i = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %AllAcc.i, ptr byval(%"range") %agg.tmp35.i) + %storedv37.i = zext i1 %loadedv32.i to i8 + store i8 %storedv37.i, ptr addrspace(4) %call36.i, align 1 + %34 = load i32, ptr %LID.i, align 4 + %conv39.i = zext i32 %34 to i64 + %35 = load i64, ptr %TangleSize.addr.i + %cmp40.i = icmp uge i64 %conv39.i, %35 + %call41.i = call spir_func zeroext i1 @Foo65(ptr byval(%"tangle_group") %agg.tmp38.i, i1 zeroext %cmp40.i) + %storedv42.i = zext i1 %call41.i to i8 + store i8 %storedv42.i, ptr %NoneResult.i, align 1 + %36 = load i8, ptr %NoneResult.i, align 1 + %loadedv43.i = trunc i8 %36 to i1 + %conv44.i = zext i1 %loadedv43.i to i32 + %NoneAcc.i = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1.i78, i32 0, i32 6 + %37 = load i64, ptr %WI.addr.i + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp46.ascast.i, i64 %37) + %call47.i = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %NoneAcc.i, ptr byval(%"range") %agg.tmp46.i) + %storedv48.i = zext i1 %loadedv43.i to i8 + store i8 %storedv48.i, ptr addrspace(4) %call47.i, align 1 + %call51.i = call spir_func i32 @Foo64(ptr byval(%"tangle_group") %agg.tmp49.i, i32 1, ptr byval(%"nd_item") align 1 %agg.tmp50.i) + store i32 %call51.i, ptr %ReduceResult.i, align 4 + %38 = load i32, ptr %ReduceResult.i, align 4 + %conv52.i = zext i32 %38 to i64 + %39 = load i64, ptr %TangleSize.addr.i + %cmp53.i = icmp eq i64 %conv52.i, %39 + %ReduceAcc.i = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1.i78, i32 0, i32 7 + %40 = load i64, ptr %WI.addr.i + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp54.ascast.i, i64 %40) + %call55.i = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %ReduceAcc.i, ptr byval(%"range") %agg.tmp54.i) + %storedv56.i = zext i1 %cmp53.i to i8 + store i8 %storedv56.i, ptr addrspace(4) %call55.i, align 1 + %call59.i = call spir_func i32 @Foo63(ptr byval(%"tangle_group") %agg.tmp57.i, i32 1, ptr byval(%"nd_item") align 1 %agg.tmp58.i) + store i32 %call59.i, ptr %ExScanResult.i, align 4 + %41 = load i32, ptr %ExScanResult.i, align 4 + %42 = load i32, ptr %LID.i, align 4 + %cmp60.i = icmp eq i32 %41, %42 + %ExScanAcc.i = getelementptr inbounds nuw 
%class.anon.8, ptr addrspace(4) %this1.i78, i32 0, i32 8 + %43 = load i64, ptr %WI.addr.i + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp61.ascast.i, i64 %43) + %call62.i = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %ExScanAcc.i, ptr byval(%"range") %agg.tmp61.i) + %storedv63.i = zext i1 %cmp60.i to i8 + store i8 %storedv63.i, ptr addrspace(4) %call62.i, align 1 + %call66.i = call spir_func i32 @Foo62(ptr byval(%"tangle_group") %agg.tmp64.i, i32 1, ptr byval(%"nd_item") align 1 %agg.tmp65.i) + store i32 %call66.i, ptr %IncScanResult.i, align 4 + %44 = load i32, ptr %IncScanResult.i, align 4 + %45 = load i32, ptr %LID.i, align 4 + %add67.i = add i32 %45, 1 + %cmp68.i = icmp eq i32 %44, %add67.i + %IncScanAcc.i = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1.i78, i32 0, i32 9 + %46 = load i64, ptr %WI.addr.i + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp69.ascast.i, i64 %46) + %call70.i = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %IncScanAcc.i, ptr byval(%"range") %agg.tmp69.i) + %storedv71.i = zext i1 %cmp68.i to i8 + store i8 %storedv71.i, ptr addrspace(4) %call70.i, align 1 + %47 = load i32, ptr %LID.i, align 4 + %call73.i = call spir_func i32 @Foo73(ptr byval(%"tangle_group") %agg.tmp72.i, i32 %47, i32 2) + store i32 %call73.i, ptr %ShiftLeftResult.i, align 4 + %48 = load i32, ptr %LID.i, align 4 + %add74.i = add i32 %48, 2 + %conv75.i = zext i32 %add74.i to i64 + %49 = load i64, ptr %TangleSize.addr.i + %cmp76.i = icmp uge i64 %conv75.i, %49 + br i1 %cmp76.i, label %lor.end.i, label %lor.rhs.i + +lor.rhs.i: ; preds = %for.cond.cleanup.i + %50 = load i32, ptr %ShiftLeftResult.i, align 4 + %51 = load i32, ptr %LID.i, align 4 + %add77.i = add i32 %51, 2 + %cmp78.i = icmp eq i32 %50, %add77.i + br label %lor.end.i + +lor.end.i: ; preds = %lor.rhs.i, %for.cond.cleanup.i + %52 = phi i1 [ true, %for.cond.cleanup.i ], [ %cmp78.i, %lor.rhs.i ] + %ShiftLeftAcc.i = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1.i78, i32 0, i32 10 + %53 = load i64, ptr %WI.addr.i + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp79.ascast.i, i64 %53) + %call80.i = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %ShiftLeftAcc.i, ptr byval(%"range") %agg.tmp79.i) + %storedv81.i = zext i1 %52 to i8 + store i8 %storedv81.i, ptr addrspace(4) %call80.i, align 1 + %54 = load i32, ptr %LID.i, align 4 + %call83.i = call spir_func i32 @Foo53(ptr byval(%"tangle_group") %agg.tmp82.i, i32 %54, i32 2) + store i32 %call83.i, ptr %ShiftRightResult.i, align 4 + %55 = load i32, ptr %LID.i, align 4 + %cmp84.i = icmp ult i32 %55, 2 + br i1 %cmp84.i, label %l1.exit, label %lor.rhs85.i + +lor.rhs85.i: ; preds = %lor.end.i + %56 = load i32, ptr %ShiftRightResult.i, align 4 + %57 = load i32, ptr %LID.i, align 4 + %sub.i = sub i32 %57, 2 + %cmp86.i = icmp eq i32 %56, %sub.i + br label %l1.exit + +l1.exit: ; preds = %lor.rhs85.i, %lor.end.i + %58 = phi i1 [ true, %lor.end.i ], [ %cmp86.i, %lor.rhs85.i ] + %ShiftRightAcc.i = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1.i78, i32 0, i32 11 + %59 = load i64, ptr %WI.addr.i + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp88.ascast.i, i64 %59) + %call89.i = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %ShiftRightAcc.i, ptr byval(%"range") %agg.tmp88.i) + %storedv90.i = zext i1 %58 to i8 + store i8 %storedv90.i, ptr addrspace(4) %call89.i, align 1 + %60 = load i32, ptr %LID.i, align 4 + call spir_func void @Foo51(ptr addrspace(4) 
dead_on_unwind writable sret(%"range") %ref.tmp93.ascast.i, ptr addrspace(4) %Tangle.ascast.i) + store i32 2, ptr %ref.tmp94.i, align 4 + call spir_func void @Foo55(ptr addrspace(4) dead_on_unwind writable sret(%"range") %ref.tmp.ascast.i, ptr addrspace(4) %ref.tmp93.ascast.i, ptr addrspace(4) align 4 %ref.tmp94.ascast.i) + call spir_func void @Foo56(ptr addrspace(4) dead_on_unwind writable sret(%"range") %agg.tmp92.ascast.i, ptr addrspace(4) %ref.tmp.ascast.i, ptr addrspace(4) %TangleSize.addr.ascast.i) + %call95.i = call spir_func i32 @Foo57(ptr byval(%"tangle_group") %agg.tmp91.i, i32 %60, ptr byval(%"range") %agg.tmp92.i) + store i32 %call95.i, ptr %SelectResult.i, align 4 + %61 = load i32, ptr %SelectResult.i, align 4 + %conv96.i = zext i32 %61 to i64 + %62 = load i32, ptr %LID.i, align 4 + %add97.i = add i32 %62, 2 + %conv98.i = zext i32 %add97.i to i64 + %63 = load i64, ptr %TangleSize.addr.i + %rem.i = urem i64 %conv98.i, %63 + %cmp99.i = icmp eq i64 %conv96.i, %rem.i + %SelectAcc.i = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1.i78, i32 0, i32 12 + %64 = load i64, ptr %WI.addr.i + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp100.ascast.i, i64 %64) + %call101.i = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %SelectAcc.i, ptr byval(%"range") %agg.tmp100.i) + %storedv102.i = zext i1 %cmp99.i to i8 + store i8 %storedv102.i, ptr addrspace(4) %call101.i, align 1 + %65 = load i32, ptr %LID.i, align 4 + %call104.i = call spir_func i32 @Foo58(ptr byval(%"tangle_group") %agg.tmp103.i, i32 %65, i32 2) + store i32 %call104.i, ptr %PermuteXorResult.i, align 4 + %66 = load i32, ptr %PermuteXorResult.i, align 4 + %67 = load i32, ptr %LID.i, align 4 + %xor.i = xor i32 %67, 2 + %cmp105.i = icmp eq i32 %66, %xor.i + %PermuteXorAcc.i = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1.i78, i32 0, i32 13 + %68 = load i64, ptr %WI.addr.i + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp106.ascast.i, i64 %68) + %call107.i = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %PermuteXorAcc.i, ptr byval(%"range") %agg.tmp106.i) + %storedv108.i = zext i1 %cmp105.i to i8 + store i8 %storedv108.i, ptr addrspace(4) %call107.i, align 1 + br label %if.end41 + +if.end41: ; preds = %if.then23, %if.else32, %l1.exit + ret void + +for.body.i: ; preds = %for.cond.i + %69 = load i64, ptr %Other.i + %call3.i = call spir_func zeroext i1 @Foo71(ptr addrspace(4) align 1 %IsMember.ascast.i, i64 %69) + br i1 %call3.i, label %if.then.i, label %if.end.i + +if.then.i: ; preds = %for.body.i + %70 = load i64, ptr %Other.i + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp5.ascast.i, i64 %70) + %call6.i = call spir_func ptr addrspace(4) @Foo70(ptr addrspace(4) %this1.i78, ptr byval(%"range") %agg.tmp5.i) + %71 = load i64, ptr addrspace(4) %call6.i + %72 = load i64, ptr %Visible.i + %add.i = add i64 %72, %71 + store i64 %add.i, ptr %Visible.i + br label %if.end.i + +if.end.i: ; preds = %if.then.i, %for.body.i + %73 = load i64, ptr %Other.i + %inc.i = add i64 %73, 1 + store i64 %inc.i, ptr %Other.i + br label %for.cond.i +} + +define internal spir_func void @Foo40(ptr addrspace(4) dead_on_unwind noalias writable sret(%"range") %agg.result, ptr addrspace(4) align 1 %this) { +entry: + %this.addr = alloca ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + call spir_func void @Init6(ptr addrspace(4) dead_on_unwind writable sret(%"range") %agg.result) + ret void +} + +define internal 
spir_func void @Foo41(ptr addrspace(4) dead_on_unwind noalias writable sret(%"nd_item") align 1 %agg.result, ptr addrspace(4) align 1 %this) { +entry: + %this.addr = alloca ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + ret void +} + + + + +define internal spir_func void @Foo42(ptr addrspace(4) dead_on_unwind noalias writable sret(%"range") %agg.result, ptr addrspace(4) %lhs, ptr addrspace(4) align 4 %rhs) { +entry: + %lhs.addr = alloca ptr addrspace(4) + %rhs.addr = alloca ptr addrspace(4) + %i = alloca i32, align 4 + %cleanup.dest.slot = alloca i32, align 4 + store ptr addrspace(4) %lhs, ptr %lhs.addr + store ptr addrspace(4) %rhs, ptr %rhs.addr + call spir_func void @Foo11(ptr addrspace(4) %agg.result) + store i32 0, ptr %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %0 = load i32, ptr %i, align 4 + %cmp = icmp slt i32 %0, 1 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + ret void + +for.body: ; preds = %for.cond + %1 = load ptr addrspace(4), ptr %lhs.addr + %common_array1 = bitcast ptr addrspace(4) %1 to ptr addrspace(4) + %2 = load i32, ptr %i, align 4 + %idxprom = sext i32 %2 to i64 + %arrayidx = getelementptr inbounds [1 x i64], ptr addrspace(4) %common_array1, i64 0, i64 %idxprom + %3 = load i64, ptr addrspace(4) %arrayidx + %4 = load ptr addrspace(4), ptr %rhs.addr + %5 = load i32, ptr addrspace(4) %4, align 4 + %conv = sext i32 %5 to i64 + %cmp1 = icmp ult i64 %3, %conv + %conv2 = zext i1 %cmp1 to i64 + %common_array32 = bitcast ptr addrspace(4) %agg.result to ptr addrspace(4) + %6 = load i32, ptr %i, align 4 + %idxprom4 = sext i32 %6 to i64 + %arrayidx5 = getelementptr inbounds [1 x i64], ptr addrspace(4) %common_array32, i64 0, i64 %idxprom4 + store i64 %conv2, ptr addrspace(4) %arrayidx5 + %7 = load i32, ptr %i, align 4 + %inc = add nsw i32 %7, 1 + store i32 %inc, ptr %i, align 4 + br label %for.cond +} + +declare void @llvm.assume(i1) + + +define internal spir_func void @Foo43(ptr addrspace(4) dead_on_unwind noalias writable sret(%"tangle_group") %agg.result, ptr byval(%"nd_item") align 1 %group) { +entry: + %mask = alloca %"ss_sub_group_mask" + %agg.tmp = alloca %"nd_item", align 1 + %agg.tmp1 = alloca %"ss_sub_group_mask" + %cleanup.dest.slot = alloca i32, align 4 + %mask.ascast = addrspacecast ptr %mask to ptr addrspace(4) + %group.ascast = addrspacecast ptr %group to ptr addrspace(4) + call spir_func void @Foo44(ptr addrspace(4) dead_on_unwind writable sret(%"ss_sub_group_mask") %mask.ascast, ptr byval(%"nd_item") align 1 %agg.tmp, i1 zeroext true) + call spir_func void @Foo45(ptr addrspace(4) %agg.result, ptr byval(%"ss_sub_group_mask") %agg.tmp1) + ret void +} + + +define internal spir_func void @Foo46(ptr addrspace(4) %this, i64 %dim0) { +entry: + %this.addr = alloca ptr addrspace(4) + %dim0.addr = alloca i64 + store ptr addrspace(4) %this, ptr %this.addr + store i64 %dim0, ptr %dim0.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %0 = load i64, ptr %dim0.addr + call spir_func void @Foo60(ptr addrspace(4) %this1, i64 %0) + ret void +} + + +define internal spir_func ptr addrspace(4) @Foo70(ptr addrspace(4) %this, ptr byval(%"range") %Index) { +entry: + %retval = alloca ptr addrspace(4) + %this.addr = alloca ptr addrspace(4) + %LinearIndex = alloca i64 + %agg.tmp = alloca %"range" + %cleanup.dest.slot = alloca i32, align 4 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, 
ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %call = call spir_func i64 @Foo93(ptr addrspace(4) %this1, ptr byval(%"range") %agg.tmp) + store i64 %call, ptr %LinearIndex + %call2 = call spir_func ptr addrspace(1) @Foo94(ptr addrspace(4) %this1) + %0 = load i64, ptr %LinearIndex + %arrayidx = getelementptr inbounds nuw i64, ptr addrspace(1) %call2, i64 %0 + %arrayidx.ascast = addrspacecast ptr addrspace(1) %arrayidx to ptr addrspace(4) + ret ptr addrspace(4) %arrayidx.ascast +} + + +define internal spir_func void @Foo75(ptr byval(%"tangle_group") %G, i32 %FenceScope) { +entry: + %FenceScope.addr = alloca i32, align 4 + %agg.tmp = alloca %"tangle_group" + store i32 %FenceScope, ptr %FenceScope.addr, align 4 + %0 = load i32, ptr %FenceScope.addr, align 4 + call spir_func void @Foo95(ptr byval(%"tangle_group") %agg.tmp, i32 %0, i32 5) + ret void +} + + +define internal spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %this, ptr byval(%"range") %Index) { +entry: + %retval = alloca ptr addrspace(4) + %this.addr = alloca ptr addrspace(4) + %LinearIndex = alloca i64 + %agg.tmp = alloca %"range" + %cleanup.dest.slot = alloca i32, align 4 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %call = call spir_func i64 @Foo77(ptr addrspace(4) %this1, ptr byval(%"range") %agg.tmp) + store i64 %call, ptr %LinearIndex + %call2 = call spir_func ptr addrspace(1) @Foo78(ptr addrspace(4) %this1) + %0 = load i64, ptr %LinearIndex + %arrayidx = getelementptr inbounds nuw i8, ptr addrspace(1) %call2, i64 %0 + %arrayidx.ascast = addrspacecast ptr addrspace(1) %arrayidx to ptr addrspace(4) + ret ptr addrspace(4) %arrayidx.ascast +} + + +define internal spir_func i32 @Foo76(ptr addrspace(4) align 1 %this) { +entry: + %retval = alloca i32, align 4 + %this.addr = alloca ptr addrspace(4) + %ref.tmp = alloca %"range" + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %ref.tmp.ascast = addrspacecast ptr %ref.tmp to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + call spir_func void @Foo96(ptr addrspace(4) dead_on_unwind writable sret(%"range") %ref.tmp.ascast, ptr addrspace(4) align 1 %this1) + %call = call spir_func ptr addrspace(4) @Foo16(ptr addrspace(4) %ref.tmp.ascast, i32 0) + %0 = load i64, ptr addrspace(4) %call + %conv = trunc i64 %0 to i32 + ret i32 %conv +} + + +define internal spir_func i32 @Foo90(ptr addrspace(4) %this) { +entry: + %retval = alloca i32, align 4 + %this.addr = alloca ptr addrspace(4) + %ref.tmp = alloca %"range" + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %ref.tmp.ascast = addrspacecast ptr %ref.tmp to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + call spir_func void @Foo51(ptr addrspace(4) dead_on_unwind writable sret(%"range") %ref.tmp.ascast, ptr addrspace(4) %this1) + %call = call spir_func ptr addrspace(4) @Foo16(ptr addrspace(4) %ref.tmp.ascast, i32 0) + %0 = load i64, ptr addrspace(4) %call + %conv = trunc i64 %0 to i32 + ret i32 %conv +} + + +define internal spir_func i32 @Foo91(ptr byval(%"tangle_group") %g, i32 %x, i32 %linear_local_id) { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + %linear_local_id.addr = alloca i32, align 4 + %agg.tmp = alloca %"tangle_group" + %agg.tmp1 = alloca %"range" + %agg.tmp2 = alloca %"range" + %retval.ascast = 
addrspacecast ptr %retval to ptr addrspace(4) + %agg.tmp1.ascast = addrspacecast ptr %agg.tmp1 to ptr addrspace(4) + %agg.tmp2.ascast = addrspacecast ptr %agg.tmp2 to ptr addrspace(4) + %g.ascast = addrspacecast ptr %g to ptr addrspace(4) + store i32 %x, ptr %x.addr, align 4 + store i32 %linear_local_id, ptr %linear_local_id.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 + call spir_func void @Foo97(ptr addrspace(4) dead_on_unwind writable sret(%"range") %agg.tmp2.ascast, ptr addrspace(4) %g.ascast) + %1 = load i32, ptr %linear_local_id.addr, align 4 + %conv = zext i32 %1 to i64 + call spir_func void @Foo98(ptr addrspace(4) dead_on_unwind writable sret(%"range") %agg.tmp1.ascast, ptr byval(%"range") %agg.tmp2, i64 %conv) + %call = call spir_func i32 @Bar69(ptr byval(%"tangle_group") %agg.tmp, i32 %0, ptr byval(%"range") %agg.tmp1) + ret i32 %call +} + + +define internal spir_func zeroext i1 @Foo92(ptr byval(%"tangle_group") %g, i1 zeroext %pred) { +entry: + %retval = alloca i1, align 1 + %pred.addr = alloca i8, align 1 + %agg.tmp = alloca %"tangle_group" + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %storedv = zext i1 %pred to i8 + store i8 %storedv, ptr %pred.addr, align 1 + %0 = load i8, ptr %pred.addr, align 1 + %loadedv = trunc i8 %0 to i1 + %call = call spir_func zeroext i1 @Bar10(ptr byval(%"tangle_group") %agg.tmp, i1 zeroext %loadedv) + ret i1 %call +} + + +define internal spir_func zeroext i1 @Foo67(ptr byval(%"tangle_group") %g, i1 zeroext %pred) { +entry: + %retval = alloca i1, align 1 + %pred.addr = alloca i8, align 1 + %agg.tmp = alloca %"tangle_group" + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %storedv = zext i1 %pred to i8 + store i8 %storedv, ptr %pred.addr, align 1 + %0 = load i8, ptr %pred.addr, align 1 + %loadedv = trunc i8 %0 to i1 + %call = call spir_func zeroext i1 @Foo66(ptr byval(%"tangle_group") %agg.tmp, i1 zeroext %loadedv) + ret i1 %call +} + + +define internal spir_func zeroext i1 @Foo65(ptr byval(%"tangle_group") %g, i1 zeroext %pred) { +entry: + %retval = alloca i1, align 1 + %pred.addr = alloca i8, align 1 + %agg.tmp = alloca %"tangle_group" + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %storedv = zext i1 %pred to i8 + store i8 %storedv, ptr %pred.addr, align 1 + %0 = load i8, ptr %pred.addr, align 1 + %loadedv = trunc i8 %0 to i1 + %lnot = xor i1 %loadedv, true + %call = call spir_func zeroext i1 @Foo66(ptr byval(%"tangle_group") %agg.tmp, i1 zeroext %lnot) + ret i1 %call +} + + +define internal spir_func i32 @Foo64(ptr byval(%"tangle_group") %g, i32 %x, ptr byval(%"nd_item") align 1 %binary_op) { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + %agg.tmp = alloca %"tangle_group" + %agg.tmp1 = alloca %"nd_item", align 1 + %agg.tmp2 = alloca %"nd_item", align 1 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store i32 %x, ptr %x.addr, align 4 + %binary_op.ascast = addrspacecast ptr %binary_op to ptr addrspace(4) + %0 = load i32, ptr %x.addr, align 4 + %call = call spir_func i32 @Bar11(ptr byval(%"tangle_group") %agg.tmp, ptr byval(%"nd_item") align 1 %agg.tmp1, i32 %0, ptr byval(%"nd_item") align 1 %agg.tmp2) + ret i32 %call +} + + +define internal spir_func i32 @Foo63(ptr byval(%"tangle_group") %g, i32 %x, ptr byval(%"nd_item") align 1 %binary_op) { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + %res = alloca i32, align 4 + %agg.tmp = alloca %"tangle_group" + %agg.tmp1 = alloca %"nd_item", align 1 + %agg.tmp2 = alloca 
%"nd_item", align 1 + %cleanup.dest.slot = alloca i32, align 4 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store i32 %x, ptr %x.addr, align 4 + %binary_op.ascast = addrspacecast ptr %binary_op to ptr addrspace(4) + %0 = load i32, ptr %x.addr, align 4 + %call = call spir_func i32 @Bar12(ptr byval(%"tangle_group") %agg.tmp, ptr byval(%"nd_item") align 1 %agg.tmp1, i32 %0, ptr byval(%"nd_item") align 1 %agg.tmp2) + store i32 %call, ptr %res, align 4 + %1 = load i32, ptr %res, align 4 + ret i32 %1 +} + + +define internal spir_func i32 @Foo62(ptr byval(%"tangle_group") %g, i32 %x, ptr byval(%"nd_item") align 1 %binary_op) { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + %agg.tmp = alloca %"tangle_group" + %agg.tmp1 = alloca %"nd_item", align 1 + %agg.tmp2 = alloca %"nd_item", align 1 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store i32 %x, ptr %x.addr, align 4 + %binary_op.ascast = addrspacecast ptr %binary_op to ptr addrspace(4) + %0 = load i32, ptr %x.addr, align 4 + %call = call spir_func i32 @Foo61(ptr byval(%"tangle_group") %agg.tmp, ptr byval(%"nd_item") align 1 %agg.tmp1, i32 %0, ptr byval(%"nd_item") align 1 %agg.tmp2) + ret i32 %call +} + + +define internal spir_func i32 @Foo73(ptr byval(%"tangle_group") %g, i32 %x, i32 %delta) { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + %delta.addr = alloca i32, align 4 + %agg.tmp = alloca %"tangle_group" + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store i32 %x, ptr %x.addr, align 4 + store i32 %delta, ptr %delta.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 + %1 = load i32, ptr %delta.addr, align 4 + %call = call spir_func i32 @Foo72(ptr byval(%"tangle_group") %agg.tmp, i32 %0, i32 %1) + ret i32 %call +} + + +define internal spir_func zeroext i1 @Foo71(ptr addrspace(4) align 1 %this, i64 %Other) { +entry: + %retval = alloca i1, align 1 + %this.addr = alloca ptr addrspace(4) + %Other.addr = alloca i64 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + store i64 %Other, ptr %Other.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %0 = load i64, ptr %Other.addr + %cmp = icmp ult i64 %0, 4 + ret i1 %cmp +} + + +define internal spir_func i32 @Foo53(ptr byval(%"tangle_group") %g, i32 %x, i32 %delta) { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + %delta.addr = alloca i32, align 4 + %agg.tmp = alloca %"tangle_group" + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store i32 %x, ptr %x.addr, align 4 + store i32 %delta, ptr %delta.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 + %1 = load i32, ptr %delta.addr, align 4 + %call = call spir_func i32 @Foo52(ptr byval(%"tangle_group") %agg.tmp, i32 %0, i32 %1) + ret i32 %call +} + + +define internal spir_func void @Foo51(ptr addrspace(4) dead_on_unwind noalias writable sret(%"range") %agg.result, ptr addrspace(4) %this) { +entry: + %this.addr = alloca ptr addrspace(4) + %agg.tmp = alloca %"ss_sub_group_mask" + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %Mask1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %call = call spir_func i32 @Foo47(ptr byval(%"ss_sub_group_mask") %agg.tmp) + %conv = zext i32 %call to i64 + call spir_func void @Foo46(ptr addrspace(4) %agg.result, i64 %conv) + ret void +} + + +define internal spir_func void @Foo55(ptr addrspace(4) dead_on_unwind noalias writable sret(%"range") 
%agg.result, ptr addrspace(4) %lhs, ptr addrspace(4) align 4 %rhs) { +entry: + %lhs.addr = alloca ptr addrspace(4) + %rhs.addr = alloca ptr addrspace(4) + %i = alloca i32, align 4 + %cleanup.dest.slot = alloca i32, align 4 + store ptr addrspace(4) %lhs, ptr %lhs.addr + store ptr addrspace(4) %rhs, ptr %rhs.addr + call spir_func void @Foo11(ptr addrspace(4) %agg.result) + store i32 0, ptr %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %0 = load i32, ptr %i, align 4 + %cmp = icmp slt i32 %0, 1 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + ret void + +for.body: ; preds = %for.cond + %1 = load ptr addrspace(4), ptr %lhs.addr + %common_array2 = bitcast ptr addrspace(4) %1 to ptr addrspace(4) + %2 = load i32, ptr %i, align 4 + %idxprom = sext i32 %2 to i64 + %arrayidx = getelementptr inbounds [1 x i64], ptr addrspace(4) %common_array2, i64 0, i64 %idxprom + %3 = load i64, ptr addrspace(4) %arrayidx + %4 = load ptr addrspace(4), ptr %rhs.addr + %5 = load i32, ptr addrspace(4) %4, align 4 + %conv = sext i32 %5 to i64 + %add = add i64 %3, %conv + %common_array13 = bitcast ptr addrspace(4) %agg.result to ptr addrspace(4) + %6 = load i32, ptr %i, align 4 + %idxprom2 = sext i32 %6 to i64 + %arrayidx3 = getelementptr inbounds [1 x i64], ptr addrspace(4) %common_array13, i64 0, i64 %idxprom2 + store i64 %add, ptr addrspace(4) %arrayidx3 + %7 = load i32, ptr %i, align 4 + %inc = add nsw i32 %7, 1 + store i32 %inc, ptr %i, align 4 + br label %for.cond +} + + +define internal spir_func void @Foo56(ptr addrspace(4) dead_on_unwind noalias writable sret(%"range") %agg.result, ptr addrspace(4) %lhs, ptr addrspace(4) %rhs) { +entry: + %lhs.addr = alloca ptr addrspace(4) + %rhs.addr = alloca ptr addrspace(4) + %i = alloca i32, align 4 + %cleanup.dest.slot = alloca i32, align 4 + store ptr addrspace(4) %lhs, ptr %lhs.addr + store ptr addrspace(4) %rhs, ptr %rhs.addr + call spir_func void @Foo11(ptr addrspace(4) %agg.result) + store i32 0, ptr %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %0 = load i32, ptr %i, align 4 + %cmp = icmp slt i32 %0, 1 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + ret void + +for.body: ; preds = %for.cond + %1 = load ptr addrspace(4), ptr %lhs.addr + %common_array2 = bitcast ptr addrspace(4) %1 to ptr addrspace(4) + %2 = load i32, ptr %i, align 4 + %idxprom = sext i32 %2 to i64 + %arrayidx = getelementptr inbounds [1 x i64], ptr addrspace(4) %common_array2, i64 0, i64 %idxprom + %3 = load i64, ptr addrspace(4) %arrayidx + %4 = load ptr addrspace(4), ptr %rhs.addr + %5 = load i64, ptr addrspace(4) %4 + %rem = urem i64 %3, %5 + %common_array13 = bitcast ptr addrspace(4) %agg.result to ptr addrspace(4) + %6 = load i32, ptr %i, align 4 + %idxprom2 = sext i32 %6 to i64 + %arrayidx3 = getelementptr inbounds [1 x i64], ptr addrspace(4) %common_array13, i64 0, i64 %idxprom2 + store i64 %rem, ptr addrspace(4) %arrayidx3 + %7 = load i32, ptr %i, align 4 + %inc = add nsw i32 %7, 1 + store i32 %inc, ptr %i, align 4 + br label %for.cond +} + + +define internal spir_func i32 @Foo57(ptr byval(%"tangle_group") %g, i32 %x, ptr byval(%"range") %local_id) { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + %agg.tmp = alloca %"tangle_group" + %agg.tmp1 = alloca %"range" + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store i32 %x, ptr %x.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 + %call = call 
spir_func i32 @Foo59(ptr byval(%"tangle_group") %agg.tmp, i32 %0, ptr byval(%"range") %agg.tmp1) + ret i32 %call +} + + +define internal spir_func i32 @Foo58(ptr byval(%"tangle_group") %g, i32 %x, i32 %mask) { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + %mask.addr = alloca i32, align 4 + %agg.tmp = alloca %"tangle_group" + %agg.tmp1 = alloca %"range" + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %agg.tmp1.ascast = addrspacecast ptr %agg.tmp1 to ptr addrspace(4) + store i32 %x, ptr %x.addr, align 4 + store i32 %mask, ptr %mask.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 + %1 = load i32, ptr %mask.addr, align 4 + %conv = zext i32 %1 to i64 + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp1.ascast, i64 %conv) + %call = call spir_func i32 @Bar13(ptr byval(%"tangle_group") %agg.tmp, i32 %0, ptr byval(%"range") %agg.tmp1) + ret i32 %call +} + + +define internal spir_func void @Foo68(ptr addrspace(4) %this, i64 %WI, ptr byval(%"tangle_group") %Tangle, i64 %TangleLeader, i64 %TangleSize, ptr byval(%"nd_item") align 1 %IsMember) { +entry: + %this.addr = alloca ptr addrspace(4) + %WI.addr = alloca i64 + %TangleLeader.addr = alloca i64 + %TangleSize.addr = alloca i64 + %agg.tmp = alloca %"range" + %agg.tmp2 = alloca %"tangle_group" + %Visible = alloca i64 + %Other = alloca i64 + %cleanup.dest.slot = alloca i32, align 4 + %agg.tmp5 = alloca %"range" + %agg.tmp8 = alloca %"range" + %OriginalLID = alloca i32, align 4 + %LID = alloca i32, align 4 + %BroadcastResult = alloca i32, align 4 + %agg.tmp12 = alloca %"tangle_group" + %agg.tmp15 = alloca %"range" + %AnyResult = alloca i8, align 1 + %agg.tmp18 = alloca %"tangle_group" + %agg.tmp24 = alloca %"range" + %AllResult = alloca i8, align 1 + %agg.tmp27 = alloca %"tangle_group" + %agg.tmp35 = alloca %"range" + %NoneResult = alloca i8, align 1 + %agg.tmp38 = alloca %"tangle_group" + %agg.tmp46 = alloca %"range" + %ReduceResult = alloca i32, align 4 + %agg.tmp49 = alloca %"tangle_group" + %agg.tmp50 = alloca %"nd_item", align 1 + %agg.tmp54 = alloca %"range" + %ExScanResult = alloca i32, align 4 + %agg.tmp57 = alloca %"tangle_group" + %agg.tmp58 = alloca %"nd_item", align 1 + %agg.tmp61 = alloca %"range" + %IncScanResult = alloca i32, align 4 + %agg.tmp64 = alloca %"tangle_group" + %agg.tmp65 = alloca %"nd_item", align 1 + %agg.tmp69 = alloca %"range" + %ShiftLeftResult = alloca i32, align 4 + %agg.tmp72 = alloca %"tangle_group" + %agg.tmp79 = alloca %"range" + %ShiftRightResult = alloca i32, align 4 + %agg.tmp82 = alloca %"tangle_group" + %agg.tmp88 = alloca %"range" + %SelectResult = alloca i32, align 4 + %agg.tmp91 = alloca %"tangle_group" + %agg.tmp92 = alloca %"range" + %ref.tmp = alloca %"range" + %ref.tmp93 = alloca %"range" + %ref.tmp94 = alloca i32, align 4 + %agg.tmp100 = alloca %"range" + %PermuteXorResult = alloca i32, align 4 + %agg.tmp103 = alloca %"tangle_group" + %agg.tmp106 = alloca %"range" + %TangleSize.addr.ascast = addrspacecast ptr %TangleSize.addr to ptr addrspace(4) + %agg.tmp.ascast = addrspacecast ptr %agg.tmp to ptr addrspace(4) + %agg.tmp5.ascast = addrspacecast ptr %agg.tmp5 to ptr addrspace(4) + %agg.tmp8.ascast = addrspacecast ptr %agg.tmp8 to ptr addrspace(4) + %agg.tmp15.ascast = addrspacecast ptr %agg.tmp15 to ptr addrspace(4) + %agg.tmp24.ascast = addrspacecast ptr %agg.tmp24 to ptr addrspace(4) + %agg.tmp35.ascast = addrspacecast ptr %agg.tmp35 to ptr addrspace(4) + %agg.tmp46.ascast = addrspacecast ptr %agg.tmp46 to ptr addrspace(4) + %agg.tmp54.ascast = 
addrspacecast ptr %agg.tmp54 to ptr addrspace(4) + %agg.tmp61.ascast = addrspacecast ptr %agg.tmp61 to ptr addrspace(4) + %agg.tmp69.ascast = addrspacecast ptr %agg.tmp69 to ptr addrspace(4) + %agg.tmp79.ascast = addrspacecast ptr %agg.tmp79 to ptr addrspace(4) + %agg.tmp88.ascast = addrspacecast ptr %agg.tmp88 to ptr addrspace(4) + %agg.tmp92.ascast = addrspacecast ptr %agg.tmp92 to ptr addrspace(4) + %ref.tmp.ascast = addrspacecast ptr %ref.tmp to ptr addrspace(4) + %ref.tmp93.ascast = addrspacecast ptr %ref.tmp93 to ptr addrspace(4) + %ref.tmp94.ascast = addrspacecast ptr %ref.tmp94 to ptr addrspace(4) + %agg.tmp100.ascast = addrspacecast ptr %agg.tmp100 to ptr addrspace(4) + %agg.tmp106.ascast = addrspacecast ptr %agg.tmp106 to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + store i64 %WI, ptr %WI.addr + %Tangle.ascast = addrspacecast ptr %Tangle to ptr addrspace(4) + store i64 %TangleLeader, ptr %TangleLeader.addr + store i64 %TangleSize, ptr %TangleSize.addr + %IsMember.ascast = addrspacecast ptr %IsMember to ptr addrspace(4) + %this1 = load ptr addrspace(4), ptr %this.addr + %TmpAcc1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %0 = load i64, ptr %WI.addr + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp.ascast, i64 %0) + %call = call spir_func ptr addrspace(4) @Foo70(ptr addrspace(4) %TmpAcc1, ptr byval(%"range") %agg.tmp) + store i64 1, ptr addrspace(4) %call + call spir_func void @Foo75(ptr byval(%"tangle_group") %agg.tmp2, i32 1) + store i64 0, ptr %Visible + store i64 0, ptr %Other + br label %for.cond + +for.cond: ; preds = %if.end, %entry + %1 = load i64, ptr %Other + %cmp = icmp ult i64 %1, 32 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + %2 = load i64, ptr %Visible + %3 = load i64, ptr %TangleSize.addr + %cmp7 = icmp eq i64 %2, %3 + %BarrierAcc = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1, i32 0, i32 1 + %4 = load i64, ptr %WI.addr + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp8.ascast, i64 %4) + %call9 = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %BarrierAcc, ptr byval(%"range") %agg.tmp8) + %storedv = zext i1 %cmp7 to i8 + store i8 %storedv, ptr addrspace(4) %call9, align 1 + %5 = getelementptr inbounds i8, ptr addrspace(4) %this1, i64 64 + %call10 = call spir_func i32 @Foo76(ptr addrspace(4) align 1 %5) + store i32 %call10, ptr %OriginalLID, align 4 + %call11 = call spir_func i32 @Foo90(ptr addrspace(4) %Tangle.ascast) + store i32 %call11, ptr %LID, align 4 + %6 = load i32, ptr %OriginalLID, align 4 + %call13 = call spir_func i32 @Foo91(ptr byval(%"tangle_group") %agg.tmp12, i32 %6, i32 0) + store i32 %call13, ptr %BroadcastResult, align 4 + %7 = load i32, ptr %BroadcastResult, align 4 + %conv = zext i32 %7 to i64 + %8 = load i64, ptr %TangleLeader.addr + %cmp14 = icmp eq i64 %conv, %8 + %BroadcastAcc = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1, i32 0, i32 3 + %9 = load i64, ptr %WI.addr + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp15.ascast, i64 %9) + %call16 = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %BroadcastAcc, ptr byval(%"range") %agg.tmp15) + %storedv17 = zext i1 %cmp14 to i8 + store i8 %storedv17, ptr addrspace(4) %call16, align 1 + %10 = load i32, ptr %LID, align 4 + %cmp19 = icmp eq i32 %10, 0 + %call20 = call spir_func zeroext i1 @Foo92(ptr byval(%"tangle_group") %agg.tmp18, i1 zeroext %cmp19) + %storedv21 = zext i1 %call20 to i8 + store i8 %storedv21, ptr %AnyResult, 
align 1 + %11 = load i8, ptr %AnyResult, align 1 + %loadedv = trunc i8 %11 to i1 + %conv22 = zext i1 %loadedv to i32 + %cmp23 = icmp eq i32 %conv22, 1 + %AnyAcc = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1, i32 0, i32 4 + %12 = load i64, ptr %WI.addr + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp24.ascast, i64 %12) + %call25 = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %AnyAcc, ptr byval(%"range") %agg.tmp24) + %storedv26 = zext i1 %cmp23 to i8 + store i8 %storedv26, ptr addrspace(4) %call25, align 1 + %13 = load i32, ptr %LID, align 4 + %conv28 = zext i32 %13 to i64 + %14 = load i64, ptr %TangleSize.addr + %cmp29 = icmp ult i64 %conv28, %14 + %call30 = call spir_func zeroext i1 @Foo67(ptr byval(%"tangle_group") %agg.tmp27, i1 zeroext %cmp29) + %storedv31 = zext i1 %call30 to i8 + store i8 %storedv31, ptr %AllResult, align 1 + %15 = load i8, ptr %AllResult, align 1 + %loadedv32 = trunc i8 %15 to i1 + %conv33 = zext i1 %loadedv32 to i32 + %cmp34 = icmp eq i32 %conv33, 1 + %AllAcc = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1, i32 0, i32 5 + %16 = load i64, ptr %WI.addr + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp35.ascast, i64 %16) + %call36 = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %AllAcc, ptr byval(%"range") %agg.tmp35) + %storedv37 = zext i1 %cmp34 to i8 + store i8 %storedv37, ptr addrspace(4) %call36, align 1 + %17 = load i32, ptr %LID, align 4 + %conv39 = zext i32 %17 to i64 + %18 = load i64, ptr %TangleSize.addr + %cmp40 = icmp uge i64 %conv39, %18 + %call41 = call spir_func zeroext i1 @Foo65(ptr byval(%"tangle_group") %agg.tmp38, i1 zeroext %cmp40) + %storedv42 = zext i1 %call41 to i8 + store i8 %storedv42, ptr %NoneResult, align 1 + %19 = load i8, ptr %NoneResult, align 1 + %loadedv43 = trunc i8 %19 to i1 + %conv44 = zext i1 %loadedv43 to i32 + %cmp45 = icmp eq i32 %conv44, 1 + %NoneAcc = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1, i32 0, i32 6 + %20 = load i64, ptr %WI.addr + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp46.ascast, i64 %20) + %call47 = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %NoneAcc, ptr byval(%"range") %agg.tmp46) + %storedv48 = zext i1 %cmp45 to i8 + store i8 %storedv48, ptr addrspace(4) %call47, align 1 + %call51 = call spir_func i32 @Foo64(ptr byval(%"tangle_group") %agg.tmp49, i32 1, ptr byval(%"nd_item") align 1 %agg.tmp50) + store i32 %call51, ptr %ReduceResult, align 4 + %21 = load i32, ptr %ReduceResult, align 4 + %conv52 = zext i32 %21 to i64 + %22 = load i64, ptr %TangleSize.addr + %cmp53 = icmp eq i64 %conv52, %22 + %ReduceAcc = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1, i32 0, i32 7 + %23 = load i64, ptr %WI.addr + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp54.ascast, i64 %23) + %call55 = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %ReduceAcc, ptr byval(%"range") %agg.tmp54) + %storedv56 = zext i1 %cmp53 to i8 + store i8 %storedv56, ptr addrspace(4) %call55, align 1 + %call59 = call spir_func i32 @Foo63(ptr byval(%"tangle_group") %agg.tmp57, i32 1, ptr byval(%"nd_item") align 1 %agg.tmp58) + store i32 %call59, ptr %ExScanResult, align 4 + %24 = load i32, ptr %ExScanResult, align 4 + %25 = load i32, ptr %LID, align 4 + %cmp60 = icmp eq i32 %24, %25 + %ExScanAcc = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1, i32 0, i32 8 + %26 = load i64, ptr %WI.addr + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp61.ascast, i64 
%26) + %call62 = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %ExScanAcc, ptr byval(%"range") %agg.tmp61) + %storedv63 = zext i1 %cmp60 to i8 + store i8 %storedv63, ptr addrspace(4) %call62, align 1 + %call66 = call spir_func i32 @Foo62(ptr byval(%"tangle_group") %agg.tmp64, i32 1, ptr byval(%"nd_item") align 1 %agg.tmp65) + store i32 %call66, ptr %IncScanResult, align 4 + %27 = load i32, ptr %IncScanResult, align 4 + %28 = load i32, ptr %LID, align 4 + %add67 = add i32 %28, 1 + %cmp68 = icmp eq i32 %27, %add67 + %IncScanAcc = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1, i32 0, i32 9 + %29 = load i64, ptr %WI.addr + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp69.ascast, i64 %29) + %call70 = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %IncScanAcc, ptr byval(%"range") %agg.tmp69) + %storedv71 = zext i1 %cmp68 to i8 + store i8 %storedv71, ptr addrspace(4) %call70, align 1 + %30 = load i32, ptr %LID, align 4 + %call73 = call spir_func i32 @Foo73(ptr byval(%"tangle_group") %agg.tmp72, i32 %30, i32 2) + store i32 %call73, ptr %ShiftLeftResult, align 4 + %31 = load i32, ptr %LID, align 4 + %add74 = add i32 %31, 2 + %conv75 = zext i32 %add74 to i64 + %32 = load i64, ptr %TangleSize.addr + %cmp76 = icmp uge i64 %conv75, %32 + br i1 %cmp76, label %lor.end, label %lor.rhs + +lor.rhs: ; preds = %for.cond.cleanup + %33 = load i32, ptr %ShiftLeftResult, align 4 + %34 = load i32, ptr %LID, align 4 + %add77 = add i32 %34, 2 + %cmp78 = icmp eq i32 %33, %add77 + br label %lor.end + +lor.end: ; preds = %lor.rhs, %for.cond.cleanup + %35 = phi i1 [ true, %for.cond.cleanup ], [ %cmp78, %lor.rhs ] + %ShiftLeftAcc = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1, i32 0, i32 10 + %36 = load i64, ptr %WI.addr + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp79.ascast, i64 %36) + %call80 = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %ShiftLeftAcc, ptr byval(%"range") %agg.tmp79) + %storedv81 = zext i1 %35 to i8 + store i8 %storedv81, ptr addrspace(4) %call80, align 1 + %37 = load i32, ptr %LID, align 4 + %call83 = call spir_func i32 @Foo53(ptr byval(%"tangle_group") %agg.tmp82, i32 %37, i32 2) + store i32 %call83, ptr %ShiftRightResult, align 4 + %38 = load i32, ptr %LID, align 4 + %cmp84 = icmp ult i32 %38, 2 + br i1 %cmp84, label %lor.end87, label %lor.rhs85 + +lor.rhs85: ; preds = %lor.end + %39 = load i32, ptr %ShiftRightResult, align 4 + %40 = load i32, ptr %LID, align 4 + %sub = sub i32 %40, 2 + %cmp86 = icmp eq i32 %39, %sub + br label %lor.end87 + +lor.end87: ; preds = %lor.rhs85, %lor.end + %41 = phi i1 [ true, %lor.end ], [ %cmp86, %lor.rhs85 ] + %ShiftRightAcc = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1, i32 0, i32 11 + %42 = load i64, ptr %WI.addr + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp88.ascast, i64 %42) + %call89 = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %ShiftRightAcc, ptr byval(%"range") %agg.tmp88) + %storedv90 = zext i1 %41 to i8 + store i8 %storedv90, ptr addrspace(4) %call89, align 1 + %43 = load i32, ptr %LID, align 4 + call spir_func void @Foo51(ptr addrspace(4) dead_on_unwind writable sret(%"range") %ref.tmp93.ascast, ptr addrspace(4) %Tangle.ascast) + store i32 2, ptr %ref.tmp94, align 4 + call spir_func void @Foo55(ptr addrspace(4) dead_on_unwind writable sret(%"range") %ref.tmp.ascast, ptr addrspace(4) %ref.tmp93.ascast, ptr addrspace(4) align 4 %ref.tmp94.ascast) + call spir_func void @Foo56(ptr addrspace(4) 
dead_on_unwind writable sret(%"range") %agg.tmp92.ascast, ptr addrspace(4) %ref.tmp.ascast, ptr addrspace(4) %TangleSize.addr.ascast) + %call95 = call spir_func i32 @Foo57(ptr byval(%"tangle_group") %agg.tmp91, i32 %43, ptr byval(%"range") %agg.tmp92) + store i32 %call95, ptr %SelectResult, align 4 + %44 = load i32, ptr %SelectResult, align 4 + %conv96 = zext i32 %44 to i64 + %45 = load i32, ptr %LID, align 4 + %add97 = add i32 %45, 2 + %conv98 = zext i32 %add97 to i64 + %46 = load i64, ptr %TangleSize.addr + %rem = urem i64 %conv98, %46 + %cmp99 = icmp eq i64 %conv96, %rem + %SelectAcc = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1, i32 0, i32 12 + %47 = load i64, ptr %WI.addr + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp100.ascast, i64 %47) + %call101 = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %SelectAcc, ptr byval(%"range") %agg.tmp100) + %storedv102 = zext i1 %cmp99 to i8 + store i8 %storedv102, ptr addrspace(4) %call101, align 1 + %48 = load i32, ptr %LID, align 4 + %call104 = call spir_func i32 @Foo58(ptr byval(%"tangle_group") %agg.tmp103, i32 %48, i32 2) + store i32 %call104, ptr %PermuteXorResult, align 4 + %49 = load i32, ptr %PermuteXorResult, align 4 + %50 = load i32, ptr %LID, align 4 + %xor = xor i32 %50, 2 + %cmp105 = icmp eq i32 %49, %xor + %PermuteXorAcc = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1, i32 0, i32 13 + %51 = load i64, ptr %WI.addr + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp106.ascast, i64 %51) + %call107 = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %PermuteXorAcc, ptr byval(%"range") %agg.tmp106) + %storedv108 = zext i1 %cmp105 to i8 + store i8 %storedv108, ptr addrspace(4) %call107, align 1 + ret void + +for.body: ; preds = %for.cond + %52 = load i64, ptr %Other + %call3 = call spir_func zeroext i1 @Foo74(ptr addrspace(4) align 1 %IsMember.ascast, i64 %52) + br i1 %call3, label %if.then, label %if.end + +if.then: ; preds = %for.body + %TmpAcc42 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %53 = load i64, ptr %Other + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp5.ascast, i64 %53) + %call6 = call spir_func ptr addrspace(4) @Foo70(ptr addrspace(4) %TmpAcc42, ptr byval(%"range") %agg.tmp5) + %54 = load i64, ptr addrspace(4) %call6 + %55 = load i64, ptr %Visible + %add = add i64 %55, %54 + store i64 %add, ptr %Visible + br label %if.end + +if.end: ; preds = %if.then, %for.body + %56 = load i64, ptr %Other + %inc = add i64 %56, 1 + store i64 %inc, ptr %Other + br label %for.cond +} + + +define internal spir_func void @Foo69(ptr addrspace(4) %this, i64 %WI, ptr byval(%"tangle_group") %Tangle, i64 %TangleLeader, i64 %TangleSize, ptr byval(%"nd_item") align 1 %IsMember) { +entry: + %this.addr = alloca ptr addrspace(4) + %WI.addr = alloca i64 + %TangleLeader.addr = alloca i64 + %TangleSize.addr = alloca i64 + %agg.tmp = alloca %"range" + %agg.tmp2 = alloca %"tangle_group" + %Visible = alloca i64 + %Other = alloca i64 + %cleanup.dest.slot = alloca i32, align 4 + %agg.tmp5 = alloca %"range" + %agg.tmp8 = alloca %"range" + %OriginalLID = alloca i32, align 4 + %LID = alloca i32, align 4 + %BroadcastResult = alloca i32, align 4 + %agg.tmp12 = alloca %"tangle_group" + %agg.tmp15 = alloca %"range" + %AnyResult = alloca i8, align 1 + %agg.tmp18 = alloca %"tangle_group" + %agg.tmp24 = alloca %"range" + %AllResult = alloca i8, align 1 + %agg.tmp27 = alloca %"tangle_group" + %agg.tmp35 = alloca %"range" + %NoneResult = alloca i8, align 1 + %agg.tmp38 = 
alloca %"tangle_group" + %agg.tmp46 = alloca %"range" + %ReduceResult = alloca i32, align 4 + %agg.tmp49 = alloca %"tangle_group" + %agg.tmp50 = alloca %"nd_item", align 1 + %agg.tmp54 = alloca %"range" + %ExScanResult = alloca i32, align 4 + %agg.tmp57 = alloca %"tangle_group" + %agg.tmp58 = alloca %"nd_item", align 1 + %agg.tmp61 = alloca %"range" + %IncScanResult = alloca i32, align 4 + %agg.tmp64 = alloca %"tangle_group" + %agg.tmp65 = alloca %"nd_item", align 1 + %agg.tmp69 = alloca %"range" + %ShiftLeftResult = alloca i32, align 4 + %agg.tmp72 = alloca %"tangle_group" + %agg.tmp79 = alloca %"range" + %ShiftRightResult = alloca i32, align 4 + %agg.tmp82 = alloca %"tangle_group" + %agg.tmp88 = alloca %"range" + %SelectResult = alloca i32, align 4 + %agg.tmp91 = alloca %"tangle_group" + %agg.tmp92 = alloca %"range" + %ref.tmp = alloca %"range" + %ref.tmp93 = alloca %"range" + %ref.tmp94 = alloca i32, align 4 + %agg.tmp100 = alloca %"range" + %PermuteXorResult = alloca i32, align 4 + %agg.tmp103 = alloca %"tangle_group" + %agg.tmp106 = alloca %"range" + %TangleSize.addr.ascast = addrspacecast ptr %TangleSize.addr to ptr addrspace(4) + %agg.tmp.ascast = addrspacecast ptr %agg.tmp to ptr addrspace(4) + %agg.tmp5.ascast = addrspacecast ptr %agg.tmp5 to ptr addrspace(4) + %agg.tmp8.ascast = addrspacecast ptr %agg.tmp8 to ptr addrspace(4) + %agg.tmp15.ascast = addrspacecast ptr %agg.tmp15 to ptr addrspace(4) + %agg.tmp24.ascast = addrspacecast ptr %agg.tmp24 to ptr addrspace(4) + %agg.tmp35.ascast = addrspacecast ptr %agg.tmp35 to ptr addrspace(4) + %agg.tmp46.ascast = addrspacecast ptr %agg.tmp46 to ptr addrspace(4) + %agg.tmp54.ascast = addrspacecast ptr %agg.tmp54 to ptr addrspace(4) + %agg.tmp61.ascast = addrspacecast ptr %agg.tmp61 to ptr addrspace(4) + %agg.tmp69.ascast = addrspacecast ptr %agg.tmp69 to ptr addrspace(4) + %agg.tmp79.ascast = addrspacecast ptr %agg.tmp79 to ptr addrspace(4) + %agg.tmp88.ascast = addrspacecast ptr %agg.tmp88 to ptr addrspace(4) + %agg.tmp92.ascast = addrspacecast ptr %agg.tmp92 to ptr addrspace(4) + %ref.tmp.ascast = addrspacecast ptr %ref.tmp to ptr addrspace(4) + %ref.tmp93.ascast = addrspacecast ptr %ref.tmp93 to ptr addrspace(4) + %ref.tmp94.ascast = addrspacecast ptr %ref.tmp94 to ptr addrspace(4) + %agg.tmp100.ascast = addrspacecast ptr %agg.tmp100 to ptr addrspace(4) + %agg.tmp106.ascast = addrspacecast ptr %agg.tmp106 to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + store i64 %WI, ptr %WI.addr + %Tangle.ascast = addrspacecast ptr %Tangle to ptr addrspace(4) + store i64 %TangleLeader, ptr %TangleLeader.addr + store i64 %TangleSize, ptr %TangleSize.addr + %IsMember.ascast = addrspacecast ptr %IsMember to ptr addrspace(4) + %this1 = load ptr addrspace(4), ptr %this.addr + %TmpAcc1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %0 = load i64, ptr %WI.addr + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp.ascast, i64 %0) + %call = call spir_func ptr addrspace(4) @Foo70(ptr addrspace(4) %TmpAcc1, ptr byval(%"range") %agg.tmp) + store i64 1, ptr addrspace(4) %call + call spir_func void @Foo75(ptr byval(%"tangle_group") %agg.tmp2, i32 1) + store i64 0, ptr %Visible + store i64 0, ptr %Other + br label %for.cond + +for.cond: ; preds = %if.end, %entry + %1 = load i64, ptr %Other + %cmp = icmp ult i64 %1, 32 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + %2 = load i64, ptr %Visible + %3 = load i64, ptr %TangleSize.addr + %cmp7 = icmp eq i64 %2, %3 + %BarrierAcc = 
getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1, i32 0, i32 1 + %4 = load i64, ptr %WI.addr + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp8.ascast, i64 %4) + %call9 = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %BarrierAcc, ptr byval(%"range") %agg.tmp8) + %storedv = zext i1 %cmp7 to i8 + store i8 %storedv, ptr addrspace(4) %call9, align 1 + %5 = getelementptr inbounds i8, ptr addrspace(4) %this1, i64 64 + %call10 = call spir_func i32 @Foo76(ptr addrspace(4) align 1 %5) + store i32 %call10, ptr %OriginalLID, align 4 + %call11 = call spir_func i32 @Foo90(ptr addrspace(4) %Tangle.ascast) + store i32 %call11, ptr %LID, align 4 + %6 = load i32, ptr %OriginalLID, align 4 + %call13 = call spir_func i32 @Foo91(ptr byval(%"tangle_group") %agg.tmp12, i32 %6, i32 0) + store i32 %call13, ptr %BroadcastResult, align 4 + %7 = load i32, ptr %BroadcastResult, align 4 + %conv = zext i32 %7 to i64 + %8 = load i64, ptr %TangleLeader.addr + %cmp14 = icmp eq i64 %conv, %8 + %BroadcastAcc = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1, i32 0, i32 3 + %9 = load i64, ptr %WI.addr + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp15.ascast, i64 %9) + %call16 = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %BroadcastAcc, ptr byval(%"range") %agg.tmp15) + %storedv17 = zext i1 %cmp14 to i8 + store i8 %storedv17, ptr addrspace(4) %call16, align 1 + %10 = load i32, ptr %LID, align 4 + %cmp19 = icmp eq i32 %10, 0 + %call20 = call spir_func zeroext i1 @Foo92(ptr byval(%"tangle_group") %agg.tmp18, i1 zeroext %cmp19) + %storedv21 = zext i1 %call20 to i8 + store i8 %storedv21, ptr %AnyResult, align 1 + %11 = load i8, ptr %AnyResult, align 1 + %loadedv = trunc i8 %11 to i1 + %conv22 = zext i1 %loadedv to i32 + %cmp23 = icmp eq i32 %conv22, 1 + %AnyAcc = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1, i32 0, i32 4 + %12 = load i64, ptr %WI.addr + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp24.ascast, i64 %12) + %call25 = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %AnyAcc, ptr byval(%"range") %agg.tmp24) + %storedv26 = zext i1 %cmp23 to i8 + store i8 %storedv26, ptr addrspace(4) %call25, align 1 + %13 = load i32, ptr %LID, align 4 + %conv28 = zext i32 %13 to i64 + %14 = load i64, ptr %TangleSize.addr + %cmp29 = icmp ult i64 %conv28, %14 + %call30 = call spir_func zeroext i1 @Foo67(ptr byval(%"tangle_group") %agg.tmp27, i1 zeroext %cmp29) + %storedv31 = zext i1 %call30 to i8 + store i8 %storedv31, ptr %AllResult, align 1 + %15 = load i8, ptr %AllResult, align 1 + %loadedv32 = trunc i8 %15 to i1 + %conv33 = zext i1 %loadedv32 to i32 + %cmp34 = icmp eq i32 %conv33, 1 + %AllAcc = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1, i32 0, i32 5 + %16 = load i64, ptr %WI.addr + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp35.ascast, i64 %16) + %call36 = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %AllAcc, ptr byval(%"range") %agg.tmp35) + %storedv37 = zext i1 %cmp34 to i8 + store i8 %storedv37, ptr addrspace(4) %call36, align 1 + %17 = load i32, ptr %LID, align 4 + %conv39 = zext i32 %17 to i64 + %18 = load i64, ptr %TangleSize.addr + %cmp40 = icmp uge i64 %conv39, %18 + %call41 = call spir_func zeroext i1 @Foo65(ptr byval(%"tangle_group") %agg.tmp38, i1 zeroext %cmp40) + %storedv42 = zext i1 %call41 to i8 + store i8 %storedv42, ptr %NoneResult, align 1 + %19 = load i8, ptr %NoneResult, align 1 + %loadedv43 = trunc i8 %19 to i1 + %conv44 = zext i1 
%loadedv43 to i32 + %cmp45 = icmp eq i32 %conv44, 1 + %NoneAcc = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1, i32 0, i32 6 + %20 = load i64, ptr %WI.addr + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp46.ascast, i64 %20) + %call47 = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %NoneAcc, ptr byval(%"range") %agg.tmp46) + %storedv48 = zext i1 %cmp45 to i8 + store i8 %storedv48, ptr addrspace(4) %call47, align 1 + %call51 = call spir_func i32 @Foo64(ptr byval(%"tangle_group") %agg.tmp49, i32 1, ptr byval(%"nd_item") align 1 %agg.tmp50) + store i32 %call51, ptr %ReduceResult, align 4 + %21 = load i32, ptr %ReduceResult, align 4 + %conv52 = zext i32 %21 to i64 + %22 = load i64, ptr %TangleSize.addr + %cmp53 = icmp eq i64 %conv52, %22 + %ReduceAcc = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1, i32 0, i32 7 + %23 = load i64, ptr %WI.addr + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp54.ascast, i64 %23) + %call55 = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %ReduceAcc, ptr byval(%"range") %agg.tmp54) + %storedv56 = zext i1 %cmp53 to i8 + store i8 %storedv56, ptr addrspace(4) %call55, align 1 + %call59 = call spir_func i32 @Foo63(ptr byval(%"tangle_group") %agg.tmp57, i32 1, ptr byval(%"nd_item") align 1 %agg.tmp58) + store i32 %call59, ptr %ExScanResult, align 4 + %24 = load i32, ptr %ExScanResult, align 4 + %25 = load i32, ptr %LID, align 4 + %cmp60 = icmp eq i32 %24, %25 + %ExScanAcc = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1, i32 0, i32 8 + %26 = load i64, ptr %WI.addr + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp61.ascast, i64 %26) + %call62 = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %ExScanAcc, ptr byval(%"range") %agg.tmp61) + %storedv63 = zext i1 %cmp60 to i8 + store i8 %storedv63, ptr addrspace(4) %call62, align 1 + %call66 = call spir_func i32 @Foo62(ptr byval(%"tangle_group") %agg.tmp64, i32 1, ptr byval(%"nd_item") align 1 %agg.tmp65) + store i32 %call66, ptr %IncScanResult, align 4 + %27 = load i32, ptr %IncScanResult, align 4 + %28 = load i32, ptr %LID, align 4 + %add67 = add i32 %28, 1 + %cmp68 = icmp eq i32 %27, %add67 + %IncScanAcc = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1, i32 0, i32 9 + %29 = load i64, ptr %WI.addr + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp69.ascast, i64 %29) + %call70 = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %IncScanAcc, ptr byval(%"range") %agg.tmp69) + %storedv71 = zext i1 %cmp68 to i8 + store i8 %storedv71, ptr addrspace(4) %call70, align 1 + %30 = load i32, ptr %LID, align 4 + %call73 = call spir_func i32 @Foo73(ptr byval(%"tangle_group") %agg.tmp72, i32 %30, i32 2) + store i32 %call73, ptr %ShiftLeftResult, align 4 + %31 = load i32, ptr %LID, align 4 + %add74 = add i32 %31, 2 + %conv75 = zext i32 %add74 to i64 + %32 = load i64, ptr %TangleSize.addr + %cmp76 = icmp uge i64 %conv75, %32 + br i1 %cmp76, label %lor.end, label %lor.rhs + +lor.rhs: ; preds = %for.cond.cleanup + %33 = load i32, ptr %ShiftLeftResult, align 4 + %34 = load i32, ptr %LID, align 4 + %add77 = add i32 %34, 2 + %cmp78 = icmp eq i32 %33, %add77 + br label %lor.end + +lor.end: ; preds = %lor.rhs, %for.cond.cleanup + %35 = phi i1 [ true, %for.cond.cleanup ], [ %cmp78, %lor.rhs ] + %ShiftLeftAcc = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1, i32 0, i32 10 + %36 = load i64, ptr %WI.addr + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp79.ascast, i64 
%36) + %call80 = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %ShiftLeftAcc, ptr byval(%"range") %agg.tmp79) + %storedv81 = zext i1 %35 to i8 + store i8 %storedv81, ptr addrspace(4) %call80, align 1 + %37 = load i32, ptr %LID, align 4 + %call83 = call spir_func i32 @Foo53(ptr byval(%"tangle_group") %agg.tmp82, i32 %37, i32 2) + store i32 %call83, ptr %ShiftRightResult, align 4 + %38 = load i32, ptr %LID, align 4 + %cmp84 = icmp ult i32 %38, 2 + br i1 %cmp84, label %lor.end87, label %lor.rhs85 + +lor.rhs85: ; preds = %lor.end + %39 = load i32, ptr %ShiftRightResult, align 4 + %40 = load i32, ptr %LID, align 4 + %sub = sub i32 %40, 2 + %cmp86 = icmp eq i32 %39, %sub + br label %lor.end87 + +lor.end87: ; preds = %lor.rhs85, %lor.end + %41 = phi i1 [ true, %lor.end ], [ %cmp86, %lor.rhs85 ] + %ShiftRightAcc = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1, i32 0, i32 11 + %42 = load i64, ptr %WI.addr + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp88.ascast, i64 %42) + %call89 = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %ShiftRightAcc, ptr byval(%"range") %agg.tmp88) + %storedv90 = zext i1 %41 to i8 + store i8 %storedv90, ptr addrspace(4) %call89, align 1 + %43 = load i32, ptr %LID, align 4 + call spir_func void @Foo51(ptr addrspace(4) dead_on_unwind writable sret(%"range") %ref.tmp93.ascast, ptr addrspace(4) %Tangle.ascast) + store i32 2, ptr %ref.tmp94, align 4 + call spir_func void @Foo55(ptr addrspace(4) dead_on_unwind writable sret(%"range") %ref.tmp.ascast, ptr addrspace(4) %ref.tmp93.ascast, ptr addrspace(4) align 4 %ref.tmp94.ascast) + call spir_func void @Foo56(ptr addrspace(4) dead_on_unwind writable sret(%"range") %agg.tmp92.ascast, ptr addrspace(4) %ref.tmp.ascast, ptr addrspace(4) %TangleSize.addr.ascast) + %call95 = call spir_func i32 @Foo57(ptr byval(%"tangle_group") %agg.tmp91, i32 %43, ptr byval(%"range") %agg.tmp92) + store i32 %call95, ptr %SelectResult, align 4 + %44 = load i32, ptr %SelectResult, align 4 + %conv96 = zext i32 %44 to i64 + %45 = load i32, ptr %LID, align 4 + %add97 = add i32 %45, 2 + %conv98 = zext i32 %add97 to i64 + %46 = load i64, ptr %TangleSize.addr + %rem = urem i64 %conv98, %46 + %cmp99 = icmp eq i64 %conv96, %rem + %SelectAcc = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1, i32 0, i32 12 + %47 = load i64, ptr %WI.addr + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp100.ascast, i64 %47) + %call101 = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %SelectAcc, ptr byval(%"range") %agg.tmp100) + %storedv102 = zext i1 %cmp99 to i8 + store i8 %storedv102, ptr addrspace(4) %call101, align 1 + %48 = load i32, ptr %LID, align 4 + %call104 = call spir_func i32 @Foo58(ptr byval(%"tangle_group") %agg.tmp103, i32 %48, i32 2) + store i32 %call104, ptr %PermuteXorResult, align 4 + %49 = load i32, ptr %PermuteXorResult, align 4 + %50 = load i32, ptr %LID, align 4 + %xor = xor i32 %50, 2 + %cmp105 = icmp eq i32 %49, %xor + %PermuteXorAcc = getelementptr inbounds nuw %class.anon.8, ptr addrspace(4) %this1, i32 0, i32 13 + %51 = load i64, ptr %WI.addr + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp106.ascast, i64 %51) + %call107 = call spir_func align 1 ptr addrspace(4) @Foo54(ptr addrspace(4) %PermuteXorAcc, ptr byval(%"range") %agg.tmp106) + %storedv108 = zext i1 %cmp105 to i8 + store i8 %storedv108, ptr addrspace(4) %call107, align 1 + ret void + +for.body: ; preds = %for.cond + %52 = load i64, ptr %Other + %call3 = call spir_func zeroext i1 @Bar14(ptr 
addrspace(4) align 1 %IsMember.ascast, i64 %52) + br i1 %call3, label %if.then, label %if.end + +if.then: ; preds = %for.body + %TmpAcc42 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %53 = load i64, ptr %Other + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp5.ascast, i64 %53) + %call6 = call spir_func ptr addrspace(4) @Foo70(ptr addrspace(4) %TmpAcc42, ptr byval(%"range") %agg.tmp5) + %54 = load i64, ptr addrspace(4) %call6 + %55 = load i64, ptr %Visible + %add = add i64 %55, %54 + store i64 %add, ptr %Visible + br label %if.end + +if.end: ; preds = %if.then, %for.body + %56 = load i64, ptr %Other + %inc = add i64 %56, 1 + store i64 %inc, ptr %Other + br label %for.cond +} + + +define internal spir_func zeroext i1 @Bar14(ptr addrspace(4) align 1 %this, i64 %Other) { +entry: + %retval = alloca i1, align 1 + %this.addr = alloca ptr addrspace(4) + %Other.addr = alloca i64 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + store i64 %Other, ptr %Other.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %0 = load i64, ptr %Other.addr + %cmp = icmp uge i64 %0, 24 + br i1 %cmp, label %land.rhs, label %land.end + +land.rhs: ; preds = %entry + %1 = load i64, ptr %Other.addr + %cmp2 = icmp ult i64 %1, 32 + br label %land.end + +land.end: ; preds = %land.rhs, %entry + %2 = phi i1 [ false, %entry ], [ %cmp2, %land.rhs ] + ret i1 %2 +} + + +define internal spir_func zeroext i1 @Foo74(ptr addrspace(4) align 1 %this, i64 %Other) { +entry: + %retval = alloca i1, align 1 + %this.addr = alloca ptr addrspace(4) + %Other.addr = alloca i64 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + store i64 %Other, ptr %Other.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %0 = load i64, ptr %Other.addr + %cmp = icmp uge i64 %0, 4 + br i1 %cmp, label %land.rhs, label %land.end + +land.rhs: ; preds = %entry + %1 = load i64, ptr %Other.addr + %cmp2 = icmp ult i64 %1, 24 + br label %land.end + +land.end: ; preds = %land.rhs, %entry + %2 = phi i1 [ false, %entry ], [ %cmp2, %land.rhs ] + ret i1 %2 +} + + +define internal spir_func i32 @Bar13(ptr byval(%"tangle_group") %g, i32 %x, ptr byval(%"range") %mask) { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + %TargetLocalId = alloca %"range" + %ref.tmp = alloca %"range" + %TargetId = alloca i32, align 4 + %agg.tmp = alloca %"tangle_group" + %agg.tmp1 = alloca %"range" + %cleanup.dest.slot = alloca i32, align 4 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %x.addr.ascast = addrspacecast ptr %x.addr to ptr addrspace(4) + %TargetLocalId.ascast = addrspacecast ptr %TargetLocalId to ptr addrspace(4) + %ref.tmp.ascast = addrspacecast ptr %ref.tmp to ptr addrspace(4) + %g.ascast = addrspacecast ptr %g to ptr addrspace(4) + store i32 %x, ptr %x.addr, align 4 + %mask.ascast = addrspacecast ptr %mask to ptr addrspace(4) + %0 = addrspacecast ptr addrspace(1) @_ZSt6ignore to ptr addrspace(4) + %call = call spir_func align 1 ptr addrspace(4) @Bar15(ptr addrspace(4) align 1 %0, ptr addrspace(4) %g.ascast) + call spir_func void @Foo51(ptr addrspace(4) dead_on_unwind writable sret(%"range") %ref.tmp.ascast, ptr addrspace(4) %g.ascast) + call spir_func void @Bar16(ptr addrspace(4) dead_on_unwind writable sret(%"range") %TargetLocalId.ascast, ptr addrspace(4) %ref.tmp.ascast, ptr addrspace(4) %mask.ascast) + %call2 = call spir_func i32 @Foo48(ptr byval(%"tangle_group") %agg.tmp, ptr 
byval(%"range") %agg.tmp1) + store i32 %call2, ptr %TargetId, align 4 + %call3 = call spir_func i32 @Foo49(ptr addrspace(4) align 4 %x.addr.ascast) + %1 = load i32, ptr %TargetId, align 4 + %call4 = call spir_func i32 @Foo50(i32 3, i32 %call3, i32 %1) + ret i32 %call4 +} + + +define internal spir_func align 1 ptr addrspace(4) @Bar15(ptr addrspace(4) align 1 %this, ptr addrspace(4) %0) { +entry: + %retval = alloca ptr addrspace(4) + %this.addr = alloca ptr addrspace(4) + %.addr = alloca ptr addrspace(4) + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + store ptr addrspace(4) %0, ptr %.addr + %this1 = load ptr addrspace(4), ptr %this.addr + ret ptr addrspace(4) %this1 +} + + +define internal spir_func void @Bar16(ptr addrspace(4) dead_on_unwind noalias writable sret(%"range") %agg.result, ptr addrspace(4) %lhs, ptr addrspace(4) %rhs) { +entry: + %lhs.addr = alloca ptr addrspace(4) + %rhs.addr = alloca ptr addrspace(4) + %i = alloca i32, align 4 + %cleanup.dest.slot = alloca i32, align 4 + store ptr addrspace(4) %lhs, ptr %lhs.addr + store ptr addrspace(4) %rhs, ptr %rhs.addr + call spir_func void @Foo11(ptr addrspace(4) %agg.result) + store i32 0, ptr %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %0 = load i32, ptr %i, align 4 + %cmp = icmp slt i32 %0, 1 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + ret void + +for.body: ; preds = %for.cond + %1 = load ptr addrspace(4), ptr %lhs.addr + %common_array2 = bitcast ptr addrspace(4) %1 to ptr addrspace(4) + %2 = load i32, ptr %i, align 4 + %idxprom = sext i32 %2 to i64 + %arrayidx = getelementptr inbounds [1 x i64], ptr addrspace(4) %common_array2, i64 0, i64 %idxprom + %3 = load i64, ptr addrspace(4) %arrayidx + %4 = load ptr addrspace(4), ptr %rhs.addr + %common_array13 = bitcast ptr addrspace(4) %4 to ptr addrspace(4) + %5 = load i32, ptr %i, align 4 + %idxprom2 = sext i32 %5 to i64 + %arrayidx3 = getelementptr inbounds [1 x i64], ptr addrspace(4) %common_array13, i64 0, i64 %idxprom2 + %6 = load i64, ptr addrspace(4) %arrayidx3 + %xor = xor i64 %3, %6 + %common_array44 = bitcast ptr addrspace(4) %agg.result to ptr addrspace(4) + %7 = load i32, ptr %i, align 4 + %idxprom5 = sext i32 %7 to i64 + %arrayidx6 = getelementptr inbounds [1 x i64], ptr addrspace(4) %common_array44, i64 0, i64 %idxprom5 + store i64 %xor, ptr addrspace(4) %arrayidx6 + %8 = load i32, ptr %i, align 4 + %inc = add nsw i32 %8, 1 + store i32 %inc, ptr %i, align 4 + br label %for.cond +} + + +define internal spir_func i32 @Foo48(ptr byval(%"tangle_group") %g, ptr byval(%"range") %local_id) { +entry: + %retval.i = alloca i64 + %this.addr.i = alloca ptr addrspace(4) + %Result.i = alloca i64 + %retval = alloca i32, align 4 + %agg.tmp = alloca %"tangle_group" + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %local_id.ascast = addrspacecast ptr %local_id to ptr addrspace(4) + %retval.ascast.i = addrspacecast ptr %retval.i to ptr addrspace(4) + store ptr addrspace(4) %local_id.ascast, ptr %this.addr.i + %this1.i = load ptr addrspace(4), ptr %this.addr.i + %0 = load i64, ptr addrspace(4) %this1.i + store i64 %0, ptr %Result.i + %1 = load i64, ptr %Result.i + %conv = trunc i64 %1 to i32 + %call1 = call spir_func i32 @Bar17(ptr byval(%"tangle_group") %agg.tmp, i32 %conv) + ret i32 %call1 +} + + +define internal spir_func i32 @Foo49(ptr addrspace(4) align 4 %x) { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca ptr 
addrspace(4) + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %x, ptr %x.addr + %0 = load ptr addrspace(4), ptr %x.addr + %1 = load i32, ptr addrspace(4) %0, align 4 + ret i32 %1 +} + +declare dso_local spir_func i32 @Foo50(i32, i32, i32) + + +define internal spir_func i32 @Bar17(ptr byval(%"tangle_group") %Group, i32 %Id) { +entry: + %retval = alloca i32, align 4 + %Id.addr = alloca i32, align 4 + %MemberMask = alloca %"vec.16", align 16 + %agg.tmp = alloca %"ss_sub_group_mask" + %agg.tmp1 = alloca %"tangle_group" + %Count = alloca i32, align 4 + %i = alloca i32, align 4 + %cleanup.dest.slot = alloca i32, align 4 + %b = alloca i32, align 4 + %MemberMask.ascast = addrspacecast ptr %MemberMask to ptr addrspace(4) + %agg.tmp.ascast = addrspacecast ptr %agg.tmp to ptr addrspace(4) + store i32 %Id, ptr %Id.addr, align 4 + call spir_func void @Bar18(ptr addrspace(4) dead_on_unwind writable sret(%"ss_sub_group_mask") %agg.tmp.ascast, ptr byval(%"tangle_group") %agg.tmp1) + call spir_func void @Bar19(ptr addrspace(4) dead_on_unwind writable sret(%"vec.16") align 16 %MemberMask.ascast, ptr byval(%"ss_sub_group_mask") %agg.tmp) + store i32 0, ptr %Count, align 4 + store i32 0, ptr %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.end, %entry + %0 = load i32, ptr %i, align 4 + %cmp = icmp slt i32 %0, 4 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + store i32 2, ptr %cleanup.dest.slot, align 4 + br label %cleanup12 + +for.body: ; preds = %for.cond + store i32 0, ptr %b, align 4 + br label %for.cond2 + +for.cond2: ; preds = %if.end8, %for.body + %1 = load i32, ptr %b, align 4 + %cmp3 = icmp slt i32 %1, 32 + br i1 %cmp3, label %for.body5, label %for.cond.cleanup4 + +for.cond.cleanup4: ; preds = %for.cond2 + store i32 5, ptr %cleanup.dest.slot, align 4 + br label %cleanup + +for.body5: ; preds = %for.cond2 + %2 = load i32, ptr %i, align 4 + %call = call spir_func align 4 ptr addrspace(4) @Bar20(ptr addrspace(4) align 16 %MemberMask.ascast, i32 %2) + %3 = load i32, ptr addrspace(4) %call, align 4 + %4 = load i32, ptr %b, align 4 + %shl = shl i32 1, %4 + %and = and i32 %3, %shl + %tobool = icmp ne i32 %and, 0 + br i1 %tobool, label %if.then, label %if.end8 + +if.then: ; preds = %for.body5 + %5 = load i32, ptr %Count, align 4 + %6 = load i32, ptr %Id.addr, align 4 + %cmp6 = icmp eq i32 %5, %6 + br i1 %cmp6, label %if.then7, label %if.end + +if.end: ; preds = %if.then + %7 = load i32, ptr %Count, align 4 + %inc = add i32 %7, 1 + store i32 %inc, ptr %Count, align 4 + br label %if.end8 + +if.end8: ; preds = %if.end, %for.body5 + %8 = load i32, ptr %b, align 4 + %inc9 = add nsw i32 %8, 1 + store i32 %inc9, ptr %b, align 4 + br label %for.cond2 + +if.then7: ; preds = %if.then + %9 = load i32, ptr %i, align 4 + %mul = mul nsw i32 %9, 32 + %10 = load i32, ptr %b, align 4 + %add = add nsw i32 %mul, %10 + store i32 %add, ptr %retval, align 4 + store i32 1, ptr %cleanup.dest.slot, align 4 + br label %cleanup + +cleanup: ; preds = %if.then7, %for.cond.cleanup4 + %cleanup.dest = load i32, ptr %cleanup.dest.slot, align 4 + %cond = icmp eq i32 %cleanup.dest, 5 + br i1 %cond, label %for.end, label %cleanup12 + +for.end: ; preds = %cleanup + %11 = load i32, ptr %i, align 4 + %inc11 = add nsw i32 %11, 1 + store i32 %inc11, ptr %i, align 4 + br label %for.cond + +cleanup12: ; preds = %cleanup, %for.cond.cleanup + %cleanup.dest13 = load i32, ptr %cleanup.dest.slot, align 4 + %cond1 = icmp eq i32 %cleanup.dest13, 2 + br i1 
%cond1, label %for.end14, label %cleanup15 + +for.end14: ; preds = %cleanup12 + %12 = load i32, ptr %Count, align 4 + store i32 %12, ptr %retval, align 4 + store i32 1, ptr %cleanup.dest.slot, align 4 + br label %cleanup15 + +cleanup15: ; preds = %cleanup12, %for.end14 + %13 = load i32, ptr %retval, align 4 + ret i32 %13 +} + + +define internal spir_func void @Bar18(ptr addrspace(4) dead_on_unwind noalias writable sret(%"ss_sub_group_mask") %agg.result, ptr byval(%"tangle_group") %Group) { +entry: + %Mask1 = bitcast ptr %Group to ptr + ret void +} + + +define internal spir_func void @Bar19(ptr addrspace(4) dead_on_unwind noalias writable sret(%"vec.16") align 16 %agg.result, ptr byval(%"ss_sub_group_mask") %Mask) { +entry: + %TmpMArray = alloca %"struct.std::array.20", align 4 + %agg.tmp = alloca %"range" + %i = alloca i32, align 4 + %cleanup.dest.slot = alloca i32, align 4 + %cleanup.dest.slot2 = alloca i32, align 4 + %TmpMArray.ascast = addrspacecast ptr %TmpMArray to ptr addrspace(4) + %agg.tmp.ascast = addrspacecast ptr %agg.tmp to ptr addrspace(4) + %Mask.ascast = addrspacecast ptr %Mask to ptr addrspace(4) + call spir_func void @Bar50(ptr addrspace(4) align 4 %TmpMArray.ascast) + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp.ascast, i64 0) + call spir_func void @Bar51(ptr addrspace(4) %Mask.ascast, ptr addrspace(4) align 4 %TmpMArray.ascast, ptr byval(%"range") %agg.tmp) + store i32 0, ptr %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, ptr %i, align 4 + %cmp = icmp slt i32 %0, 4 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + br label %for.end + +for.end: ; preds = %for.cond.cleanup + ret void + +for.body: ; preds = %for.cond + %1 = load i32, ptr %i, align 4 + %conv = sext i32 %1 to i64 + %call = call spir_func align 4 ptr addrspace(4) @Bar57(ptr addrspace(4) align 4 %TmpMArray.ascast, i64 %conv) + %2 = load i32, ptr addrspace(4) %call, align 4 + %3 = load i32, ptr %i, align 4 + %call1 = call spir_func align 4 ptr addrspace(4) @Bar20(ptr addrspace(4) align 16 %agg.result, i32 %3) + store i32 %2, ptr addrspace(4) %call1, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %4 = load i32, ptr %i, align 4 + %inc = add nsw i32 %4, 1 + store i32 %inc, ptr %i, align 4 + br label %for.cond +} + + +define internal spir_func align 4 ptr addrspace(4) @Bar20(ptr addrspace(4) align 16 %this, i32 %i) { +entry: + %retval = alloca ptr addrspace(4) + %this.addr = alloca ptr addrspace(4) + %i.addr = alloca i32, align 4 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + store i32 %i, ptr %i.addr, align 4 + %this1 = load ptr addrspace(4), ptr %this.addr + %m_Data1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %0 = load i32, ptr %i.addr, align 4 + %conv = sext i32 %0 to i64 + %call = call spir_func align 4 ptr addrspace(4) @_ZNSt5arrayIjLm4EEixEm(ptr addrspace(4) align 4 %m_Data1, i64 %conv) + ret ptr addrspace(4) %call +} + + +define internal spir_func align 4 ptr addrspace(4) @_ZNSt5arrayIjLm4EEixEm(ptr addrspace(4) align 4 %this, i64 %__n) { +entry: + %retval = alloca ptr addrspace(4) + %this.addr = alloca ptr addrspace(4) + %__n.addr = alloca i64 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + store i64 %__n, ptr %__n.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %_M_elems1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %0 = load i64, 
ptr %__n.addr + %call = call spir_func align 4 ptr addrspace(4) @_ZNSt14__array_traitsIjLm4EE6_S_refERA4_Kjm(ptr addrspace(4) align 4 %_M_elems1, i64 %0) + ret ptr addrspace(4) %call +} + + +define internal spir_func align 4 ptr addrspace(4) @_ZNSt14__array_traitsIjLm4EE6_S_refERA4_Kjm(ptr addrspace(4) align 4 %__t, i64 %__n) { +entry: + %retval = alloca ptr addrspace(4) + %__t.addr = alloca ptr addrspace(4) + %__n.addr = alloca i64 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %__t, ptr %__t.addr + store i64 %__n, ptr %__n.addr + %0 = load ptr addrspace(4), ptr %__t.addr + %1 = load i64, ptr %__n.addr + %arrayidx = getelementptr inbounds nuw [4 x i32], ptr addrspace(4) %0, i64 0, i64 %1 + ret ptr addrspace(4) %arrayidx +} + + +define internal spir_func void @Bar50(ptr addrspace(4) align 4 %this) { +entry: + %this.addr = alloca ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %0 = inttoptr i64 16 to ptr addrspace(4) + br label %arrayinit.body + +arrayinit.body: ; preds = %arrayinit.body, %entry + %lsr.iv = phi i64 [ %lsr.iv.next, %arrayinit.body ], [ 0, %entry ] + %scevgep = getelementptr i8, ptr addrspace(4) %this1, i64 %lsr.iv + store i32 0, ptr addrspace(4) %scevgep, align 4 + %lsr.iv.next = add nuw nsw i64 %lsr.iv, 4 + %lsr.iv.next1 = inttoptr i64 %lsr.iv.next to ptr addrspace(4) + %arrayinit.done = icmp eq ptr addrspace(4) %lsr.iv.next1, %0 + br i1 %arrayinit.done, label %arrayinit.end2, label %arrayinit.body + +arrayinit.end2: ; preds = %arrayinit.body + ret void +} + + +define internal spir_func void @Bar51(ptr addrspace(4) %this, ptr addrspace(4) align 4 %bits, ptr byval(%"range") %pos) { +entry: + %this.addr = alloca ptr addrspace(4) + %bits.addr = alloca ptr addrspace(4) + %cur_pos = alloca i64 + %__range4 = alloca ptr addrspace(4) + %__begin0 = alloca ptr addrspace(4) + %__end0 = alloca ptr addrspace(4) + %cleanup.dest.slot = alloca i32, align 4 + %elem = alloca ptr addrspace(4) + %agg.tmp = alloca %"range" + %agg.tmp.ascast = addrspacecast ptr %agg.tmp to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + store ptr addrspace(4) %bits, ptr %bits.addr + %pos.ascast = addrspacecast ptr %pos to ptr addrspace(4) + %this1 = load ptr addrspace(4), ptr %this.addr + %call = call spir_func i64 @Bar52(ptr addrspace(4) %pos.ascast, i32 0) + store i64 %call, ptr %cur_pos + %0 = load ptr addrspace(4), ptr %bits.addr + store ptr addrspace(4) %0, ptr %__range4 + %1 = load ptr addrspace(4), ptr %__range4 + %call2 = call spir_func ptr addrspace(4) @Bar53(ptr addrspace(4) align 4 %1) + store ptr addrspace(4) %call2, ptr %__begin0 + %2 = load ptr addrspace(4), ptr %__range4 + %call3 = call spir_func ptr addrspace(4) @Bar54(ptr addrspace(4) align 4 %2) + store ptr addrspace(4) %call3, ptr %__end0 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %3 = load ptr addrspace(4), ptr %__begin0 + %4 = load ptr addrspace(4), ptr %__end0 + %cmp = icmp ne ptr addrspace(4) %3, %4 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + br label %for.end + +for.end: ; preds = %for.cond.cleanup + ret void + +for.body: ; preds = %for.cond + %5 = load ptr addrspace(4), ptr %__begin0 + store ptr addrspace(4) %5, ptr %elem + %6 = load i64, ptr %cur_pos + %call4 = call spir_func i32 @Bar55(ptr addrspace(4) %this1) + %conv = zext i32 %call4 to i64 + %cmp5 = icmp ult i64 %6, %conv + br i1 %cmp5, label %if.then, label %if.else + +if.else: ; 
preds = %for.body + %7 = load ptr addrspace(4), ptr %elem + store i32 0, ptr addrspace(4) %7, align 4 + br label %if.end + +if.then: ; preds = %for.body + %8 = load ptr addrspace(4), ptr %elem + %9 = load i64, ptr %cur_pos + call spir_func void @Foo46(ptr addrspace(4) %agg.tmp.ascast, i64 %9) + call spir_func void @Bar56(ptr addrspace(4) %this1, ptr addrspace(4) align 4 %8, ptr byval(%"range") %agg.tmp) + %10 = load i64, ptr %cur_pos + %add = add i64 %10, 32 + store i64 %add, ptr %cur_pos + br label %if.end + +if.end: ; preds = %if.else, %if.then + br label %for.inc + +for.inc: ; preds = %if.end + %11 = load ptr addrspace(4), ptr %__begin0 + %incdec.ptr = getelementptr inbounds nuw i32, ptr addrspace(4) %11, i32 1 + store ptr addrspace(4) %incdec.ptr, ptr %__begin0 + br label %for.cond +} + + +define internal spir_func align 4 ptr addrspace(4) @Bar57(ptr addrspace(4) align 4 %this, i64 %index) { +entry: + %retval = alloca ptr addrspace(4) + %this.addr = alloca ptr addrspace(4) + %index.addr = alloca i64 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + store i64 %index, ptr %index.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %MData1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %0 = load i64, ptr %index.addr + %arrayidx = getelementptr inbounds nuw [4 x i32], ptr addrspace(4) %MData1, i64 0, i64 %0 + ret ptr addrspace(4) %arrayidx +} + + +define internal spir_func i64 @Bar52(ptr addrspace(4) %this, i32 %dimension) { +entry: + %this.addr.i = alloca ptr addrspace(4) + %dimension.addr.i = alloca i32, align 4 + %retval = alloca i64 + %this.addr = alloca ptr addrspace(4) + %dimension.addr = alloca i32, align 4 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + store i32 %dimension, ptr %dimension.addr, align 4 + %this1 = load ptr addrspace(4), ptr %this.addr + %0 = load i32, ptr %dimension.addr, align 4 + store ptr addrspace(4) %this1, ptr %this.addr.i + store i32 %0, ptr %dimension.addr.i, align 4 + %this1.i = load ptr addrspace(4), ptr %this.addr.i + %common_array1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %1 = load i32, ptr %dimension.addr, align 4 + %idxprom = sext i32 %1 to i64 + %arrayidx = getelementptr inbounds [1 x i64], ptr addrspace(4) %common_array1, i64 0, i64 %idxprom + %2 = load i64, ptr addrspace(4) %arrayidx + ret i64 %2 +} + + +define internal spir_func ptr addrspace(4) @Bar53(ptr addrspace(4) align 4 %this) { +entry: + %retval = alloca ptr addrspace(4) + %this.addr = alloca ptr addrspace(4) + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + + + %this.addr1 = bitcast ptr %this.addr to ptr + %this.addr2 = bitcast ptr %this.addr1 to ptr + %this1 = load ptr addrspace(4), ptr %this.addr2 + + + +; %this1 = load ptr addrspace(4), ptr %this.addr + %MData1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %arraydecay2 = bitcast ptr addrspace(4) %MData1 to ptr addrspace(4) + ret ptr addrspace(4) %arraydecay2 +} + + +define internal spir_func ptr addrspace(4) @Bar54(ptr addrspace(4) align 4 %this) { +entry: + %retval = alloca ptr addrspace(4) + %this.addr = alloca ptr addrspace(4) + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + + + %this.addr1 = bitcast ptr %this.addr to ptr + %this.addr2 = bitcast ptr %this.addr1 to ptr + %this1 = load ptr addrspace(4), ptr %this.addr2 + +; %this1 = load ptr 
addrspace(4), ptr %this.addr + %MData1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %arraydecay2 = bitcast ptr addrspace(4) %MData1 to ptr addrspace(4) + %add.ptr = getelementptr inbounds nuw i32, ptr addrspace(4) %arraydecay2, i64 4 + ret ptr addrspace(4) %add.ptr +} + + +define internal spir_func i32 @Bar55(ptr addrspace(4) %this) { +entry: + %retval = alloca i32, align 4 + %this.addr = alloca ptr addrspace(4) + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %bits_num = getelementptr inbounds nuw %"ss_sub_group_mask", ptr addrspace(4) %this1, i32 0, i32 1 + %0 = load i64, ptr addrspace(4) %bits_num + %conv = trunc i64 %0 to i32 + ret i32 %conv +} + + +define internal spir_func void @Bar56(ptr addrspace(4) %this, ptr addrspace(4) align 4 %bits, ptr byval(%"range") %pos) { +entry: + %this.addr = alloca ptr addrspace(4) + %bits.addr = alloca ptr addrspace(4) + %Res = alloca i64 + store ptr addrspace(4) %this, ptr %this.addr + store ptr addrspace(4) %bits, ptr %bits.addr + %pos.ascast = addrspacecast ptr %pos to ptr addrspace(4) + %this1 = load ptr addrspace(4), ptr %this.addr + %Bits1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %0 = load i64, ptr addrspace(4) %Bits1 + store i64 %0, ptr %Res + %bits_num = getelementptr inbounds nuw %"ss_sub_group_mask", ptr addrspace(4) %this1, i32 0, i32 1 + %1 = load i64, ptr addrspace(4) %bits_num + %call = call spir_func i64 @Bar58(ptr addrspace(4) %this1, i64 %1) + %2 = load i64, ptr %Res + %and = and i64 %2, %call + store i64 %and, ptr %Res + %call2 = call spir_func i64 @Bar52(ptr addrspace(4) %pos.ascast, i32 0) + %call3 = call spir_func i32 @Bar55(ptr addrspace(4) %this1) + %conv = zext i32 %call3 to i64 + %cmp = icmp ult i64 %call2, %conv + br i1 %cmp, label %if.then, label %if.else + +if.else: ; preds = %entry + %3 = load ptr addrspace(4), ptr %bits.addr + store i32 0, ptr addrspace(4) %3, align 4 + br label %if.end11 + +if.then: ; preds = %entry + %call4 = call spir_func i64 @Bar52(ptr addrspace(4) %pos.ascast, i32 0) + %cmp5 = icmp ugt i64 %call4, 0 + br i1 %cmp5, label %if.then6, label %if.end + +if.then6: ; preds = %if.then + %call7 = call spir_func i64 @Bar52(ptr addrspace(4) %pos.ascast, i32 0) + %4 = load i64, ptr %Res + %shr = lshr i64 %4, %call7 + store i64 %shr, ptr %Res + br label %if.end + +if.end: ; preds = %if.then6, %if.then + %call8 = call spir_func i64 @Bar58(ptr addrspace(4) %this1, i64 32) + %5 = load i64, ptr %Res + %and9 = and i64 %5, %call8 + store i64 %and9, ptr %Res + %6 = load i64, ptr %Res + %conv10 = trunc i64 %6 to i32 + %7 = load ptr addrspace(4), ptr %bits.addr + store i32 %conv10, ptr addrspace(4) %7, align 4 + br label %if.end11 + +if.end11: ; preds = %if.else, %if.end + ret void +} + + +define internal spir_func i64 @Bar58(ptr addrspace(4) %this, i64 %bn) { +entry: + %retval = alloca i64 + %this.addr = alloca ptr addrspace(4) + %bn.addr = alloca i64 + %one = alloca i64 + %cleanup.dest.slot = alloca i32, align 4 + store ptr addrspace(4) %this, ptr %this.addr + store i64 %bn, ptr %bn.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %0 = load i64, ptr %bn.addr + %cmp = icmp ule i64 %0, 64 + %1 = addrspacecast ptr addrspace(1) @.str.2 to ptr addrspace(4) + %2 = addrspacecast ptr addrspace(1) @.str.1 to ptr addrspace(4) + %3 = addrspacecast ptr addrspace(1) @__PRETTY_FUNCTION2 to ptr addrspace(4) + br i1 %cmp, label %cond.end, label %cond.false + +cond.false: ; preds = %entry + 
call spir_func void @__assert_fail(ptr addrspace(4) %1, ptr addrspace(4) %2, i32 327, ptr addrspace(4) %3) + br label %cond.end + +cond.end: ; preds = %entry, %cond.false + store i64 1, ptr %one + %4 = load i64, ptr %bn.addr + %cmp2 = icmp eq i64 %4, 64 + br i1 %cmp2, label %if.then, label %if.end + +if.end: ; preds = %cond.end + %5 = load i64, ptr %one + %6 = load i64, ptr %bn.addr + %shl = shl i64 %5, %6 + %7 = load i64, ptr %one + %sub3 = sub i64 %shl, %7 + store i64 %sub3, ptr %retval + store i32 1, ptr %cleanup.dest.slot, align 4 + br label %cleanup + +if.then: ; preds = %cond.end + %8 = load i64, ptr %one + %sub = sub i64 0, %8 + store i64 %sub, ptr %retval + store i32 1, ptr %cleanup.dest.slot, align 4 + br label %cleanup + +cleanup: ; preds = %if.end, %if.then + %9 = load i64, ptr %retval + ret i64 %9 +} + + + + +define internal spir_func void @Foo11(ptr addrspace(4) %this) { +entry: + %this.addr = alloca ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + call spir_func void @Foo60(ptr addrspace(4) %this1, i64 0) + ret void +} + + +define internal spir_func void @Foo60(ptr addrspace(4) %this, i64 %dim0) { +entry: + %this.addr = alloca ptr addrspace(4) + %dim0.addr = alloca i64 + store ptr addrspace(4) %this, ptr %this.addr + store i64 %dim0, ptr %dim0.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %common_array1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %0 = load i64, ptr %dim0.addr + store i64 %0, ptr addrspace(4) %common_array1 + ret void +} + + +define internal spir_func i32 @Foo59(ptr byval(%"tangle_group") %g, i32 %x, ptr byval(%"range") %local_id) { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + %LocalId = alloca i32, align 4 + %agg.tmp = alloca %"tangle_group" + %agg.tmp1 = alloca %"range" + %cleanup.dest.slot = alloca i32, align 4 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %x.addr.ascast = addrspacecast ptr %x.addr to ptr addrspace(4) + %g.ascast = addrspacecast ptr %g to ptr addrspace(4) + store i32 %x, ptr %x.addr, align 4 + %call = call spir_func i32 @Foo48(ptr byval(%"tangle_group") %agg.tmp, ptr byval(%"range") %agg.tmp1) + store i32 %call, ptr %LocalId, align 4 + %0 = addrspacecast ptr addrspace(1) @_ZSt6ignore to ptr addrspace(4) + %call2 = call spir_func align 1 ptr addrspace(4) @Bar15(ptr addrspace(4) align 1 %0, ptr addrspace(4) %g.ascast) + %call3 = call spir_func i32 @Foo49(ptr addrspace(4) align 4 %x.addr.ascast) + %1 = load i32, ptr %LocalId, align 4 + %call4 = call spir_func i32 @Foo50(i32 3, i32 %call3, i32 %1) + ret i32 %call4 +} + + +define internal spir_func i32 @Foo47(ptr byval(%"ss_sub_group_mask") %Mask) { +entry: + %retval = alloca i32, align 4 + %MemberMask = alloca %"vec.16", align 16 + %agg.tmp = alloca %"ss_sub_group_mask" + %cleanup.dest.slot = alloca i32, align 4 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %MemberMask.ascast = addrspacecast ptr %MemberMask to ptr addrspace(4) + call spir_func void @Bar19(ptr addrspace(4) dead_on_unwind writable sret(%"vec.16") align 16 %MemberMask.ascast, ptr byval(%"ss_sub_group_mask") %agg.tmp) + %call = call spir_func <4 x i32> @Bar59(ptr addrspace(4) align 16 %MemberMask.ascast) + %call1 = call spir_func i32 @_Z37__spirv_GroupNonUniformBallotBitCountN5__spv5Scope4FlagEiDv4_j(i32 3, i32 2, <4 x i32> %call) + ret i32 %call1 +} + + +define internal spir_func <4 x i32> @Bar59(ptr addrspace(4) align 16 %x) { +entry: + %retval = alloca <4 x i32>, align 16 
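+ ; Returns the <4 x i32> mask value loaded from %x (forwarded through @Bar60).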
+ %x.addr = alloca ptr addrspace(4) + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %x, ptr %x.addr + %0 = load ptr addrspace(4), ptr %x.addr + %call = call spir_func <4 x i32> @Bar60(ptr addrspace(4) align 16 %0) + ret <4 x i32> %call +} + +declare dso_local spir_func i32 @_Z37__spirv_GroupNonUniformBallotBitCountN5__spv5Scope4FlagEiDv4_j(i32, i32, <4 x i32>) + + +define internal spir_func <4 x i32> @Bar60(ptr addrspace(4) align 16 %from) { +entry: + %retval = alloca <4 x i32>, align 16 + %from.addr = alloca ptr addrspace(4) + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %from, ptr %from.addr + %0 = load ptr addrspace(4), ptr %from.addr + %1 = load <4 x i32>, ptr addrspace(4) %0, align 16 + ret <4 x i32> %1 +} + + +define internal spir_func i32 @Foo52(ptr byval(%"tangle_group") %g, i32 %x, i32 %delta) { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + %delta.addr = alloca i32, align 4 + %TargetLocalId = alloca %"range" + %TargetId = alloca i32, align 4 + %agg.tmp = alloca %"tangle_group" + %agg.tmp3 = alloca %"range" + %cleanup.dest.slot = alloca i32, align 4 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %x.addr.ascast = addrspacecast ptr %x.addr to ptr addrspace(4) + %TargetLocalId.ascast = addrspacecast ptr %TargetLocalId to ptr addrspace(4) + %g.ascast = addrspacecast ptr %g to ptr addrspace(4) + store i32 %x, ptr %x.addr, align 4 + store i32 %delta, ptr %delta.addr, align 4 + call spir_func void @Foo51(ptr addrspace(4) dead_on_unwind writable sret(%"range") %TargetLocalId.ascast, ptr addrspace(4) %g.ascast) + %call = call spir_func ptr addrspace(4) @Foo16(ptr addrspace(4) %TargetLocalId.ascast, i32 0) + %0 = load i64, ptr addrspace(4) %call + %1 = load i32, ptr %delta.addr, align 4 + %conv = zext i32 %1 to i64 + %cmp = icmp uge i64 %0, %conv + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %2 = load i32, ptr %delta.addr, align 4 + %conv1 = zext i32 %2 to i64 + %call2 = call spir_func ptr addrspace(4) @Foo16(ptr addrspace(4) %TargetLocalId.ascast, i32 0) + %3 = load i64, ptr addrspace(4) %call2 + %sub = sub i64 %3, %conv1 + store i64 %sub, ptr addrspace(4) %call2 + br label %if.end + +if.end: ; preds = %if.then, %entry + %call4 = call spir_func i32 @Foo48(ptr byval(%"tangle_group") %agg.tmp, ptr byval(%"range") %agg.tmp3) + store i32 %call4, ptr %TargetId, align 4 + %call5 = call spir_func i32 @Foo49(ptr addrspace(4) align 4 %x.addr.ascast) + %4 = load i32, ptr %TargetId, align 4 + %call6 = call spir_func i32 @Foo50(i32 3, i32 %call5, i32 %4) + ret i32 %call6 +} + + +define internal spir_func ptr addrspace(4) @Foo16(ptr addrspace(4) %this, i32 %dimension) { +entry: + %this.addr.i = alloca ptr addrspace(4) + %dimension.addr.i = alloca i32, align 4 + %retval = alloca ptr addrspace(4) + %this.addr = alloca ptr addrspace(4) + %dimension.addr = alloca i32, align 4 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + store i32 %dimension, ptr %dimension.addr, align 4 + %this1 = load ptr addrspace(4), ptr %this.addr + %0 = load i32, ptr %dimension.addr, align 4 + store ptr addrspace(4) %this1, ptr %this.addr.i + store i32 %0, ptr %dimension.addr.i, align 4 + %this1.i = load ptr addrspace(4), ptr %this.addr.i + %common_array1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %1 = load i32, ptr %dimension.addr, align 4 + %idxprom = sext i32 %1 to i64 + %arrayidx = 
getelementptr inbounds [1 x i64], ptr addrspace(4) %common_array1, i64 0, i64 %idxprom + ret ptr addrspace(4) %arrayidx +} + + +define internal spir_func i32 @Foo72(ptr byval(%"tangle_group") %g, i32 %x, i32 %delta) { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + %delta.addr = alloca i32, align 4 + %TargetLocalId = alloca %"range" + %TargetId = alloca i32, align 4 + %agg.tmp = alloca %"tangle_group" + %agg.tmp6 = alloca %"range" + %cleanup.dest.slot = alloca i32, align 4 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %x.addr.ascast = addrspacecast ptr %x.addr to ptr addrspace(4) + %TargetLocalId.ascast = addrspacecast ptr %TargetLocalId to ptr addrspace(4) + %g.ascast = addrspacecast ptr %g to ptr addrspace(4) + store i32 %x, ptr %x.addr, align 4 + store i32 %delta, ptr %delta.addr, align 4 + call spir_func void @Foo51(ptr addrspace(4) dead_on_unwind writable sret(%"range") %TargetLocalId.ascast, ptr addrspace(4) %g.ascast) + %call = call spir_func ptr addrspace(4) @Foo16(ptr addrspace(4) %TargetLocalId.ascast, i32 0) + %0 = load i64, ptr addrspace(4) %call + %1 = load i32, ptr %delta.addr, align 4 + %conv = zext i32 %1 to i64 + %add = add i64 %0, %conv + %call1 = call spir_func i32 @Bar61(ptr addrspace(4) %g.ascast) + %conv2 = zext i32 %call1 to i64 + %cmp = icmp ult i64 %add, %conv2 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %2 = load i32, ptr %delta.addr, align 4 + %conv3 = zext i32 %2 to i64 + %call4 = call spir_func ptr addrspace(4) @Foo16(ptr addrspace(4) %TargetLocalId.ascast, i32 0) + %3 = load i64, ptr addrspace(4) %call4 + %add5 = add i64 %3, %conv3 + store i64 %add5, ptr addrspace(4) %call4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %call7 = call spir_func i32 @Foo48(ptr byval(%"tangle_group") %agg.tmp, ptr byval(%"range") %agg.tmp6) + store i32 %call7, ptr %TargetId, align 4 + %call8 = call spir_func i32 @Foo49(ptr addrspace(4) align 4 %x.addr.ascast) + %4 = load i32, ptr %TargetId, align 4 + %call9 = call spir_func i32 @Foo50(i32 3, i32 %call8, i32 %4) + ret i32 %call9 +} + + +define internal spir_func i32 @Bar61(ptr addrspace(4) %this) { +entry: + %retval = alloca i32, align 4 + %this.addr = alloca ptr addrspace(4) + %ref.tmp = alloca %"range" + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %ref.tmp.ascast = addrspacecast ptr %ref.tmp to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + call spir_func void @Foo97(ptr addrspace(4) dead_on_unwind writable sret(%"range") %ref.tmp.ascast, ptr addrspace(4) %this1) + %call = call spir_func ptr addrspace(4) @Foo16(ptr addrspace(4) %ref.tmp.ascast, i32 0) + %0 = load i64, ptr addrspace(4) %call + %conv = trunc i64 %0 to i32 + ret i32 %conv +} + + +define internal spir_func void @Foo97(ptr addrspace(4) dead_on_unwind noalias writable sret(%"range") %agg.result, ptr addrspace(4) %this) { +entry: + %this.addr = alloca ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %Mask1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %call = call spir_func i32 @Bar62(ptr addrspace(4) %Mask1) + %conv = zext i32 %call to i64 + call spir_func void @Foo9(ptr addrspace(4) %agg.result, i64 %conv) + ret void +} + + +define internal spir_func i32 @Bar62(ptr addrspace(4) %this) { +entry: + %retval = alloca i32, align 4 + %this.addr = alloca ptr addrspace(4) + %count = alloca i32, align 4 + %word = alloca i64 + 
%cleanup.dest.slot = alloca i32, align 4 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + store i32 0, ptr %count, align 4 + %Bits1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %0 = load i64, ptr addrspace(4) %Bits1 + %bits_num = getelementptr inbounds nuw %"ss_sub_group_mask", ptr addrspace(4) %this1, i32 0, i32 1 + %1 = load i64, ptr addrspace(4) %bits_num + %call = call spir_func i64 @Bar58(ptr addrspace(4) %this1, i64 %1) + %and = and i64 %0, %call + store i64 %and, ptr %word + br label %while.cond + +while.cond: ; preds = %while.body, %entry + %2 = load i64, ptr %word + %tobool = icmp ne i64 %2, 0 + br i1 %tobool, label %while.body, label %while.end + +while.end: ; preds = %while.cond + %3 = load i32, ptr %count, align 4 + ret i32 %3 + +while.body: ; preds = %while.cond + %4 = load i64, ptr %word + %sub = sub i64 %4, 1 + %5 = load i64, ptr %word + %and2 = and i64 %5, %sub + store i64 %and2, ptr %word + %6 = load i32, ptr %count, align 4 + %inc = add i32 %6, 1 + store i32 %inc, ptr %count, align 4 + br label %while.cond +} + + +define internal spir_func void @Foo9(ptr addrspace(4) %this, i64 %dim0) unnamed_addr { +entry: + %this.addr = alloca ptr addrspace(4) + %dim0.addr = alloca i64 + store ptr addrspace(4) %this, ptr %this.addr + store i64 %dim0, ptr %dim0.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %0 = load i64, ptr %dim0.addr + call spir_func void @Foo60(ptr addrspace(4) %this1, i64 %0) + ret void +} + + +define internal spir_func i32 @Foo61(ptr byval(%"tangle_group") %g, ptr byval(%"nd_item") align 1 %0, i32 %x, ptr byval(%"nd_item") align 1 %1){ +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + %agg.tmp = alloca %"tangle_group" + %agg.tmp1 = alloca %"nd_item", align 1 + %agg.tmp2 = alloca %"nd_item", align 1 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %2 = addrspacecast ptr %0 to ptr addrspace(4) + store i32 %x, ptr %x.addr, align 4 + %3 = addrspacecast ptr %1 to ptr addrspace(4) + %4 = load i32, ptr %x.addr, align 4 + %call = call spir_func i32 @Bar63(ptr byval(%"tangle_group") %agg.tmp, ptr byval(%"nd_item") align 1 %agg.tmp1, i32 %4, ptr byval(%"nd_item") align 1 %agg.tmp2) + ret i32 %call +} + + +define internal spir_func i32 @Bar63(ptr byval(%"tangle_group") %g, ptr byval(%"nd_item") align 1 %0, i32 %x, ptr byval(%"nd_item") align 1 %1){ +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + %agg.tmp = alloca %"tangle_group" + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %2 = addrspacecast ptr %0 to ptr addrspace(4) + store i32 %x, ptr %x.addr, align 4 + %3 = addrspacecast ptr %1 to ptr addrspace(4) + %4 = load i32, ptr %x.addr, align 4 + %call = call spir_func i32 @Bar64(ptr byval(%"tangle_group") %agg.tmp, i32 %4) + ret i32 %call +} + + +define internal spir_func i32 @Bar64(ptr byval(%"tangle_group") %0, i32 %x) { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + %Arg = alloca i32, align 4 + %Ret = alloca i32, align 4 + %cleanup.dest.slot = alloca i32, align 4 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %1 = addrspacecast ptr %0 to ptr addrspace(4) + store i32 %x, ptr %x.addr, align 4 + %2 = load i32, ptr %x.addr, align 4 + store i32 %2, ptr %Arg, align 4 + %3 = load i32, ptr %Arg, align 4 + %call = call spir_func i32 @_Z27__spirv_GroupNonUniformIAddIiET_N5__spv5Scope4FlagEjS0_(i32 3, i32 1, i32 %3) + 
store i32 %call, ptr %Ret, align 4 + %4 = load i32, ptr %Ret, align 4 + ret i32 %4 +} + +declare dso_local spir_func i32 @_Z27__spirv_GroupNonUniformIAddIiET_N5__spv5Scope4FlagEjS0_(i32, i32, i32) + + +define internal spir_func i32 @Bar12(ptr byval(%"tangle_group") %g, ptr byval(%"nd_item") align 1 %0, i32 %x, ptr byval(%"nd_item") align 1 %1){ +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + %agg.tmp = alloca %"tangle_group" + %agg.tmp1 = alloca %"nd_item", align 1 + %agg.tmp2 = alloca %"nd_item", align 1 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %2 = addrspacecast ptr %0 to ptr addrspace(4) + store i32 %x, ptr %x.addr, align 4 + %3 = addrspacecast ptr %1 to ptr addrspace(4) + %4 = load i32, ptr %x.addr, align 4 + %call = call spir_func i32 @Bar65(ptr byval(%"tangle_group") %agg.tmp, ptr byval(%"nd_item") align 1 %agg.tmp1, i32 %4, ptr byval(%"nd_item") align 1 %agg.tmp2) + ret i32 %call +} + + +define internal spir_func i32 @Bar65(ptr byval(%"tangle_group") %g, ptr byval(%"nd_item") align 1 %0, i32 %x, ptr byval(%"nd_item") align 1 %1){ +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + %agg.tmp = alloca %"tangle_group" + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %2 = addrspacecast ptr %0 to ptr addrspace(4) + store i32 %x, ptr %x.addr, align 4 + %3 = addrspacecast ptr %1 to ptr addrspace(4) + %4 = load i32, ptr %x.addr, align 4 + %call = call spir_func i32 @Bar66(ptr byval(%"tangle_group") %agg.tmp, i32 %4) + ret i32 %call +} + + +define internal spir_func i32 @Bar66(ptr byval(%"tangle_group") %0, i32 %x) { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + %Arg = alloca i32, align 4 + %Ret = alloca i32, align 4 + %cleanup.dest.slot = alloca i32, align 4 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %1 = addrspacecast ptr %0 to ptr addrspace(4) + store i32 %x, ptr %x.addr, align 4 + %2 = load i32, ptr %x.addr, align 4 + store i32 %2, ptr %Arg, align 4 + %3 = load i32, ptr %Arg, align 4 + %call = call spir_func i32 @_Z27__spirv_GroupNonUniformIAddIiET_N5__spv5Scope4FlagEjS0_(i32 3, i32 2, i32 %3) + store i32 %call, ptr %Ret, align 4 + %4 = load i32, ptr %Ret, align 4 + ret i32 %4 +} + + +define internal spir_func i32 @Bar11(ptr byval(%"tangle_group") %g, ptr byval(%"nd_item") align 1 %0, i32 %x, ptr byval(%"nd_item") align 1 %1){ +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + %agg.tmp = alloca %"tangle_group" + %agg.tmp1 = alloca %"nd_item", align 1 + %agg.tmp2 = alloca %"nd_item", align 1 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %2 = addrspacecast ptr %0 to ptr addrspace(4) + store i32 %x, ptr %x.addr, align 4 + %3 = addrspacecast ptr %1 to ptr addrspace(4) + %4 = load i32, ptr %x.addr, align 4 + %call = call spir_func i32 @Bar67(ptr byval(%"tangle_group") %agg.tmp, ptr byval(%"nd_item") align 1 %agg.tmp1, i32 %4, ptr byval(%"nd_item") align 1 %agg.tmp2) + ret i32 %call +} + + +define internal spir_func i32 @Bar67(ptr byval(%"tangle_group") %g, ptr byval(%"nd_item") align 1 %0, i32 %x, ptr byval(%"nd_item") align 1 %1){ +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + %agg.tmp = alloca %"tangle_group" + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %2 = addrspacecast ptr %0 to ptr addrspace(4) + store i32 %x, ptr %x.addr, align 4 + %3 = addrspacecast ptr %1 to ptr addrspace(4) + %4 = load i32, ptr %x.addr, align 4 + %call = call spir_func i32 @Bar68(ptr 
byval(%"tangle_group") %agg.tmp, i32 %4) + ret i32 %call +} + + +define internal spir_func i32 @Bar68(ptr byval(%"tangle_group") %0, i32 %x) { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + %Arg = alloca i32, align 4 + %Ret = alloca i32, align 4 + %cleanup.dest.slot = alloca i32, align 4 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %1 = addrspacecast ptr %0 to ptr addrspace(4) + store i32 %x, ptr %x.addr, align 4 + %2 = load i32, ptr %x.addr, align 4 + store i32 %2, ptr %Arg, align 4 + %3 = load i32, ptr %Arg, align 4 + %call = call spir_func i32 @_Z27__spirv_GroupNonUniformIAddIiET_N5__spv5Scope4FlagEjS0_(i32 3, i32 0, i32 %3) + store i32 %call, ptr %Ret, align 4 + %4 = load i32, ptr %Ret, align 4 + ret i32 %4 +} + + +define internal spir_func zeroext i1 @Foo66(ptr byval(%"tangle_group") %0, i1 zeroext %pred) { +entry: + %retval = alloca i1, align 1 + %pred.addr = alloca i8, align 1 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %1 = addrspacecast ptr %0 to ptr addrspace(4) + %storedv = zext i1 %pred to i8 + store i8 %storedv, ptr %pred.addr, align 1 + %2 = load i8, ptr %pred.addr, align 1 + %loadedv = trunc i8 %2 to i1 + %call = call spir_func zeroext i1 @Foo99(i32 3, i1 zeroext %loadedv) + ret i1 %call +} + +declare dso_local spir_func zeroext i1 @Foo99(i32, i1 zeroext) + + +define internal spir_func zeroext i1 @Bar10(ptr byval(%"tangle_group") %0, i1 zeroext %pred) { +entry: + %retval = alloca i1, align 1 + %pred.addr = alloca i8, align 1 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %1 = addrspacecast ptr %0 to ptr addrspace(4) + %storedv = zext i1 %pred to i8 + store i8 %storedv, ptr %pred.addr, align 1 + %2 = load i8, ptr %pred.addr, align 1 + %loadedv = trunc i8 %2 to i1 + %call = call spir_func zeroext i1 @_Z26__spirv_GroupNonUniformAnyN5__spv5Scope4FlagEb(i32 3, i1 zeroext %loadedv) + ret i1 %call +} + +declare dso_local spir_func zeroext i1 @_Z26__spirv_GroupNonUniformAnyN5__spv5Scope4FlagEb(i32, i1 zeroext) + + +define internal spir_func void @Foo98(ptr addrspace(4) dead_on_unwind noalias writable sret(%"range") %agg.result, ptr byval(%"range") %0, i64 %linear_id) { +entry: + %linear_id.addr = alloca i64 + %1 = addrspacecast ptr %0 to ptr addrspace(4) + store i64 %linear_id, ptr %linear_id.addr + %2 = load i64, ptr %linear_id.addr + call spir_func void @Foo46(ptr addrspace(4) %agg.result, i64 %2) + ret void +} + + +define internal spir_func i32 @Bar69(ptr byval(%"tangle_group") %g, i32 %x, ptr byval(%"range") %local_id) { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + %agg.tmp = alloca %"tangle_group" + %agg.tmp1 = alloca %"range" + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store i32 %x, ptr %x.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 + %call = call spir_func i32 @Bar70(ptr byval(%"tangle_group") %agg.tmp, i32 %0, ptr byval(%"range") %agg.tmp1) + ret i32 %call +} + + +define internal spir_func i32 @Bar70(ptr byval(%"tangle_group") %g, i32 %x, ptr byval(%"range") %local_id) { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + %agg.tmp = alloca %"tangle_group" + %VecId = alloca %"range" + %OCLX = alloca i32, align 4 + %WideOCLX = alloca i32, align 4 + %OCLId = alloca i64 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %VecId.ascast = addrspacecast ptr %VecId to ptr addrspace(4) + %OCLX.ascast = addrspacecast ptr %OCLX to ptr addrspace(4) + %WideOCLX.ascast = addrspacecast ptr %WideOCLX to 
ptr addrspace(4) + %OCLId.ascast = addrspacecast ptr %OCLId to ptr addrspace(4) + store i32 %x, ptr %x.addr, align 4 + %local_id.ascast = addrspacecast ptr %local_id to ptr addrspace(4) + %0 = load i32, ptr %x.addr, align 4 + %call = call spir_func ptr addrspace(4) @Foo16(ptr addrspace(4) %local_id.ascast, i32 0) + %1 = load i64, ptr addrspace(4) %call + %call1 = call spir_func i32 @Bar71(ptr byval(%"tangle_group") %agg.tmp, i32 %0, i64 %1) + ret i32 %call1 +} + + +define internal spir_func i32 @Bar71(ptr byval(%"tangle_group") %g, i32 %x, i64 %local_id) { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + %local_id.addr = alloca i64 + %LocalId = alloca i32, align 4 + %agg.tmp = alloca %"tangle_group" + %GroupLocalId = alloca i32, align 4 + %OCLX = alloca i32, align 4 + %WideOCLX = alloca i32, align 4 + %OCLId = alloca i32, align 4 + %cleanup.dest.slot = alloca i32, align 4 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %x.addr.ascast = addrspacecast ptr %x.addr to ptr addrspace(4) + %GroupLocalId.ascast = addrspacecast ptr %GroupLocalId to ptr addrspace(4) + store i32 %x, ptr %x.addr, align 4 + store i64 %local_id, ptr %local_id.addr + %0 = load i64, ptr %local_id.addr + %conv = trunc i64 %0 to i32 + %call = call spir_func i32 @Bar17(ptr byval(%"tangle_group") %agg.tmp, i32 %conv) + store i32 %call, ptr %LocalId, align 4 + %1 = load i32, ptr %LocalId, align 4 + store i32 %1, ptr %GroupLocalId, align 4 + %call1 = call spir_func i32 @Foo49(ptr addrspace(4) align 4 %x.addr.ascast) + store i32 %call1, ptr %OCLX, align 4 + %2 = load i32, ptr %OCLX, align 4 + store i32 %2, ptr %WideOCLX, align 4 + %call2 = call spir_func i32 @Foo49(ptr addrspace(4) align 4 %GroupLocalId.ascast) + store i32 %call2, ptr %OCLId, align 4 + %3 = load i32, ptr %WideOCLX, align 4 + %4 = load i32, ptr %OCLId, align 4 + %call3 = call spir_func i32 @_Z32__spirv_GroupNonUniformBroadcastIjjET_N5__spv5Scope4FlagES0_T0_(i32 3, i32 %3, i32 %4) + ret i32 %call3 +} + +declare dso_local spir_func i32 @_Z32__spirv_GroupNonUniformBroadcastIjjET_N5__spv5Scope4FlagES0_T0_(i32, i32, i32) + + +define internal spir_func void @Foo96(ptr addrspace(4) dead_on_unwind noalias writable sret(%"range") %agg.result, ptr addrspace(4) align 1 %this) { +entry: + %this.addr = alloca ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %call = call spir_func i32 @_Z33__spirv_SubgroupLocalInvocationIdv() + %conv = zext i32 %call to i64 + call spir_func void @Foo46(ptr addrspace(4) %agg.result, i64 %conv) + ret void +} + + +define internal spir_func i32 @_Z33__spirv_SubgroupLocalInvocationIdv() { +entry: + %retval = alloca i32, align 4 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %0 = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 + ret i32 %0 +} + + +define internal spir_func i64 @Foo77(ptr addrspace(4) %this, ptr byval(%"range") %Id) { +entry: + %retval = alloca i64 + %this.addr = alloca ptr addrspace(4) + %Result = alloca i64 + %ref.tmp = alloca %class.anon.15 + %cleanup.dest.slot = alloca i32, align 4 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %Result.ascast = addrspacecast ptr %Result to ptr addrspace(4) + %ref.tmp.ascast = addrspacecast ptr %ref.tmp to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %Id.ascast = addrspacecast ptr %Id to ptr addrspace(4) + %this1 = load ptr addrspace(4), ptr %this.addr + store i64 0, ptr %Result + %0 = bitcast ptr 
%ref.tmp to ptr + store ptr addrspace(4) %this1, ptr %0 + %Result2 = getelementptr inbounds %class.anon.15, ptr %ref.tmp, i32 0, i32 1 + store ptr addrspace(4) %Result.ascast, ptr %Result2 + %Id3 = getelementptr inbounds %class.anon.15, ptr %ref.tmp, i32 0, i32 2 + store ptr addrspace(4) %Id.ascast, ptr %Id3 + call spir_func void @Foo79(ptr addrspace(4) %ref.tmp.ascast) + %1 = load i64, ptr %Result + ret i64 %1 +} + + +define internal spir_func ptr addrspace(1) @Foo78(ptr addrspace(4) %this) { +entry: + %retval = alloca ptr addrspace(1) + %this.addr = alloca ptr addrspace(4) + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %0 = getelementptr inbounds nuw %"accessor", ptr addrspace(4) %this1, i32 0, i32 1 + %1 = load ptr addrspace(1), ptr addrspace(4) %0 + ret ptr addrspace(1) %1 +} + + +define internal spir_func void @Foo79(ptr addrspace(4) %f) { +entry: + %f.addr = alloca ptr addrspace(4) + %agg.tmp = alloca %"nd_item", align 1 + store ptr addrspace(4) %f, ptr %f.addr + %0 = load ptr addrspace(4), ptr %f.addr + call spir_func void @Foo80(ptr byval(%"nd_item") align 1 %agg.tmp, ptr addrspace(4) %0) + ret void +} + + +define internal spir_func void @Foo80(ptr byval(%"nd_item") align 1 %0, ptr addrspace(4) %f) { +entry: + %f.addr = alloca ptr addrspace(4) + %ref.tmp = alloca %"nd_item", align 1 + %ref.tmp.ascast = addrspacecast ptr %ref.tmp to ptr addrspace(4) + %1 = addrspacecast ptr %0 to ptr addrspace(4) + store ptr addrspace(4) %f, ptr %f.addr + %2 = load ptr addrspace(4), ptr %f.addr + %call = call spir_func i64 @_ZNKSt17integral_constantImLm0EEcvmEv(ptr addrspace(4) align 1 %ref.tmp.ascast) + call spir_func void @Foo81(ptr addrspace(4) %2, i64 %call) + ret void +} + + +define internal spir_func i64 @_ZNKSt17integral_constantImLm0EEcvmEv(ptr addrspace(4) align 1 %this) { +entry: + %retval = alloca i64 + %this.addr = alloca ptr addrspace(4) + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + ret i64 0 +} + + +define internal spir_func void @Foo81(ptr addrspace(4) %this, i64 %I) align 2 { +entry: + %this.addr = alloca ptr addrspace(4) + %I.addr = alloca i64 + store ptr addrspace(4) %this, ptr %this.addr + store i64 %I, ptr %I.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %0 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %1 = load ptr addrspace(4), ptr addrspace(4) %0 + %Result = getelementptr inbounds nuw %class.anon.15, ptr addrspace(4) %this1, i32 0, i32 1 + %2 = load ptr addrspace(4), ptr addrspace(4) %Result + %3 = load i64, ptr addrspace(4) %2 + %call = call spir_func ptr addrspace(4) @Bar72(ptr addrspace(4) %1) + %4 = load i64, ptr %I.addr + %conv = trunc i64 %4 to i32 + %call2 = call spir_func i64 @Foo37(ptr addrspace(4) %call, i32 %conv) + %mul = mul i64 %3, %call2 + %Id = getelementptr inbounds nuw %class.anon.15, ptr addrspace(4) %this1, i32 0, i32 2 + %5 = load ptr addrspace(4), ptr addrspace(4) %Id + %6 = load i64, ptr %I.addr + %conv3 = trunc i64 %6 to i32 + %call4 = call spir_func ptr addrspace(4) @Foo16(ptr addrspace(4) %5, i32 %conv3) + %7 = load i64, ptr addrspace(4) %call4 + %add = add i64 %mul, %7 + %Result5 = getelementptr inbounds nuw %class.anon.15, ptr addrspace(4) %this1, i32 0, i32 1 + %8 = load ptr addrspace(4), ptr addrspace(4) %Result5 + store i64 %add, ptr addrspace(4) %8 + ret void +} + + +define internal spir_func ptr 
addrspace(4) @Bar72(ptr addrspace(4) %this) { +entry: + %retval = alloca ptr addrspace(4) + %this.addr = alloca ptr addrspace(4) + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %impl1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %MemRange = getelementptr inbounds nuw %"detail::AccessorImplDevice", ptr addrspace(4) %impl1, i32 0, i32 2 + ret ptr addrspace(4) %MemRange +} + + +define internal spir_func i64 @Foo37(ptr addrspace(4) %this, i32 %dimension) { +entry: + %this.addr.i = alloca ptr addrspace(4) + %dimension.addr.i = alloca i32, align 4 + %retval = alloca i64 + %this.addr = alloca ptr addrspace(4) + %dimension.addr = alloca i32, align 4 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + store i32 %dimension, ptr %dimension.addr, align 4 + %this1 = load ptr addrspace(4), ptr %this.addr + %0 = load i32, ptr %dimension.addr, align 4 + store ptr addrspace(4) %this1, ptr %this.addr.i + store i32 %0, ptr %dimension.addr.i, align 4 + %this1.i = load ptr addrspace(4), ptr %this.addr.i + %common_array1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %1 = load i32, ptr %dimension.addr, align 4 + %idxprom = sext i32 %1 to i64 + %arrayidx = getelementptr inbounds [1 x i64], ptr addrspace(4) %common_array1, i64 0, i64 %idxprom + %2 = load i64, ptr addrspace(4) %arrayidx + ret i64 %2 +} + + +define internal spir_func void @Foo95(ptr byval(%"tangle_group") %g, i32 %FenceScope, i32 %Order) { +entry: + %FenceScope.addr = alloca i32, align 4 + %Order.addr = alloca i32, align 4 + %g.ascast = addrspacecast ptr %g to ptr addrspace(4) + store i32 %FenceScope, ptr %FenceScope.addr, align 4 + store i32 %Order, ptr %Order.addr, align 4 + %0 = load i32, ptr %FenceScope.addr, align 4 + %call = call spir_func i32 @Bar73(i32 %0) + %1 = load i32, ptr %Order.addr, align 4 + %call1 = call spir_func i32 @Bar74(i32 %1) + %or = or i32 %call1, 128 + %or2 = or i32 %or, 256 + %or3 = or i32 %or2, 512 + call spir_func void @_Z21__spirv_MemoryBarrierjj(i32 %call, i32 %or3) + ret void +} + + +define internal spir_func i32 @Bar73(i32 %Scope){ +entry: + %retval = alloca i32, align 4 + %Scope.addr = alloca i32, align 4 + store i32 %Scope, ptr %Scope.addr, align 4 + %0 = load i32, ptr %Scope.addr, align 4 + switch i32 %0, label %sw.epilog [ + i32 0, label %sw.bb + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + i32 3, label %sw.bb3 + i32 4, label %sw.bb4 + ] + +sw.bb4: ; preds = %entry + store i32 0, ptr %retval, align 4 + br label %return + +sw.bb3: ; preds = %entry + store i32 1, ptr %retval, align 4 + br label %return + +sw.bb2: ; preds = %entry + store i32 2, ptr %retval, align 4 + br label %return + +sw.bb1: ; preds = %entry + store i32 3, ptr %retval, align 4 + br label %return + +sw.bb: ; preds = %entry + store i32 4, ptr %retval, align 4 + br label %return + +return: ; preds = %sw.bb4, %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb + %1 = load i32, ptr %retval, align 4 + ret i32 %1 + +sw.epilog: ; preds = %entry + unreachable +} + + +define internal spir_func i32 @Bar74(i32 %Order){ +entry: + %retval = alloca i32, align 4 + %Order.addr = alloca i32, align 4 + %SpvOrder = alloca i32, align 4 + %cleanup.dest.slot = alloca i32, align 4 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store i32 %Order, ptr %Order.addr, align 4 + store i32 0, ptr %SpvOrder, align 4 + %0 = load i32, ptr %Order.addr, align 4 + switch i32 %0, label 
%sw.epilog [ + i32 0, label %sw.bb + i32 2, label %sw.bb1 + i32 1, label %sw.bb1 + i32 3, label %sw.bb2 + i32 4, label %sw.bb3 + i32 5, label %sw.bb4 + ] + +sw.bb4: ; preds = %entry + store i32 16, ptr %SpvOrder, align 4 + br label %sw.epilog + +sw.bb3: ; preds = %entry + store i32 8, ptr %SpvOrder, align 4 + br label %sw.epilog + +sw.bb2: ; preds = %entry + store i32 4, ptr %SpvOrder, align 4 + br label %sw.epilog + +sw.bb1: ; preds = %entry, %entry + store i32 2, ptr %SpvOrder, align 4 + br label %sw.epilog + +sw.bb: ; preds = %entry + store i32 0, ptr %SpvOrder, align 4 + br label %sw.epilog + +sw.epilog: ; preds = %sw.bb4, %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb, %entry + %1 = load i32, ptr %SpvOrder, align 4 + %or = or i32 %1, 128 + %or5 = or i32 %or, 256 + %or6 = or i32 %or5, 512 + ret i32 %or6 +} + +declare dso_local spir_func void @_Z21__spirv_MemoryBarrierjj(i32, i32) + + +define internal spir_func i64 @Foo93(ptr addrspace(4) %this, ptr byval(%"range") %Id) { +entry: + %retval = alloca i64 + %this.addr = alloca ptr addrspace(4) + %Result = alloca i64 + %ref.tmp = alloca %class.anon.15 + %cleanup.dest.slot = alloca i32, align 4 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %Result.ascast = addrspacecast ptr %Result to ptr addrspace(4) + %ref.tmp.ascast = addrspacecast ptr %ref.tmp to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %Id.ascast = addrspacecast ptr %Id to ptr addrspace(4) + %this1 = load ptr addrspace(4), ptr %this.addr + store i64 0, ptr %Result + %0 = bitcast ptr %ref.tmp to ptr + store ptr addrspace(4) %this1, ptr %0 + %Result2 = getelementptr inbounds %class.anon.15, ptr %ref.tmp, i32 0, i32 1 + store ptr addrspace(4) %Result.ascast, ptr %Result2 + %Id3 = getelementptr inbounds %class.anon.15, ptr %ref.tmp, i32 0, i32 2 + store ptr addrspace(4) %Id.ascast, ptr %Id3 + call spir_func void @Bar75(ptr addrspace(4) %ref.tmp.ascast) + %1 = load i64, ptr %Result + ret i64 %1 +} + + +define internal spir_func ptr addrspace(1) @Foo94(ptr addrspace(4) %this) { +entry: + %retval = alloca ptr addrspace(1) + %this.addr = alloca ptr addrspace(4) + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %0 = getelementptr inbounds nuw %"accessor", ptr addrspace(4) %this1, i32 0, i32 1 + %1 = load ptr addrspace(1), ptr addrspace(4) %0 + ret ptr addrspace(1) %1 +} + + +define internal spir_func void @Bar75(ptr addrspace(4) %f) { +entry: + %f.addr = alloca ptr addrspace(4) + %agg.tmp = alloca %"nd_item", align 1 + store ptr addrspace(4) %f, ptr %f.addr + %0 = load ptr addrspace(4), ptr %f.addr + call spir_func void @Bar76(ptr byval(%"nd_item") align 1 %agg.tmp, ptr addrspace(4) %0) + ret void +} + + +define internal spir_func void @Bar76(ptr byval(%"nd_item") align 1 %0, ptr addrspace(4) %f) { +entry: + %f.addr = alloca ptr addrspace(4) + %ref.tmp = alloca %"nd_item", align 1 + %ref.tmp.ascast = addrspacecast ptr %ref.tmp to ptr addrspace(4) + %1 = addrspacecast ptr %0 to ptr addrspace(4) + store ptr addrspace(4) %f, ptr %f.addr + %2 = load ptr addrspace(4), ptr %f.addr + %call = call spir_func i64 @_ZNKSt17integral_constantImLm0EEcvmEv(ptr addrspace(4) align 1 %ref.tmp.ascast) + call spir_func void @Bar767(ptr addrspace(4) %2, i64 %call) + ret void +} + + +define internal spir_func void @Bar767(ptr addrspace(4) %this, i64 %I) align 2 { +entry: + %this.addr = alloca ptr addrspace(4) + %I.addr = alloca i64 + store ptr addrspace(4) %this, ptr 
%this.addr + store i64 %I, ptr %I.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %0 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %1 = load ptr addrspace(4), ptr addrspace(4) %0 + %Result = getelementptr inbounds nuw %class.anon.15, ptr addrspace(4) %this1, i32 0, i32 1 + %2 = load ptr addrspace(4), ptr addrspace(4) %Result + %3 = load i64, ptr addrspace(4) %2 + %call = call spir_func ptr addrspace(4) @Bar78(ptr addrspace(4) %1) + %4 = load i64, ptr %I.addr + %conv = trunc i64 %4 to i32 + %call2 = call spir_func i64 @Foo37(ptr addrspace(4) %call, i32 %conv) + %mul = mul i64 %3, %call2 + %Id = getelementptr inbounds nuw %class.anon.15, ptr addrspace(4) %this1, i32 0, i32 2 + %5 = load ptr addrspace(4), ptr addrspace(4) %Id + %6 = load i64, ptr %I.addr + %conv3 = trunc i64 %6 to i32 + %call4 = call spir_func ptr addrspace(4) @Foo16(ptr addrspace(4) %5, i32 %conv3) + %7 = load i64, ptr addrspace(4) %call4 + %add = add i64 %mul, %7 + %Result5 = getelementptr inbounds nuw %class.anon.15, ptr addrspace(4) %this1, i32 0, i32 1 + %8 = load ptr addrspace(4), ptr addrspace(4) %Result5 + store i64 %add, ptr addrspace(4) %8 + ret void +} + + +define internal spir_func ptr addrspace(4) @Bar78(ptr addrspace(4) %this) { +entry: + %retval = alloca ptr addrspace(4) + %this.addr = alloca ptr addrspace(4) + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %impl1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %MemRange = getelementptr inbounds nuw %"detail::AccessorImplDevice", ptr addrspace(4) %impl1, i32 0, i32 2 + ret ptr addrspace(4) %MemRange +} + + +define internal spir_func void @Foo44(ptr addrspace(4) dead_on_unwind noalias writable sret(%"ss_sub_group_mask") %agg.result, ptr byval(%"nd_item") align 1 %g, i1 zeroext %predicate) { +entry: + %predicate.addr = alloca i8, align 1 + %res = alloca <4 x i32>, align 16 + %val = alloca i64 + %ref.tmp = alloca %"range" + %cleanup.dest.slot = alloca i32, align 4 + %ref.tmp.ascast = addrspacecast ptr %ref.tmp to ptr addrspace(4) + %g.ascast = addrspacecast ptr %g to ptr addrspace(4) + %storedv = zext i1 %predicate to i8 + store i8 %storedv, ptr %predicate.addr, align 1 + %0 = load i8, ptr %predicate.addr, align 1 + %loadedv = trunc i8 %0 to i1 + %call = call spir_func <4 x i32> @_Z29__spirv_GroupNonUniformBallotjb(i32 3, i1 zeroext %loadedv) + store <4 x i32> %call, ptr %res, align 16 + %1 = load <4 x i32>, ptr %res, align 16 + %vecext = extractelement <4 x i32> %1, i32 0 + %conv = zext i32 %vecext to i64 + store i64 %conv, ptr %val + %2 = load <4 x i32>, ptr %res, align 16 + %vecext1 = extractelement <4 x i32> %2, i32 1 + %conv2 = zext i32 %vecext1 to i64 + %shl = shl i64 %conv2, 32 + %3 = load i64, ptr %val + %or = or i64 %3, %shl + store i64 %or, ptr %val + %4 = load i64, ptr %val + call spir_func void @Bar79(ptr addrspace(4) dead_on_unwind writable sret(%"range") %ref.tmp.ascast, ptr addrspace(4) align 1 %g.ascast) + %call3 = call spir_func ptr addrspace(4) @Foo16(ptr addrspace(4) %ref.tmp.ascast, i32 0) + %5 = load i64, ptr addrspace(4) %call3 + call spir_func void @Bar80(ptr addrspace(4) dead_on_unwind writable sret(%"ss_sub_group_mask") %agg.result, i64 %4, i64 %5) + ret void +} + + +define internal spir_func void @Foo45(ptr addrspace(4) %this, ptr byval(%"ss_sub_group_mask") %m) unnamed_addr { +entry: + %this.addr = alloca ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr 
addrspace(4), ptr %this.addr + %Mask1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + ret void +} + +declare dso_local spir_func <4 x i32> @_Z29__spirv_GroupNonUniformBallotjb(i32, i1 zeroext) + + +define internal spir_func void @Bar79(ptr addrspace(4) dead_on_unwind noalias writable sret(%"range") %agg.result, ptr addrspace(4) align 1 %this) { +entry: + %this.addr = alloca ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %call = call spir_func i32 @_Z23__spirv_SubgroupMaxSizev() + %conv = zext i32 %call to i64 + call spir_func void @Foo9(ptr addrspace(4) %agg.result, i64 %conv) + ret void +} + + +define internal spir_func void @Bar80(ptr addrspace(4) dead_on_unwind noalias writable sret(%"ss_sub_group_mask") %agg.result, i64 %Bits, i64 %BitsNum) { +entry: + %Bits.addr = alloca i64 + %BitsNum.addr = alloca i64 + store i64 %Bits, ptr %Bits.addr + store i64 %BitsNum, ptr %BitsNum.addr + %0 = load i64, ptr %Bits.addr + %1 = load i64, ptr %BitsNum.addr + call spir_func void @Bar81(ptr addrspace(4) %agg.result, i64 %0, i64 %1) + ret void +} + + +define internal spir_func void @Bar81(ptr addrspace(4) %this, i64 %rhs, i64 %bn) unnamed_addr { +entry: + %this.addr = alloca ptr addrspace(4) + %rhs.addr = alloca i64 + %bn.addr = alloca i64 + store ptr addrspace(4) %this, ptr %this.addr + store i64 %rhs, ptr %rhs.addr + store i64 %bn, ptr %bn.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %Bits1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %0 = load i64, ptr %rhs.addr + %1 = load i64, ptr %bn.addr + %call = call spir_func i64 @Bar58(ptr addrspace(4) %this1, i64 %1) + %and = and i64 %0, %call + store i64 %and, ptr addrspace(4) %Bits1 + %bits_num = getelementptr inbounds nuw %"ss_sub_group_mask", ptr addrspace(4) %this1, i32 0, i32 1 + %2 = load i64, ptr %bn.addr + store i64 %2, ptr addrspace(4) %bits_num + %bits_num2 = getelementptr inbounds nuw %"ss_sub_group_mask", ptr addrspace(4) %this1, i32 0, i32 1 + %3 = load i64, ptr addrspace(4) %bits_num2 + %cmp = icmp ule i64 %3, 64 + %4 = addrspacecast ptr addrspace(1) @.str to ptr addrspace(4) + %5 = addrspacecast ptr addrspace(1) @.str.1 to ptr addrspace(4) + %6 = addrspacecast ptr addrspace(1) @__PRETTY_FUNCTION1 to ptr addrspace(4) + br i1 %cmp, label %cond.end, label %cond.false + +cond.false: ; preds = %entry + call spir_func void @__assert_fail(ptr addrspace(4) %4, ptr addrspace(4) %5, i32 324, ptr addrspace(4) %6) + br label %cond.end + +cond.end: ; preds = %entry, %cond.false + ret void +} + + +define internal spir_func i32 @_Z23__spirv_SubgroupMaxSizev() { +entry: + %retval = alloca i32, align 4 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %0 = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupMaxSize, align 4 + ret i32 %0 +} + + +define internal spir_func void @Init6(ptr addrspace(4) dead_on_unwind noalias writable sret(%"range") %agg.result) { +entry: + call spir_func void @Inv1(ptr addrspace(4) dead_on_unwind writable sret(%"range") %agg.result) + ret void +} + + +define internal spir_func void @Inv1(ptr addrspace(4) dead_on_unwind noalias writable sret(%"range") %agg.result) { +entry: + %call = call spir_func i64 @Inv2() + call spir_func void @Foo46(ptr addrspace(4) %agg.result, i64 %call) + ret void +} + + +define internal spir_func i64 @Inv2() { +entry: + %retval = alloca i64 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %call = call spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + ret i64 %call +} + + 
+define internal spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() { +entry: + %retval = alloca i64 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %0 = load <3 x i64>, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32 + %1 = extractelement <3 x i64> %0, i64 0 + ret i64 %1 +} + + +define internal spir_func void @Foo7(ptr addrspace(4) dead_on_unwind noalias writable sret(%"range") %agg.result) { +entry: + call spir_func void @Foo8(ptr addrspace(4) dead_on_unwind writable sret(%"range") %agg.result) + ret void +} + + +define internal spir_func void @Init1(ptr addrspace(4) dead_on_unwind noalias writable sret(%"range") %agg.result) { +entry: + call spir_func void @Inv3(ptr addrspace(4) dead_on_unwind writable sret(%"range") %agg.result) + ret void +} + + +define internal spir_func void @Init2(ptr addrspace(4) dead_on_unwind noalias writable sret(%"range") %agg.result) { +entry: + call spir_func void @InitSize1(ptr addrspace(4) dead_on_unwind writable sret(%"range") %agg.result) + ret void +} + + +define internal spir_func void @Init3(ptr addrspace(4) dead_on_unwind noalias writable sret(%"range") %agg.result) { +entry: + call spir_func void @InitSize2(ptr addrspace(4) dead_on_unwind writable sret(%"range") %agg.result) + ret void +} + + +define internal spir_func void @Init4(ptr addrspace(4) dead_on_unwind noalias writable sret(%"range") %agg.result) { +entry: + call spir_func void @InitSize3(ptr addrspace(4) dead_on_unwind writable sret(%"range") %agg.result) + ret void +} + + +define internal spir_func void @Init5(ptr addrspace(4) dead_on_unwind noalias writable sret(%"range") %agg.result) { +entry: + call spir_func void @InitSize4(ptr addrspace(4) dead_on_unwind writable sret(%"range") %agg.result) + ret void +} + + +define internal spir_func void @Foo23(ptr addrspace(4) dead_on_unwind noalias writable sret(%"group") %agg.result, ptr addrspace(4) %Global, ptr addrspace(4) %Local, ptr addrspace(4) %Group, ptr addrspace(4) %Index) { +entry: + %Global.addr = alloca ptr addrspace(4) + %Local.addr = alloca ptr addrspace(4) + %Group.addr = alloca ptr addrspace(4) + %Index.addr = alloca ptr addrspace(4) + %agg.tmp = alloca %"range" + store ptr addrspace(4) %Global, ptr %Global.addr + store ptr addrspace(4) %Local, ptr %Local.addr + store ptr addrspace(4) %Group, ptr %Group.addr + store ptr addrspace(4) %Index, ptr %Index.addr + %0 = load ptr addrspace(4), ptr %Global.addr + %1 = load ptr addrspace(4), ptr %Local.addr + %2 = load ptr addrspace(4), ptr %Group.addr + %3 = load ptr addrspace(4), ptr %Index.addr + call spir_func void @Bar82(ptr addrspace(4) %agg.result, ptr addrspace(4) %0, ptr addrspace(4) %1, ptr byval(%"range") %agg.tmp, ptr addrspace(4) %3) + ret void +} + + +define internal spir_func void @Foo24(ptr addrspace(4) dead_on_unwind noalias writable sret(%"item") %agg.result, ptr addrspace(4) %Extent, ptr addrspace(4) %Index, ptr addrspace(4) %Offset) { +entry: + %Extent.addr = alloca ptr addrspace(4) + %Index.addr = alloca ptr addrspace(4) + %Offset.addr = alloca ptr addrspace(4) + store ptr addrspace(4) %Extent, ptr %Extent.addr + store ptr addrspace(4) %Index, ptr %Index.addr + store ptr addrspace(4) %Offset, ptr %Offset.addr + %0 = load ptr addrspace(4), ptr %Extent.addr + %1 = load ptr addrspace(4), ptr %Index.addr + %2 = load ptr addrspace(4), ptr %Offset.addr + call spir_func void @Foo29(ptr addrspace(4) %agg.result, ptr addrspace(4) %0, ptr addrspace(4) %1, ptr addrspace(4) %2) + ret void +} + + +define internal spir_func void @Foo25(ptr 
addrspace(4) dead_on_unwind noalias writable sret(%"item.22") %agg.result, ptr addrspace(4) %Extent, ptr addrspace(4) %Index) { +entry: + %Extent.addr = alloca ptr addrspace(4) + %Index.addr = alloca ptr addrspace(4) + store ptr addrspace(4) %Extent, ptr %Extent.addr + store ptr addrspace(4) %Index, ptr %Index.addr + %0 = load ptr addrspace(4), ptr %Extent.addr + %1 = load ptr addrspace(4), ptr %Index.addr + call spir_func void @Foo27(ptr addrspace(4) %agg.result, ptr addrspace(4) %0, ptr addrspace(4) %1) + ret void +} + + +define internal spir_func void @Foo26(ptr addrspace(4) dead_on_unwind noalias writable sret(%"nd_item") align 1 %agg.result, ptr addrspace(4) %Global, ptr addrspace(4) %Local, ptr addrspace(4) %Group) { +entry: + %Global.addr = alloca ptr addrspace(4) + %Local.addr = alloca ptr addrspace(4) + %Group.addr = alloca ptr addrspace(4) + store ptr addrspace(4) %Global, ptr %Global.addr + store ptr addrspace(4) %Local, ptr %Local.addr + store ptr addrspace(4) %Group, ptr %Group.addr + %0 = load ptr addrspace(4), ptr %Global.addr + %1 = load ptr addrspace(4), ptr %Local.addr + %2 = load ptr addrspace(4), ptr %Group.addr + call spir_func void @Foo28(ptr addrspace(4) align 1 %agg.result, ptr addrspace(4) %0, ptr addrspace(4) %1, ptr addrspace(4) %2) + ret void +} + + +define internal spir_func void @Foo28(ptr addrspace(4) align 1 %this, ptr addrspace(4) %0, ptr addrspace(4) %1, ptr addrspace(4) %2) unnamed_addr { +entry: + %this.addr = alloca ptr addrspace(4) + %.addr = alloca ptr addrspace(4) + %.addr1 = alloca ptr addrspace(4) + %.addr2 = alloca ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + store ptr addrspace(4) %0, ptr %.addr + store ptr addrspace(4) %1, ptr %.addr1 + store ptr addrspace(4) %2, ptr %.addr2 + %this3 = load ptr addrspace(4), ptr %this.addr + ret void +} + + +define internal spir_func void @Foo27(ptr addrspace(4) %this, ptr addrspace(4) %extent, ptr addrspace(4) %index) unnamed_addr { +entry: + %this.addr = alloca ptr addrspace(4) + %extent.addr = alloca ptr addrspace(4) + %index.addr = alloca ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + store ptr addrspace(4) %extent, ptr %extent.addr + store ptr addrspace(4) %index, ptr %index.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %MImpl1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %MExtent2 = bitcast ptr addrspace(4) %MImpl1 to ptr addrspace(4) + %0 = load ptr addrspace(4), ptr %extent.addr + %MIndex = getelementptr inbounds nuw %"sd_ItemBase.23", ptr addrspace(4) %MImpl1, i32 0, i32 1 + %1 = load ptr addrspace(4), ptr %index.addr + ret void +} + + + + +define internal spir_func void @Foo29(ptr addrspace(4) %this, ptr addrspace(4) %extent, ptr addrspace(4) %index, ptr addrspace(4) %offset) unnamed_addr { +entry: + %this.addr = alloca ptr addrspace(4) + %extent.addr = alloca ptr addrspace(4) + %index.addr = alloca ptr addrspace(4) + %offset.addr = alloca ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + store ptr addrspace(4) %extent, ptr %extent.addr + store ptr addrspace(4) %index, ptr %index.addr + store ptr addrspace(4) %offset, ptr %offset.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %MImpl1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %MExtent2 = bitcast ptr addrspace(4) %MImpl1 to ptr addrspace(4) + %0 = load ptr addrspace(4), ptr %extent.addr + %MIndex = getelementptr inbounds nuw %"detail::AccessorImplDevice", ptr addrspace(4) %MImpl1, i32 0, i32 1 + %1 = load ptr addrspace(4), ptr %index.addr + %MOffset = 
getelementptr inbounds nuw %"detail::AccessorImplDevice", ptr addrspace(4) %MImpl1, i32 0, i32 2 + %2 = load ptr addrspace(4), ptr %offset.addr + ret void +} + + +define internal spir_func void @Bar82(ptr addrspace(4) %this, ptr addrspace(4) %G, ptr addrspace(4) %L, ptr byval(%"range") %GroupRange, ptr addrspace(4) %I) unnamed_addr { +entry: + %this.addr = alloca ptr addrspace(4) + %G.addr = alloca ptr addrspace(4) + %L.addr = alloca ptr addrspace(4) + %I.addr = alloca ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + store ptr addrspace(4) %G, ptr %G.addr + store ptr addrspace(4) %L, ptr %L.addr + store ptr addrspace(4) %I, ptr %I.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %globalRange1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %0 = load ptr addrspace(4), ptr %G.addr + %localRange = getelementptr inbounds nuw %"group", ptr addrspace(4) %this1, i32 0, i32 1 + %1 = load ptr addrspace(4), ptr %L.addr + %groupRange = getelementptr inbounds nuw %"group", ptr addrspace(4) %this1, i32 0, i32 2 + %index = getelementptr inbounds nuw %"group", ptr addrspace(4) %this1, i32 0, i32 3 + %2 = load ptr addrspace(4), ptr %I.addr + ret void +} + + +define internal spir_func void @InitSize4(ptr addrspace(4) dead_on_unwind noalias writable sret(%"range") %agg.result) { +entry: + %call = call spir_func i64 @_ZN7__spirv15getGlobalOffsetILi0EEEmv() + call spir_func void @Foo46(ptr addrspace(4) %agg.result, i64 %call) + ret void +} + + +define internal spir_func i64 @_ZN7__spirv15getGlobalOffsetILi0EEEmv() { +entry: + %retval = alloca i64 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %call = call spir_func i64 @_Z22__spirv_GlobalOffset_xv() + ret i64 %call +} + + +define internal spir_func i64 @_Z22__spirv_GlobalOffset_xv() { +entry: + %retval = alloca i64 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %0 = load <3 x i64>, ptr addrspace(1) @__spirv_BuiltInGlobalOffset, align 32 + %1 = extractelement <3 x i64> %0, i64 0 + ret i64 %1 +} + + +define internal spir_func void @InitSize3(ptr addrspace(4) dead_on_unwind noalias writable sret(%"range") %agg.result) { +entry: + %call = call spir_func i64 @_ZN7__spirv20getLocalInvocationIdILi0EEEmv() + call spir_func void @Foo46(ptr addrspace(4) %agg.result, i64 %call) + ret void +} + + +define internal spir_func i64 @_ZN7__spirv20getLocalInvocationIdILi0EEEmv() { +entry: + %retval = alloca i64 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %call = call spir_func i64 @_Z27__spirv_LocalInvocationId_xv() + ret i64 %call +} + + +define internal spir_func i64 @_Z27__spirv_LocalInvocationId_xv() { +entry: + %retval = alloca i64 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %0 = load <3 x i64>, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32 + %1 = extractelement <3 x i64> %0, i64 0 + ret i64 %1 +} + + +define internal spir_func void @InitSize2(ptr addrspace(4) dead_on_unwind noalias writable sret(%"range") %agg.result) { +entry: + %call = call spir_func i64 @_ZN7__spirv14getWorkgroupIdILi0EEEmv() + call spir_func void @Foo46(ptr addrspace(4) %agg.result, i64 %call) + ret void +} + + +define internal spir_func i64 @_ZN7__spirv14getWorkgroupIdILi0EEEmv() { +entry: + %retval = alloca i64 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %call = call spir_func i64 @_Z21__spirv_WorkgroupId_xv() + ret i64 %call +} + + +define internal spir_func i64 @_Z21__spirv_WorkgroupId_xv() { +entry: + %retval = alloca i64 + %retval.ascast = 
addrspacecast ptr %retval to ptr addrspace(4) + %0 = load <3 x i64>, ptr addrspace(1) @__spirv_BuiltInWorkgroupId, align 32 + %1 = extractelement <3 x i64> %0, i64 0 + ret i64 %1 +} + + +define internal spir_func void @InitSize1(ptr addrspace(4) dead_on_unwind noalias writable sret(%"range") %agg.result) { +entry: + %call = call spir_func i64 @_ZN7__spirv16getNumWorkgroupsILi0EEEmv() + call spir_func void @Foo9(ptr addrspace(4) %agg.result, i64 %call) + ret void +} + + +define internal spir_func i64 @_ZN7__spirv16getNumWorkgroupsILi0EEEmv() { +entry: + %retval = alloca i64 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %call = call spir_func i64 @_Z23__spirv_NumWorkgroups_xv() + ret i64 %call +} + + +define internal spir_func i64 @_Z23__spirv_NumWorkgroups_xv() { +entry: + %retval = alloca i64 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %0 = load <3 x i64>, ptr addrspace(1) @__spirv_BuiltInNumWorkgroups, align 32 + %1 = extractelement <3 x i64> %0, i64 0 + ret i64 %1 +} + + +define internal spir_func void @Inv3(ptr addrspace(4) dead_on_unwind noalias writable sret(%"range") %agg.result) { +entry: + %call = call spir_func i64 @_ZN7__spirv16getWorkgroupSizeILi0EEEmv() + call spir_func void @Foo9(ptr addrspace(4) %agg.result, i64 %call) + ret void +} + + +define internal spir_func i64 @_ZN7__spirv16getWorkgroupSizeILi0EEEmv() { +entry: + %retval = alloca i64 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %call = call spir_func i64 @_Z23__spirv_WorkgroupSize_xv() + ret i64 %call +} + + +define internal spir_func i64 @_Z23__spirv_WorkgroupSize_xv() { +entry: + %retval = alloca i64 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %0 = load <3 x i64>, ptr addrspace(1) @__spirv_BuiltInWorkgroupSize, align 32 + %1 = extractelement <3 x i64> %0, i64 0 + ret i64 %1 +} + + +define internal spir_func void @Foo8(ptr addrspace(4) dead_on_unwind noalias writable sret(%"range") %agg.result) { +entry: + %call = call spir_func i64 @_ZN7__spirv13getGlobalSizeILi0EEEmv() + call spir_func void @Foo9(ptr addrspace(4) %agg.result, i64 %call) + ret void +} + + +define internal spir_func i64 @_ZN7__spirv13getGlobalSizeILi0EEEmv() { +entry: + %retval = alloca i64 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %call = call spir_func i64 @_Z20__spirv_GlobalSize_xv() + ret i64 %call +} + + +define internal spir_func i64 @_Z20__spirv_GlobalSize_xv() { +entry: + %retval = alloca i64 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %0 = load <3 x i64>, ptr addrspace(1) @__spirv_BuiltInGlobalSize, align 32 + %1 = extractelement <3 x i64> %0, i64 0 + ret i64 %1 +} + + +define internal spir_func void @Foo30(ptr addrspace(4) %f) { +entry: + %f.addr = alloca ptr addrspace(4) + %agg.tmp = alloca %"nd_item", align 1 + store ptr addrspace(4) %f, ptr %f.addr + %0 = load ptr addrspace(4), ptr %f.addr + call spir_func void @Foo33(ptr byval(%"nd_item") align 1 %agg.tmp, ptr addrspace(4) %0) + ret void +} + + +define internal spir_func i64 @Foo32(ptr addrspace(4) %this) { +entry: + %retval = alloca i64 + %this.addr = alloca ptr addrspace(4) + %TotalOffset = alloca i64 + %ref.tmp = alloca %class.anon.7 + %cleanup.dest.slot = alloca i32, align 4 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %TotalOffset.ascast = addrspacecast ptr %TotalOffset to ptr addrspace(4) + %ref.tmp.ascast = addrspacecast ptr %ref.tmp to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr 
addrspace(4), ptr %this.addr + store i64 0, ptr %TotalOffset + %0 = bitcast ptr %ref.tmp to ptr + store ptr addrspace(4) %this1, ptr %0 + %TotalOffset2 = getelementptr inbounds %class.anon.7, ptr %ref.tmp, i32 0, i32 1 + store ptr addrspace(4) %TotalOffset.ascast, ptr %TotalOffset2 + call spir_func void @Foo34(ptr addrspace(4) %ref.tmp.ascast) + %1 = load i64, ptr %TotalOffset + ret i64 %1 +} + + +define internal spir_func void @Foo34(ptr addrspace(4) %f) { +entry: + %f.addr = alloca ptr addrspace(4) + %agg.tmp = alloca %"nd_item", align 1 + store ptr addrspace(4) %f, ptr %f.addr + %0 = load ptr addrspace(4), ptr %f.addr + call spir_func void @Foo35(ptr byval(%"nd_item") align 1 %agg.tmp, ptr addrspace(4) %0) + ret void +} + + +define internal spir_func void @Foo35(ptr byval(%"nd_item") align 1 %0, ptr addrspace(4) %f) { +entry: + %f.addr = alloca ptr addrspace(4) + %ref.tmp = alloca %"nd_item", align 1 + %ref.tmp.ascast = addrspacecast ptr %ref.tmp to ptr addrspace(4) + %1 = addrspacecast ptr %0 to ptr addrspace(4) + store ptr addrspace(4) %f, ptr %f.addr + %2 = load ptr addrspace(4), ptr %f.addr + %call = call spir_func i64 @_ZNKSt17integral_constantImLm0EEcvmEv(ptr addrspace(4) align 1 %ref.tmp.ascast) + call spir_func void @Foo36(ptr addrspace(4) %2, i64 %call) + ret void +} + + +define internal spir_func void @Foo36(ptr addrspace(4) %this, i64 %I) align 2 { +entry: + %this.addr = alloca ptr addrspace(4) + %I.addr = alloca i64 + store ptr addrspace(4) %this, ptr %this.addr + store i64 %I, ptr %I.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %0 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %1 = load ptr addrspace(4), ptr addrspace(4) %0 + %TotalOffset = getelementptr inbounds nuw %class.anon.7, ptr addrspace(4) %this1, i32 0, i32 1 + %2 = load ptr addrspace(4), ptr addrspace(4) %TotalOffset + %3 = load i64, ptr addrspace(4) %2 + %impl1 = bitcast ptr addrspace(4) %1 to ptr addrspace(4) + %MemRange = getelementptr inbounds nuw %"detail::AccessorImplDevice", ptr addrspace(4) %impl1, i32 0, i32 2 + %4 = load i64, ptr %I.addr + %conv = trunc i64 %4 to i32 + %call = call spir_func i64 @Foo37(ptr addrspace(4) %MemRange, i32 %conv) + %mul = mul i64 %3, %call + %TotalOffset2 = getelementptr inbounds nuw %class.anon.7, ptr addrspace(4) %this1, i32 0, i32 1 + %5 = load ptr addrspace(4), ptr addrspace(4) %TotalOffset2 + store i64 %mul, ptr addrspace(4) %5 + %impl32 = bitcast ptr addrspace(4) %1 to ptr addrspace(4) + %Offset3 = bitcast ptr addrspace(4) %impl32 to ptr addrspace(4) + %6 = load i64, ptr %I.addr + %conv4 = trunc i64 %6 to i32 + %call5 = call spir_func i64 @Foo37(ptr addrspace(4) %Offset3, i32 %conv4) + %TotalOffset6 = getelementptr inbounds nuw %class.anon.7, ptr addrspace(4) %this1, i32 0, i32 1 + %7 = load ptr addrspace(4), ptr addrspace(4) %TotalOffset6 + %8 = load i64, ptr addrspace(4) %7 + %add = add i64 %8, %call5 + store i64 %add, ptr addrspace(4) %7 + ret void +} + + +define internal spir_func void @Foo33(ptr byval(%"nd_item") align 1 %0, ptr addrspace(4) %f) { +entry: + %f.addr = alloca ptr addrspace(4) + %ref.tmp = alloca %"nd_item", align 1 + %ref.tmp.ascast = addrspacecast ptr %ref.tmp to ptr addrspace(4) + %1 = addrspacecast ptr %0 to ptr addrspace(4) + store ptr addrspace(4) %f, ptr %f.addr + %2 = load ptr addrspace(4), ptr %f.addr + %call = call spir_func i64 @_ZNKSt17integral_constantImLm0EEcvmEv(ptr addrspace(4) align 1 %ref.tmp.ascast) + call spir_func void @Foo38(ptr addrspace(4) %2, i64 %call) + ret void +} + + +define internal spir_func void 
@Foo38(ptr addrspace(4) %this, i64 %I) align 2 { +entry: + %this.addr = alloca ptr addrspace(4) + %I.addr = alloca i64 + store ptr addrspace(4) %this, ptr %this.addr + store i64 %I, ptr %I.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %0 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %1 = load ptr addrspace(4), ptr addrspace(4) %0 + %Offset = getelementptr inbounds nuw %class.anon.6, ptr addrspace(4) %this1, i32 0, i32 1 + %2 = load ptr addrspace(4), ptr addrspace(4) %Offset + %3 = load i64, ptr %I.addr + %conv = trunc i64 %3 to i32 + %call = call spir_func ptr addrspace(4) @Foo16(ptr addrspace(4) %2, i32 %conv) + %4 = load i64, ptr addrspace(4) %call + %call2 = call spir_func ptr addrspace(4) @Foo39(ptr addrspace(4) %1) + %5 = load i64, ptr %I.addr + %conv3 = trunc i64 %5 to i32 + %call4 = call spir_func ptr addrspace(4) @Foo16(ptr addrspace(4) %call2, i32 %conv3) + store i64 %4, ptr addrspace(4) %call4 + %AccessRange = getelementptr inbounds nuw %class.anon.6, ptr addrspace(4) %this1, i32 0, i32 2 + %6 = load ptr addrspace(4), ptr addrspace(4) %AccessRange + %7 = load i64, ptr %I.addr + %conv5 = trunc i64 %7 to i32 + %call6 = call spir_func ptr addrspace(4) @Foo16(ptr addrspace(4) %6, i32 %conv5) + %8 = load i64, ptr addrspace(4) %call6 + %call7 = call spir_func ptr addrspace(4) @Foo40A(ptr addrspace(4) %1) + %9 = load i64, ptr %I.addr + %conv8 = trunc i64 %9 to i32 + %call9 = call spir_func ptr addrspace(4) @Foo16(ptr addrspace(4) %call7, i32 %conv8) + store i64 %8, ptr addrspace(4) %call9 + %MemRange = getelementptr inbounds nuw %class.anon.6, ptr addrspace(4) %this1, i32 0, i32 3 + %10 = load ptr addrspace(4), ptr addrspace(4) %MemRange + %11 = load i64, ptr %I.addr + %conv10 = trunc i64 %11 to i32 + %call11 = call spir_func ptr addrspace(4) @Foo16(ptr addrspace(4) %10, i32 %conv10) + %12 = load i64, ptr addrspace(4) %call11 + %call12 = call spir_func ptr addrspace(4) @Foo41A(ptr addrspace(4) %1) + %13 = load i64, ptr %I.addr + %conv13 = trunc i64 %13 to i32 + %call14 = call spir_func ptr addrspace(4) @Foo16(ptr addrspace(4) %call12, i32 %conv13) + store i64 %12, ptr addrspace(4) %call14 + ret void +} + + +define internal spir_func ptr addrspace(4) @Foo39(ptr addrspace(4) %this) { +entry: + %retval = alloca ptr addrspace(4) + %this.addr = alloca ptr addrspace(4) + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %impl1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %Offset2 = bitcast ptr addrspace(4) %impl1 to ptr addrspace(4) + ret ptr addrspace(4) %Offset2 +} + + +define internal spir_func ptr addrspace(4) @Foo40A(ptr addrspace(4) %this) { +entry: + %retval = alloca ptr addrspace(4) + %this.addr = alloca ptr addrspace(4) + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %impl1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %AccessRange = getelementptr inbounds nuw %"detail::AccessorImplDevice", ptr addrspace(4) %impl1, i32 0, i32 1 + ret ptr addrspace(4) %AccessRange +} + + +define internal spir_func ptr addrspace(4) @Foo41A(ptr addrspace(4) %this) { +entry: + %retval = alloca ptr addrspace(4) + %this.addr = alloca ptr addrspace(4) + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %impl1 = bitcast ptr addrspace(4) 
%this1 to ptr addrspace(4) + %MemRange = getelementptr inbounds nuw %"detail::AccessorImplDevice", ptr addrspace(4) %impl1, i32 0, i32 2 + ret ptr addrspace(4) %MemRange +} + + +define internal spir_func void @Foo13(ptr addrspace(4) %f) { +entry: + %f.addr = alloca ptr addrspace(4) + %agg.tmp = alloca %"nd_item", align 1 + store ptr addrspace(4) %f, ptr %f.addr + %0 = load ptr addrspace(4), ptr %f.addr + call spir_func void @Foo14(ptr byval(%"nd_item") align 1 %agg.tmp, ptr addrspace(4) %0) + ret void +} + + +define internal spir_func i64 @Foo21(ptr addrspace(4) %this) { +entry: + %retval = alloca i64 + %this.addr = alloca ptr addrspace(4) + %TotalOffset = alloca i64 + %ref.tmp = alloca %class.anon.7 + %cleanup.dest.slot = alloca i32, align 4 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %TotalOffset.ascast = addrspacecast ptr %TotalOffset to ptr addrspace(4) + %ref.tmp.ascast = addrspacecast ptr %ref.tmp to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + store i64 0, ptr %TotalOffset + %0 = bitcast ptr %ref.tmp to ptr + store ptr addrspace(4) %this1, ptr %0 + %TotalOffset2 = getelementptr inbounds %class.anon.7, ptr %ref.tmp, i32 0, i32 1 + store ptr addrspace(4) %TotalOffset.ascast, ptr %TotalOffset2 + call spir_func void @Bar83(ptr addrspace(4) %ref.tmp.ascast) + %1 = load i64, ptr %TotalOffset + ret i64 %1 +} + + +define internal spir_func void @Bar83(ptr addrspace(4) %f) { +entry: + %f.addr = alloca ptr addrspace(4) + %agg.tmp = alloca %"nd_item", align 1 + store ptr addrspace(4) %f, ptr %f.addr + %0 = load ptr addrspace(4), ptr %f.addr + call spir_func void @Bar84(ptr byval(%"nd_item") align 1 %agg.tmp, ptr addrspace(4) %0) + ret void +} + + +define internal spir_func void @Bar84(ptr byval(%"nd_item") align 1 %0, ptr addrspace(4) %f) { +entry: + %f.addr = alloca ptr addrspace(4) + %ref.tmp = alloca %"nd_item", align 1 + %ref.tmp.ascast = addrspacecast ptr %ref.tmp to ptr addrspace(4) + %1 = addrspacecast ptr %0 to ptr addrspace(4) + store ptr addrspace(4) %f, ptr %f.addr + %2 = load ptr addrspace(4), ptr %f.addr + %call = call spir_func i64 @_ZNKSt17integral_constantImLm0EEcvmEv(ptr addrspace(4) align 1 %ref.tmp.ascast) + call spir_func void @Bar85(ptr addrspace(4) %2, i64 %call) + ret void +} + + +define internal spir_func void @Bar85(ptr addrspace(4) %this, i64 %I) align 2 { +entry: + %this.addr = alloca ptr addrspace(4) + %I.addr = alloca i64 + store ptr addrspace(4) %this, ptr %this.addr + store i64 %I, ptr %I.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %0 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %1 = load ptr addrspace(4), ptr addrspace(4) %0 + %TotalOffset = getelementptr inbounds nuw %class.anon.7, ptr addrspace(4) %this1, i32 0, i32 1 + %2 = load ptr addrspace(4), ptr addrspace(4) %TotalOffset + %3 = load i64, ptr addrspace(4) %2 + %impl1 = bitcast ptr addrspace(4) %1 to ptr addrspace(4) + %MemRange = getelementptr inbounds nuw %"detail::AccessorImplDevice", ptr addrspace(4) %impl1, i32 0, i32 2 + %4 = load i64, ptr %I.addr + %conv = trunc i64 %4 to i32 + %call = call spir_func i64 @Foo37(ptr addrspace(4) %MemRange, i32 %conv) + %mul = mul i64 %3, %call + %TotalOffset2 = getelementptr inbounds nuw %class.anon.7, ptr addrspace(4) %this1, i32 0, i32 1 + %5 = load ptr addrspace(4), ptr addrspace(4) %TotalOffset2 + store i64 %mul, ptr addrspace(4) %5 + %impl32 = bitcast ptr addrspace(4) %1 to ptr addrspace(4) + %Offset3 = bitcast ptr addrspace(4) %impl32 to ptr 
addrspace(4) + %6 = load i64, ptr %I.addr + %conv4 = trunc i64 %6 to i32 + %call5 = call spir_func i64 @Foo37(ptr addrspace(4) %Offset3, i32 %conv4) + %TotalOffset6 = getelementptr inbounds nuw %class.anon.7, ptr addrspace(4) %this1, i32 0, i32 1 + %7 = load ptr addrspace(4), ptr addrspace(4) %TotalOffset6 + %8 = load i64, ptr addrspace(4) %7 + %add = add i64 %8, %call5 + store i64 %add, ptr addrspace(4) %7 + ret void +} + + +define internal spir_func void @Foo14(ptr byval(%"nd_item") align 1 %0, ptr addrspace(4) %f) { +entry: + %f.addr = alloca ptr addrspace(4) + %ref.tmp = alloca %"nd_item", align 1 + %ref.tmp.ascast = addrspacecast ptr %ref.tmp to ptr addrspace(4) + %1 = addrspacecast ptr %0 to ptr addrspace(4) + store ptr addrspace(4) %f, ptr %f.addr + %2 = load ptr addrspace(4), ptr %f.addr + %call = call spir_func i64 @_ZNKSt17integral_constantImLm0EEcvmEv(ptr addrspace(4) align 1 %ref.tmp.ascast) + call spir_func void @Foo15(ptr addrspace(4) %2, i64 %call) + ret void +} + + +define internal spir_func void @Foo15(ptr addrspace(4) %this, i64 %I) align 2 { +entry: + %this.addr = alloca ptr addrspace(4) + %I.addr = alloca i64 + store ptr addrspace(4) %this, ptr %this.addr + store i64 %I, ptr %I.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %0 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %1 = load ptr addrspace(4), ptr addrspace(4) %0 + %Offset = getelementptr inbounds nuw %class.anon.6, ptr addrspace(4) %this1, i32 0, i32 1 + %2 = load ptr addrspace(4), ptr addrspace(4) %Offset + %3 = load i64, ptr %I.addr + %conv = trunc i64 %3 to i32 + %call = call spir_func ptr addrspace(4) @Foo16(ptr addrspace(4) %2, i32 %conv) + %4 = load i64, ptr addrspace(4) %call + %call2 = call spir_func ptr addrspace(4) @Foo17(ptr addrspace(4) %1) + %5 = load i64, ptr %I.addr + %conv3 = trunc i64 %5 to i32 + %call4 = call spir_func ptr addrspace(4) @Foo16(ptr addrspace(4) %call2, i32 %conv3) + store i64 %4, ptr addrspace(4) %call4 + %AccessRange = getelementptr inbounds nuw %class.anon.6, ptr addrspace(4) %this1, i32 0, i32 2 + %6 = load ptr addrspace(4), ptr addrspace(4) %AccessRange + %7 = load i64, ptr %I.addr + %conv5 = trunc i64 %7 to i32 + %call6 = call spir_func ptr addrspace(4) @Foo16(ptr addrspace(4) %6, i32 %conv5) + %8 = load i64, ptr addrspace(4) %call6 + %call7 = call spir_func ptr addrspace(4) @Foo18(ptr addrspace(4) %1) + %9 = load i64, ptr %I.addr + %conv8 = trunc i64 %9 to i32 + %call9 = call spir_func ptr addrspace(4) @Foo16(ptr addrspace(4) %call7, i32 %conv8) + store i64 %8, ptr addrspace(4) %call9 + %MemRange = getelementptr inbounds nuw %class.anon.6, ptr addrspace(4) %this1, i32 0, i32 3 + %10 = load ptr addrspace(4), ptr addrspace(4) %MemRange + %11 = load i64, ptr %I.addr + %conv10 = trunc i64 %11 to i32 + %call11 = call spir_func ptr addrspace(4) @Foo16(ptr addrspace(4) %10, i32 %conv10) + %12 = load i64, ptr addrspace(4) %call11 + %call12 = call spir_func ptr addrspace(4) @Foo19(ptr addrspace(4) %1) + %13 = load i64, ptr %I.addr + %conv13 = trunc i64 %13 to i32 + %call14 = call spir_func ptr addrspace(4) @Foo16(ptr addrspace(4) %call12, i32 %conv13) + store i64 %12, ptr addrspace(4) %call14 + ret void +} + + +define internal spir_func ptr addrspace(4) @Foo17(ptr addrspace(4) %this) { +entry: + %retval = alloca ptr addrspace(4) + %this.addr = alloca ptr addrspace(4) + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %impl1 = bitcast ptr addrspace(4) 
%this1 to ptr addrspace(4) + %Offset2 = bitcast ptr addrspace(4) %impl1 to ptr addrspace(4) + ret ptr addrspace(4) %Offset2 +} + + +define internal spir_func ptr addrspace(4) @Foo18(ptr addrspace(4) %this) { +entry: + %retval = alloca ptr addrspace(4) + %this.addr = alloca ptr addrspace(4) + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %impl1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %AccessRange = getelementptr inbounds nuw %"detail::AccessorImplDevice", ptr addrspace(4) %impl1, i32 0, i32 1 + ret ptr addrspace(4) %AccessRange +} + + +define internal spir_func ptr addrspace(4) @Foo19(ptr addrspace(4) %this) { +entry: + %retval = alloca ptr addrspace(4) + %this.addr = alloca ptr addrspace(4) + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %impl1 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %MemRange = getelementptr inbounds nuw %"detail::AccessorImplDevice", ptr addrspace(4) %impl1, i32 0, i32 2 + ret ptr addrspace(4) %MemRange +} + + +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) + + +define internal spir_func void @Foo12(ptr addrspace(4) dead_on_unwind noalias writable sret(%"range") %agg.result) { +entry: + call spir_func void @Foo9(ptr addrspace(4) %agg.result, i64 0) + ret void +} + + +define internal spir_func void @Foo10(ptr addrspace(4) %this, ptr byval(%"range") %Offset, ptr byval(%"range") %AccessRange, ptr byval(%"range") %MemoryRange) unnamed_addr { +entry: + %this.addr = alloca ptr addrspace(4) + store ptr addrspace(4) %this, ptr %this.addr + %this1 = load ptr addrspace(4), ptr %this.addr + %Offset21 = bitcast ptr addrspace(4) %this1 to ptr addrspace(4) + %AccessRange3 = getelementptr inbounds nuw %"detail::AccessorImplDevice", ptr addrspace(4) %this1, i32 0, i32 1 + %MemRange = getelementptr inbounds nuw %"detail::AccessorImplDevice", ptr addrspace(4) %this1, i32 0, i32 2 + ret void +} + + +define internal spir_func void @__assert_fail(ptr addrspace(4) %expr, ptr addrspace(4) %file, i32 %line, ptr addrspace(4) %func) { +entry: + %call = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + %call1 = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_yv() + %call2 = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_zv() + %call3 = tail call spir_func i64 @_Z27__spirv_LocalInvocationId_xv() + %call4 = tail call spir_func i64 @_Z27__spirv_LocalInvocationId_yv() + %call5 = tail call spir_func i64 @_Z27__spirv_LocalInvocationId_zv() + tail call spir_func void @__devicelib_assert_fail(ptr addrspace(4) %expr, ptr addrspace(4) %file, i32 %line, ptr addrspace(4) %func, i64 %call, i64 %call1, i64 %call2, i64 %call3, i64 %call4, i64 %call5) + ret void +} + + +define internal spir_func i64 @_Z28__spirv_GlobalInvocationId_yv() local_unnamed_addr { +entry: + %0 = getelementptr inbounds i8, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, i64 8 + %1 = load i64, ptr addrspace(1) %0 + ret i64 %1 +} + + +define internal spir_func i64 @_Z28__spirv_GlobalInvocationId_zv() local_unnamed_addr { +entry: + %0 = getelementptr inbounds i8, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, i64 16 + %1 = load i64, ptr addrspace(1) %0, align 16 + ret i64 %1 +} + + +define internal spir_func i64 @_Z27__spirv_LocalInvocationId_yv() local_unnamed_addr { +entry: + %0 = getelementptr inbounds i8, ptr 
addrspace(1) @__spirv_BuiltInLocalInvocationId, i64 8 + %1 = load i64, ptr addrspace(1) %0 + ret i64 %1 +} + + +define internal spir_func i64 @_Z27__spirv_LocalInvocationId_zv() local_unnamed_addr { +entry: + %0 = getelementptr inbounds i8, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, i64 16 + %1 = load i64, ptr addrspace(1) %0, align 16 + ret i64 %1 +} + + +define internal spir_func void @__devicelib_assert_fail(ptr addrspace(4) %expr, ptr addrspace(4) %file, i32 %line, ptr addrspace(4) %func, i64 %gid0, i64 %gid1, i64 %gid2, i64 %lid0, i64 %lid1, i64 %lid2) local_unnamed_addr { +entry: + %call.i = tail call spir_func i32 @_Z29__spirv_AtomicCompareExchangePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_ii(ptr addrspace(1) @SPIR_AssertHappenedMem, i32 1, i32 16, i32 16, i32 1, i32 0) + %cmp = icmp eq i32 %call.i, 0 + %0 = getelementptr inbounds nuw i8, ptr addrspace(1) @SPIR_AssertHappenedMem, i64 4 + %1 = getelementptr inbounds nuw i8, ptr addrspace(1) @SPIR_AssertHappenedMem, i64 261 + %2 = getelementptr inbounds nuw i8, ptr addrspace(1) @SPIR_AssertHappenedMem, i64 518 + br i1 %cmp, label %if.then, label %if.end82 + +if.then: ; preds = %entry + store i32 %line, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @SPIR_AssertHappenedMem, i64 648) + store i64 %gid0, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @SPIR_AssertHappenedMem, i64 656) + store i64 %gid1, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @SPIR_AssertHappenedMem, i64 664) + store i64 %gid2, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @SPIR_AssertHappenedMem, i64 672) + store i64 %lid0, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @SPIR_AssertHappenedMem, i64 680) + store i64 %lid1, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @SPIR_AssertHappenedMem, i64 688) + store i64 %lid2, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @SPIR_AssertHappenedMem, i64 696) + %tobool.not = icmp eq ptr addrspace(4) %expr, null + br i1 %tobool.not, label %if.end, label %for.cond.preheader + +for.cond.preheader: ; preds = %if.then + br label %for.cond + +for.cond: ; preds = %for.cond.preheader, %for.inc + %ExprLength.0 = phi i32 [ %inc, %for.inc ], [ 0, %for.cond.preheader ] + %C.0 = phi ptr addrspace(4) [ %incdec.ptr, %for.inc ], [ %expr, %for.cond.preheader ] + %3 = load i8, ptr addrspace(4) %C.0, align 1 + %cmp2.not = icmp eq i8 %3, 0 + br i1 %cmp2.not, label %if.end, label %for.inc + +for.inc: ; preds = %for.cond + %incdec.ptr = getelementptr inbounds nuw i8, ptr addrspace(4) %C.0, i64 1 + %inc = add nuw nsw i32 %ExprLength.0, 1 + br label %for.cond + +if.end: ; preds = %for.cond, %if.then + %ExprLength.1 = phi i32 [ 0, %if.then ], [ %ExprLength.0, %for.cond ] + %tobool3.not = icmp eq ptr addrspace(4) %file, null + br i1 %tobool3.not, label %if.end16, label %for.cond6.preheader + +for.cond6.preheader: ; preds = %if.end + br label %for.cond6 + +for.cond6: ; preds = %for.cond6.preheader, %for.inc12 + %FileLength.0 = phi i32 [ %inc14, %for.inc12 ], [ 0, %for.cond6.preheader ] + %C5.0 = phi ptr addrspace(4) [ %incdec.ptr13, %for.inc12 ], [ %file, %for.cond6.preheader ] + %4 = load i8, ptr addrspace(4) %C5.0, align 1 + %cmp8.not = icmp eq i8 %4, 0 + br i1 %cmp8.not, label %if.end16, label %for.inc12 + +for.inc12: ; preds = %for.cond6 + %incdec.ptr13 = getelementptr inbounds nuw i8, ptr addrspace(4) %C5.0, i64 1 + %inc14 = add nuw nsw i32 %FileLength.0, 1 + br label %for.cond6 + +if.end16: ; preds = %for.cond6, 
%if.end + %FileLength.1 = phi i32 [ 0, %if.end ], [ %FileLength.0, %for.cond6 ] + %tobool17.not = icmp eq ptr addrspace(4) %func, null + br i1 %tobool17.not, label %if.end30.thread, label %for.cond20.preheader + +for.cond20.preheader: ; preds = %if.end16 + br label %for.cond20 + +for.cond20: ; preds = %for.cond20.preheader, %for.inc26 + %FuncLength.0 = phi i32 [ %inc28, %for.inc26 ], [ 0, %for.cond20.preheader ] + %C19.0 = phi ptr addrspace(4) [ %incdec.ptr27, %for.inc26 ], [ %func, %for.cond20.preheader ] + %5 = load i8, ptr addrspace(4) %C19.0, align 1 + %cmp22.not = icmp eq i8 %5, 0 + br i1 %cmp22.not, label %if.end30, label %for.inc26 + +for.inc26: ; preds = %for.cond20 + %incdec.ptr27 = getelementptr inbounds nuw i8, ptr addrspace(4) %C19.0, i64 1 + %inc28 = add i32 %FuncLength.0, 1 + br label %for.cond20 + +if.end30: ; preds = %for.cond20 + %spec.select = tail call i32 @llvm.umin.i32(i32 %ExprLength.1, i32 256) + %MaxFileIdx.0 = tail call i32 @llvm.umin.i32(i32 %FileLength.1, i32 256) + %spec.select126 = tail call i32 @llvm.umin.i32(i32 %FuncLength.0, i32 128) + br label %6 + +if.end30.thread: ; preds = %if.end16 + %spec.select116 = tail call i32 @llvm.umin.i32(i32 %ExprLength.1, i32 256) + %MaxFileIdx.0118 = tail call i32 @llvm.umin.i32(i32 %FileLength.1, i32 256) + br label %6 + +6: ; preds = %if.end30, %if.end30.thread + %MaxFileIdx.0124 = phi i32 [ %MaxFileIdx.0118, %if.end30.thread ], [ %MaxFileIdx.0, %if.end30 ] + %spec.select122 = phi i32 [ %spec.select116, %if.end30.thread ], [ %spec.select, %if.end30 ] + %7 = phi i32 [ 0, %if.end30.thread ], [ %spec.select126, %if.end30 ] + br label %for.cond40 + +for.cond40: ; preds = %for.body44, %6 + %lsr.iv9 = phi ptr addrspace(4) [ %scevgep10, %for.body44 ], [ %expr, %6 ] + %lsr.iv7 = phi ptr addrspace(1) [ %scevgep8, %for.body44 ], [ %0, %6 ] + %Idx.0 = phi i32 [ 0, %6 ], [ %inc48, %for.body44 ] + %cmp41 = icmp ult i32 %Idx.0, %spec.select122 + br i1 %cmp41, label %for.body44, label %for.cond.cleanup42 + +for.cond.cleanup42: ; preds = %for.cond40 + %idxprom50 = zext nneg i32 %spec.select122 to i64 + %arrayidx51 = getelementptr inbounds [257 x i8], ptr addrspace(1) %0, i64 0, i64 %idxprom50 + store i8 0, ptr addrspace(1) %arrayidx51, align 1 + br label %for.cond53 + +for.cond53: ; preds = %for.body57, %for.cond.cleanup42 + %lsr.iv5 = phi ptr addrspace(4) [ %scevgep6, %for.body57 ], [ %file, %for.cond.cleanup42 ] + %lsr.iv3 = phi ptr addrspace(1) [ %scevgep4, %for.body57 ], [ %1, %for.cond.cleanup42 ] + %Idx52.0 = phi i32 [ 0, %for.cond.cleanup42 ], [ %inc63, %for.body57 ] + %cmp54 = icmp ult i32 %Idx52.0, %MaxFileIdx.0124 + br i1 %cmp54, label %for.body57, label %for.cond.cleanup55 + +for.cond.cleanup55: ; preds = %for.cond53 + %idxprom65 = zext nneg i32 %MaxFileIdx.0124 to i64 + %arrayidx66 = getelementptr inbounds [257 x i8], ptr addrspace(1) %1, i64 0, i64 %idxprom65 + store i8 0, ptr addrspace(1) %arrayidx66, align 1 + br label %for.cond68 + +for.cond68: ; preds = %for.body72, %for.cond.cleanup55 + %lsr.iv1 = phi ptr addrspace(4) [ %scevgep2, %for.body72 ], [ %func, %for.cond.cleanup55 ] + %lsr.iv = phi ptr addrspace(1) [ %scevgep, %for.body72 ], [ %2, %for.cond.cleanup55 ] + %Idx67.0 = phi i32 [ 0, %for.cond.cleanup55 ], [ %inc78, %for.body72 ] + %cmp69 = icmp ult i32 %Idx67.0, %7 + br i1 %cmp69, label %for.body72, label %for.cond.cleanup70 + +for.cond.cleanup70: ; preds = %for.cond68 + %idxprom80 = zext nneg i32 %7 to i64 + %arrayidx81 = getelementptr inbounds [129 x i8], ptr addrspace(1) %2, i64 0, i64 %idxprom80 + store i8 0, 
ptr addrspace(1) %arrayidx81, align 1 + tail call spir_func void @_Z19__spirv_AtomicStorePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEi(ptr addrspace(1) @SPIR_AssertHappenedMem, i32 1, i32 16, i32 2) + br label %if.end82 + +if.end82: ; preds = %for.cond.cleanup70, %entry + ret void + +for.body72: ; preds = %for.cond68 + %8 = load i8, ptr addrspace(4) %lsr.iv1, align 1 + store i8 %8, ptr addrspace(1) %lsr.iv, align 1 + %inc78 = add nuw nsw i32 %Idx67.0, 1 + %scevgep = getelementptr i8, ptr addrspace(1) %lsr.iv, i64 1 + %scevgep2 = getelementptr i8, ptr addrspace(4) %lsr.iv1, i64 1 + br label %for.cond68 + +for.body57: ; preds = %for.cond53 + %9 = load i8, ptr addrspace(4) %lsr.iv5, align 1 + store i8 %9, ptr addrspace(1) %lsr.iv3, align 1 + %inc63 = add nuw nsw i32 %Idx52.0, 1 + %scevgep4 = getelementptr i8, ptr addrspace(1) %lsr.iv3, i64 1 + %scevgep6 = getelementptr i8, ptr addrspace(4) %lsr.iv5, i64 1 + br label %for.cond53 + +for.body44: ; preds = %for.cond40 + %10 = load i8, ptr addrspace(4) %lsr.iv9, align 1 + store i8 %10, ptr addrspace(1) %lsr.iv7, align 1 + %inc48 = add nuw nsw i32 %Idx.0, 1 + %scevgep8 = getelementptr i8, ptr addrspace(1) %lsr.iv7, i64 1 + %scevgep10 = getelementptr i8, ptr addrspace(4) %lsr.iv9, i64 1 + br label %for.cond40 +} + +declare extern_weak dso_local spir_func i32 @_Z29__spirv_AtomicCompareExchangePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_ii(ptr addrspace(1), i32, i32, i32, i32, i32) local_unnamed_addr +declare extern_weak dso_local spir_func void @_Z19__spirv_AtomicStorePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEi(ptr addrspace(1), i32, i32, i32) local_unnamed_addr +declare i32 @llvm.umin.i32(i32, i32) From 4e6f812e9134a99320546d3805f628683b4ed334 Mon Sep 17 00:00:00 2001 From: Henrich Lauko Date: Tue, 3 Dec 2024 16:16:16 +0100 Subject: [PATCH 079/191] [mlir][llvm] Align linkage enum order with LLVM (NFC) (#118484) This change doesn't introduce any functional differences but aligns the implementation more closely with LLVM's representation. Previously, the code generated a lookup table to map MLIR enums to LLVM enums due to the lack of one-to-one correspondence. With this refactoring, the generated code now casts directly from one enum to another. 
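For illustration, a minimal sketch of what the realignment enables (the helper name and the direct use of the C-API header are illustrative only; the real conversion code is generated from `LLVMEnums.td`): because the enumerators now carry the same numeric values as `llvm::GlobalValue::LinkageTypes`, translating between the enums is a plain cast rather than a table lookup.

```c++
#include "llvm/IR/GlobalValue.h"
#include "mlir-c/Dialect/LLVM.h"

// Hypothetical helper, shown only to illustrate the effect of the reordering.
static llvm::GlobalValue::LinkageTypes toLLVMLinkage(MlirLLVMLinkage L) {
  // No per-value mapping table is needed any more: the enumerators match.
  return static_cast<llvm::GlobalValue::LinkageTypes>(L);
}

// Spot checks of the new value correspondence.
static_assert(static_cast<int>(MlirLLVMLinkageExternal) ==
                  static_cast<int>(llvm::GlobalValue::ExternalLinkage),
              "external linkage values must line up");
static_assert(static_cast<int>(MlirLLVMLinkagePrivate) ==
                  static_cast<int>(llvm::GlobalValue::PrivateLinkage),
              "private linkage values must line up");
```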
--- mlir/include/mlir-c/Dialect/LLVM.h | 18 ++++---- mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td | 42 +++++++++---------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/mlir/include/mlir-c/Dialect/LLVM.h b/mlir/include/mlir-c/Dialect/LLVM.h index 0e6434073437a..ed9b23c343150 100644 --- a/mlir/include/mlir-c/Dialect/LLVM.h +++ b/mlir/include/mlir-c/Dialect/LLVM.h @@ -175,17 +175,17 @@ MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMComdatAttrGet(MlirContext ctx, MlirLLVMComdat comdat); enum MlirLLVMLinkage { - MlirLLVMLinkagePrivate = 0, - MlirLLVMLinkageInternal = 1, - MlirLLVMLinkageAvailableExternally = 2, - MlirLLVMLinkageLinkonce = 3, + MlirLLVMLinkageExternal = 0, + MlirLLVMLinkageAvailableExternally = 1, + MlirLLVMLinkageLinkonce = 2, + MlirLLVMLinkageLinkonceODR = 3, MlirLLVMLinkageWeak = 4, - MlirLLVMLinkageCommon = 5, + MlirLLVMLinkageWeakODR = 5, MlirLLVMLinkageAppending = 6, - MlirLLVMLinkageExternWeak = 7, - MlirLLVMLinkageLinkonceODR = 8, - MlirLLVMLinkageWeakODR = 9, - MlirLLVMLinkageExternal = 10, + MlirLLVMLinkageInternal = 7, + MlirLLVMLinkagePrivate = 8, + MlirLLVMLinkageExternWeak = 9, + MlirLLVMLinkageCommon = 10, }; typedef enum MlirLLVMLinkage MlirLLVMLinkage; diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td index 4a43c16903394..c08b75de03647 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td @@ -615,40 +615,40 @@ def ICmpPredicate : LLVM_EnumAttr< //===----------------------------------------------------------------------===// // Linkage attribute is used on functions and globals. The order follows that of -// https://llvm.org/docs/LangRef.html#linkage-types. The names are equivalent to -// visible names in the IR rather than to enum values names in llvm::GlobalValue -// since the latter is easier to change. -def LinkagePrivate - : LLVM_EnumAttrCase<"Private", "private", "PrivateLinkage", 0>; -def LinkageInternal - : LLVM_EnumAttrCase<"Internal", "internal", "InternalLinkage", 1>; +// llvm::GlobalValue::LinkageTypes from llvm/IR/GlobalValue.h. The names are +// equivalent to visible names in the IR rather than to enum values names in +// llvm::GlobalValue since the latter is easier to change. 
+def LinkageExternal + : LLVM_EnumAttrCase<"External", "external", "ExternalLinkage", 0>; def LinkageAvailableExternally : LLVM_EnumAttrCase<"AvailableExternally", "available_externally", - "AvailableExternallyLinkage", 2>; + "AvailableExternallyLinkage", 1>; def LinkageLinkonce - : LLVM_EnumAttrCase<"Linkonce", "linkonce", "LinkOnceAnyLinkage", 3>; + : LLVM_EnumAttrCase<"Linkonce", "linkonce", "LinkOnceAnyLinkage", 2>; +def LinkageLinkonceODR + : LLVM_EnumAttrCase<"LinkonceODR", "linkonce_odr", "LinkOnceODRLinkage", 3>; def LinkageWeak : LLVM_EnumAttrCase<"Weak", "weak", "WeakAnyLinkage", 4>; -def LinkageCommon - : LLVM_EnumAttrCase<"Common", "common", "CommonLinkage", 5>; +def LinkageWeakODR + : LLVM_EnumAttrCase<"WeakODR", "weak_odr", "WeakODRLinkage", 5>; def LinkageAppending : LLVM_EnumAttrCase<"Appending", "appending", "AppendingLinkage", 6>; +def LinkageInternal + : LLVM_EnumAttrCase<"Internal", "internal", "InternalLinkage", 7>; +def LinkagePrivate + : LLVM_EnumAttrCase<"Private", "private", "PrivateLinkage", 8>; def LinkageExternWeak - : LLVM_EnumAttrCase<"ExternWeak", "extern_weak", "ExternalWeakLinkage", 7>; -def LinkageLinkonceODR - : LLVM_EnumAttrCase<"LinkonceODR", "linkonce_odr", "LinkOnceODRLinkage", 8>; -def LinkageWeakODR - : LLVM_EnumAttrCase<"WeakODR", "weak_odr", "WeakODRLinkage", 9>; -def LinkageExternal - : LLVM_EnumAttrCase<"External", "external", "ExternalLinkage", 10>; + : LLVM_EnumAttrCase<"ExternWeak", "extern_weak", "ExternalWeakLinkage", 9>; +def LinkageCommon + : LLVM_EnumAttrCase<"Common", "common", "CommonLinkage", 10>; def LinkageEnum : LLVM_EnumAttr< "Linkage", "::llvm::GlobalValue::LinkageTypes", "LLVM linkage types", - [LinkagePrivate, LinkageInternal, LinkageAvailableExternally, - LinkageLinkonce, LinkageWeak, LinkageCommon, LinkageAppending, - LinkageExternWeak, LinkageLinkonceODR, LinkageWeakODR, LinkageExternal]> { + [LinkageExternal, LinkageAvailableExternally, LinkageLinkonce, + LinkageLinkonceODR, LinkageWeak, LinkageWeakODR, LinkageAppending, + LinkageInternal, LinkagePrivate, LinkageExternWeak, LinkageCommon]> { let cppNamespace = "::mlir::LLVM::linkage"; } From 874b4fb6adf742344fabd7df898be31360a563b9 Mon Sep 17 00:00:00 2001 From: Vyacheslav Levytskyy Date: Tue, 3 Dec 2024 16:18:06 +0100 Subject: [PATCH 080/191] [SPIR-V] Fix emission of debug and annotation instructions and add SPV_EXT_optnone SPIR-V extension (#118402) This PR fixes: * emission of OpNames (added newly inserted internal intrinsics and basic blocks) * emission of function attributes (SRet is added) * implementation of SPV_INTEL_optnone so that it emits OptNoneINTEL Function Control flag, and add implementation of the SPV_EXT_optnone SPIR-V extension. 
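For reference, the new extension can be exercised roughly as follows (condensed from the `SPV_EXT_optnone.ll` test added in this patch): a function marked `optnone noinline`, compiled with `--spirv-ext=+SPV_EXT_optnone`, now gets the OptNoneEXT capability, the extension declaration, and the OptNoneEXT function-control bit.

```llvm
; Condensed from llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_optnone.ll below.
; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_EXT_optnone %s -o - | FileCheck %s

; CHECK: OpCapability OptNoneEXT
; CHECK: OpExtension "SPV_EXT_optnone"
; CHECK: OpFunction {{.*}} DontInline|OptNoneEXT

define spir_func void @foo() #0 {
entry:
  ret void
}

attributes #0 = { nounwind optnone noinline }
```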
--- llvm/docs/SPIRVUsage.rst | 10 +-- llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp | 12 ++-- llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp | 20 ++++-- llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp | 1 + llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 61 ++++++++++--------- llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 36 +++++++++-- llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h | 14 +++-- .../lib/Target/SPIRV/SPIRVSymbolicOperands.td | 3 + llvm/lib/Target/SPIRV/SPIRVUtils.cpp | 10 +++ llvm/lib/Target/SPIRV/SPIRVUtils.h | 2 + llvm/test/CodeGen/SPIRV/bitcast.ll | 2 +- .../SPIRV/extensions/SPV_EXT_optnone.ll | 20 ++++++ .../SPIRV/extensions/SPV_INTEL_optnone.ll | 23 +++---- .../pointers/PtrCast-in-OpSpecConstantOp.ll | 2 +- .../CodeGen/SPIRV/pointers/global-ptrtoint.ll | 2 +- 15 files changed, 146 insertions(+), 72 deletions(-) create mode 100644 llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_optnone.ll diff --git a/llvm/docs/SPIRVUsage.rst b/llvm/docs/SPIRVUsage.rst index 0a2f47c54dd97..28e919fdf516a 100644 --- a/llvm/docs/SPIRVUsage.rst +++ b/llvm/docs/SPIRVUsage.rst @@ -141,16 +141,18 @@ list of supported SPIR-V extensions, sorted alphabetically by their extension na * - Extension Name - Description + * - ``SPV_EXT_arithmetic_fence`` + - Adds an instruction that prevents fast-math optimizations between its argument and the expression that contains it. + * - ``SPV_EXT_demote_to_helper_invocation`` + - Adds an instruction that demotes a fragment shader invocation to a helper invocation. + * - ``SPV_EXT_optnone`` + - Adds OptNoneEXT value for Function Control mask that indicates a request to not optimize the function. * - ``SPV_EXT_shader_atomic_float16_add`` - Extends the SPV_EXT_shader_atomic_float_add extension to support atomically adding to 16-bit floating-point numbers in memory. * - ``SPV_EXT_shader_atomic_float_add`` - Adds atomic add instruction on floating-point numbers. * - ``SPV_EXT_shader_atomic_float_min_max`` - Adds atomic min and max instruction on floating-point numbers. - * - ``SPV_EXT_arithmetic_fence`` - - Adds an instruction that prevents fast-math optimizations between its argument and the expression that contains it. - * - ``SPV_EXT_demote_to_helper_invocation`` - - Adds an instruction that demotes a fragment shader invocation to a helper invocation. * - ``SPV_INTEL_arbitrary_precision_integers`` - Allows generating arbitrary width integer types. * - ``SPV_INTEL_bfloat16_conversion`` diff --git a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp index 8210e20ce5b10..4012bd7696c45 100644 --- a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp @@ -45,6 +45,7 @@ using namespace llvm; namespace { class SPIRVAsmPrinter : public AsmPrinter { unsigned NLabels = 0; + SmallPtrSet LabeledMBB; public: explicit SPIRVAsmPrinter(TargetMachine &TM, @@ -152,13 +153,9 @@ void SPIRVAsmPrinter::outputOpFunctionEnd() { outputMCInst(FunctionEndInst); } -// Emit OpFunctionEnd at the end of MF and clear BBNumToRegMap. void SPIRVAsmPrinter::emitFunctionBodyEnd() { - // Do not emit anything if it's an internal service function. 
- if (isHidden()) - return; - outputOpFunctionEnd(); - MAI->BBNumToRegMap.clear(); + if (!isHidden()) + outputOpFunctionEnd(); } void SPIRVAsmPrinter::emitOpLabel(const MachineBasicBlock &MBB) { @@ -171,6 +168,7 @@ void SPIRVAsmPrinter::emitOpLabel(const MachineBasicBlock &MBB) { LabelInst.addOperand(MCOperand::createReg(MAI->getOrCreateMBBRegister(MBB))); outputMCInst(LabelInst); ++NLabels; + LabeledMBB.insert(&MBB); } void SPIRVAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) { @@ -267,7 +265,7 @@ void SPIRVAsmPrinter::emitInstruction(const MachineInstr *MI) { // Output OpLabel after OpFunction and OpFunctionParameter in the first MBB. const MachineInstr *NextMI = MI->getNextNode(); - if (!MAI->hasMBBRegister(*MI->getParent()) && isFuncOrHeaderInstr(MI, TII) && + if (!LabeledMBB.contains(MI->getParent()) && isFuncOrHeaderInstr(MI, TII) && (!NextMI || !isFuncOrHeaderInstr(NextMI, TII))) { assert(MI->getParent()->getNumber() == MF->front().getNumber() && "OpFunction is not in the front MBB of MF"); diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp index 3fdaa6aa3257e..e8e853c5c758a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp @@ -65,7 +65,8 @@ bool SPIRVCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, } // Based on the LLVM function attributes, get a SPIR-V FunctionControl. -static uint32_t getFunctionControl(const Function &F) { +static uint32_t getFunctionControl(const Function &F, + const SPIRVSubtarget *ST) { MemoryEffects MemEffects = F.getMemoryEffects(); uint32_t FuncControl = static_cast(SPIRV::FunctionControl::None); @@ -80,6 +81,11 @@ static uint32_t getFunctionControl(const Function &F) { else if (MemEffects.onlyReadsMemory()) FuncControl |= static_cast(SPIRV::FunctionControl::Const); + if (ST->canUseExtension(SPIRV::Extension::SPV_INTEL_optnone) || + ST->canUseExtension(SPIRV::Extension::SPV_EXT_optnone)) + if (F.hasFnAttribute(Attribute::OptimizeNone)) + FuncControl |= static_cast(SPIRV::FunctionControl::OptNoneEXT); + return FuncControl; } @@ -346,6 +352,12 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, buildOpDecorate(VRegs[i][0], MIRBuilder, SPIRV::Decoration::FuncParamAttr, {Attr}); } + if (Arg.hasAttribute(Attribute::StructRet)) { + auto Attr = + static_cast(SPIRV::FunctionParameterAttribute::Sret); + buildOpDecorate(VRegs[i][0], MIRBuilder, + SPIRV::Decoration::FuncParamAttr, {Attr}); + } if (F.getCallingConv() == CallingConv::SPIR_KERNEL) { std::vector ArgTypeQualDecs = @@ -397,7 +409,7 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, FTy = fixFunctionTypeIfPtrArgs(GR, F, FTy, RetTy, ArgTypeVRegs); SPIRVType *FuncTy = GR->getOrCreateOpTypeFunctionWithArgs( FTy, RetTy, ArgTypeVRegs, MIRBuilder); - uint32_t FuncControl = getFunctionControl(F); + uint32_t FuncControl = getFunctionControl(F, ST); // Add OpFunction instruction MachineInstrBuilder MB = MIRBuilder.buildInstr(SPIRV::OpFunction) @@ -427,10 +439,8 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, // Handle entry points and function linkage. 
if (isEntryPoint(F)) { - const auto &STI = MIRBuilder.getMF().getSubtarget(); - auto executionModel = getExecutionModel(STI, F); auto MIB = MIRBuilder.buildInstr(SPIRV::OpEntryPoint) - .addImm(static_cast(executionModel)) + .addImm(static_cast(getExecutionModel(*ST, F))) .addUse(FuncVReg); addStringImm(F.getName(), MIB); } else if (F.getLinkage() != GlobalValue::InternalLinkage && diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp index 19a9afac7b3f7..e78fc5ce18707 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp @@ -42,6 +42,7 @@ static const std::map> {"SPV_INTEL_global_variable_host_access", SPIRV::Extension::Extension::SPV_INTEL_global_variable_host_access}, {"SPV_INTEL_optnone", SPIRV::Extension::Extension::SPV_INTEL_optnone}, + {"SPV_EXT_optnone", SPIRV::Extension::Extension::SPV_EXT_optnone}, {"SPV_INTEL_usm_storage_classes", SPIRV::Extension::Extension::SPV_INTEL_usm_storage_classes}, {"SPV_INTEL_split_barrier", diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index 82d354a3e3a22..f45bdfc7aacb7 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -199,6 +199,8 @@ class SPIRVEmitIntrinsics DenseMap Ptrcasts); void replaceAllUsesWith(Value *Src, Value *Dest, bool DeleteOld = true); + void replaceAllUsesWithAndErase(IRBuilder<> &B, Instruction *Src, + Instruction *Dest, bool DeleteOld = true); bool runOnFunction(Function &F); bool postprocessTypes(Module &M); @@ -322,6 +324,17 @@ static inline void reportFatalOnTokenType(const Instruction *I) { false); } +static void emitAssignName(Instruction *I, IRBuilder<> &B) { + if (!I->hasName() || I->getType()->isAggregateType() || + expectIgnoredInIRTranslation(I)) + return; + reportFatalOnTokenType(I); + setInsertPointAfterDef(B, I); + std::vector Args = {I}; + addStringImm(I->getName(), B, Args); + B.CreateIntrinsic(Intrinsic::spv_assign_name, {I->getType()}, Args); +} + void SPIRVEmitIntrinsics::replaceAllUsesWith(Value *Src, Value *Dest, bool DeleteOld) { Src->replaceAllUsesWith(Dest); @@ -336,6 +349,19 @@ void SPIRVEmitIntrinsics::replaceAllUsesWith(Value *Src, Value *Dest, } } +void SPIRVEmitIntrinsics::replaceAllUsesWithAndErase(IRBuilder<> &B, + Instruction *Src, + Instruction *Dest, + bool DeleteOld) { + replaceAllUsesWith(Src, Dest, DeleteOld); + std::string Name = Src->hasName() ? Src->getName().str() : ""; + Src->eraseFromParent(); + if (!Name.empty()) { + Dest->setName(Name); + emitAssignName(Dest, B); + } +} + static bool IsKernelArgInt8(Function *F, StoreInst *SI) { return SI && F->getCallingConv() == CallingConv::SPIR_KERNEL && isPointerTy(SI->getValueOperand()->getType()) && @@ -1308,8 +1334,7 @@ Instruction *SPIRVEmitIntrinsics::visitGetElementPtrInst(GetElementPtrInst &I) { for (auto &Op : I.operands()) Args.push_back(Op); auto *NewI = B.CreateIntrinsic(Intrinsic::spv_gep, {Types}, {Args}); - replaceAllUsesWith(&I, NewI); - I.eraseFromParent(); + replaceAllUsesWithAndErase(B, &I, NewI); return NewI; } @@ -1331,10 +1356,7 @@ Instruction *SPIRVEmitIntrinsics::visitBitCastInst(BitCastInst &I) { SmallVector Types = {I.getType(), Source->getType()}; SmallVector Args(I.op_begin(), I.op_end()); auto *NewI = B.CreateIntrinsic(Intrinsic::spv_bitcast, {Types}, {Args}); - std::string InstName = I.hasName() ? 
I.getName().str() : ""; - replaceAllUsesWith(&I, NewI); - I.eraseFromParent(); - NewI->setName(InstName); + replaceAllUsesWithAndErase(B, &I, NewI); return NewI; } @@ -1589,10 +1611,7 @@ Instruction *SPIRVEmitIntrinsics::visitInsertElementInst(InsertElementInst &I) { B.SetInsertPoint(&I); SmallVector Args(I.op_begin(), I.op_end()); auto *NewI = B.CreateIntrinsic(Intrinsic::spv_insertelt, {Types}, {Args}); - std::string InstName = I.hasName() ? I.getName().str() : ""; - replaceAllUsesWith(&I, NewI); - I.eraseFromParent(); - NewI->setName(InstName); + replaceAllUsesWithAndErase(B, &I, NewI); return NewI; } @@ -1604,10 +1623,7 @@ SPIRVEmitIntrinsics::visitExtractElementInst(ExtractElementInst &I) { I.getIndexOperand()->getType()}; SmallVector Args = {I.getVectorOperand(), I.getIndexOperand()}; auto *NewI = B.CreateIntrinsic(Intrinsic::spv_extractelt, {Types}, {Args}); - std::string InstName = I.hasName() ? I.getName().str() : ""; - replaceAllUsesWith(&I, NewI); - I.eraseFromParent(); - NewI->setName(InstName); + replaceAllUsesWithAndErase(B, &I, NewI); return NewI; } @@ -1641,8 +1657,7 @@ Instruction *SPIRVEmitIntrinsics::visitExtractValueInst(ExtractValueInst &I) { Args.push_back(B.getInt32(Op)); auto *NewI = B.CreateIntrinsic(Intrinsic::spv_extractv, {I.getType()}, {Args}); - replaceAllUsesWith(&I, NewI); - I.eraseFromParent(); + replaceAllUsesWithAndErase(B, &I, NewI); return NewI; } @@ -1701,10 +1716,7 @@ Instruction *SPIRVEmitIntrinsics::visitAllocaInst(AllocaInst &I) { ArraySize ? B.CreateIntrinsic(Intrinsic::spv_alloca_array, {PtrTy, ArraySize->getType()}, {ArraySize}) : B.CreateIntrinsic(Intrinsic::spv_alloca, {PtrTy}, {}); - std::string InstName = I.hasName() ? I.getName().str() : ""; - replaceAllUsesWith(&I, NewI); - I.eraseFromParent(); - NewI->setName(InstName); + replaceAllUsesWithAndErase(B, &I, NewI); return NewI; } @@ -1914,14 +1926,7 @@ void SPIRVEmitIntrinsics::processInstrAfterVisit(Instruction *I, I->setOperand(OpNo, NewOp); } } - if (I->hasName() && !I->getType()->isAggregateType() && - !expectIgnoredInIRTranslation(I)) { - reportFatalOnTokenType(I); - setInsertPointAfterDef(B, I); - std::vector Args = {I}; - addStringImm(I->getName(), B, Args); - B.CreateIntrinsic(Intrinsic::spv_assign_name, {I->getType()}, Args); - } + emitAssignName(I, B); } Type *SPIRVEmitIntrinsics::deduceFunParamElementType(Function *F, diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index d001f9451e739..2054081476315 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -421,6 +421,7 @@ void SPIRVModuleAnalysis::processOtherInstrs(const Module &M) { continue; MachineFunction *MF = MMI->getMachineFunction(*F); assert(MF); + for (MachineBasicBlock &MBB : *MF) for (MachineInstr &MI : MBB) { if (MAI.getSkipEmission(&MI)) @@ -1548,11 +1549,14 @@ static void collectReqs(const Module &M, SPIRV::ModuleAnalysisInfo &MAI, SPIRV::OperandCategory::ExecutionModeOperand, SPIRV::ExecutionMode::VecTypeHint, ST); - if (F.hasOptNone() && - ST.canUseExtension(SPIRV::Extension::SPV_INTEL_optnone)) { - // Output OpCapability OptNoneINTEL. 
- MAI.Reqs.addExtension(SPIRV::Extension::SPV_INTEL_optnone); - MAI.Reqs.addCapability(SPIRV::Capability::OptNoneINTEL); + if (F.hasOptNone()) { + if (ST.canUseExtension(SPIRV::Extension::SPV_EXT_optnone)) { + MAI.Reqs.addExtension(SPIRV::Extension::SPV_EXT_optnone); + MAI.Reqs.addCapability(SPIRV::Capability::OptNoneEXT); + } else if (ST.canUseExtension(SPIRV::Extension::SPV_INTEL_optnone)) { + MAI.Reqs.addExtension(SPIRV::Extension::SPV_INTEL_optnone); + MAI.Reqs.addCapability(SPIRV::Capability::OptNoneINTEL); + } } } } @@ -1613,6 +1617,27 @@ static void addDecorations(const Module &M, const SPIRVInstrInfo &TII, } } +static void addMBBNames(const Module &M, const SPIRVInstrInfo &TII, + MachineModuleInfo *MMI, const SPIRVSubtarget &ST, + SPIRV::ModuleAnalysisInfo &MAI) { + for (auto F = M.begin(), E = M.end(); F != E; ++F) { + MachineFunction *MF = MMI->getMachineFunction(*F); + if (!MF) + continue; + MachineRegisterInfo &MRI = MF->getRegInfo(); + for (auto &MBB : *MF) { + if (!MBB.hasName() || MBB.empty()) + continue; + // Emit basic block names. + Register Reg = MRI.createGenericVirtualRegister(LLT::scalar(64)); + MRI.setRegClass(Reg, &SPIRV::IDRegClass); + buildOpName(Reg, MBB.getName(), *std::prev(MBB.end()), TII); + Register GlobalReg = MAI.getOrCreateMBBRegister(MBB); + MAI.setRegisterAlias(MF, Reg, GlobalReg); + } + } +} + struct SPIRV::ModuleAnalysisInfo SPIRVModuleAnalysis::MAI; void SPIRVModuleAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { @@ -1631,6 +1656,7 @@ bool SPIRVModuleAnalysis::runOnModule(Module &M) { setBaseInfo(M); + addMBBNames(M, *TII, MMI, *ST, MAI); addDecorations(M, *TII, MMI, *ST, MAI); collectReqs(M, MAI, MMI, *ST); diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h index 024728c347e8a..ee2aaf156aa89 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h @@ -157,7 +157,7 @@ struct ModuleAnalysisInfo { // The array contains lists of MIs for each module section. InstrList MS[NUM_MODULE_SECTIONS]; // The table maps MBB number to SPIR-V unique ID register. - DenseMap BBNumToRegMap; + DenseMap, Register> BBNumToRegMap; Register getFuncReg(const Function *F) { assert(F && "Function is null"); @@ -188,15 +188,17 @@ struct ModuleAnalysisInfo { } unsigned getNextID() { return MaxID++; } bool hasMBBRegister(const MachineBasicBlock &MBB) { - return BBNumToRegMap.contains(MBB.getNumber()); + auto Key = std::make_pair(MBB.getParent(), MBB.getNumber()); + return BBNumToRegMap.contains(Key); } // Convert MBB's number to corresponding ID register. 
Register getOrCreateMBBRegister(const MachineBasicBlock &MBB) { - auto f = BBNumToRegMap.find(MBB.getNumber()); - if (f != BBNumToRegMap.end()) - return f->second; + auto Key = std::make_pair(MBB.getParent(), MBB.getNumber()); + auto It = BBNumToRegMap.find(Key); + if (It != BBNumToRegMap.end()) + return It->second; Register NewReg = Register::index2VirtReg(getNextID()); - BBNumToRegMap[MBB.getNumber()] = NewReg; + BBNumToRegMap[Key] = NewReg; return NewReg; } }; diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td index 35a74af6b03a3..a3a88acdd6c6a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td +++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td @@ -304,6 +304,7 @@ defm SPV_INTEL_global_variable_host_access : ExtensionOperand<109>; defm SPV_INTEL_global_variable_fpga_decorations : ExtensionOperand<110>; defm SPV_KHR_cooperative_matrix : ExtensionOperand<111>; defm SPV_EXT_arithmetic_fence : ExtensionOperand<112>; +defm SPV_EXT_optnone : ExtensionOperand<113>; //===----------------------------------------------------------------------===// // Multiclass used to define Capabilities enum values and at the same time @@ -463,6 +464,7 @@ defm PhysicalStorageBufferAddressesEXT : CapabilityOperand<5347, 0, 0, [], [Shad defm CooperativeMatrixNV : CapabilityOperand<5357, 0, 0, [], [Shader]>; defm ArbitraryPrecisionIntegersINTEL : CapabilityOperand<5844, 0, 0, [SPV_INTEL_arbitrary_precision_integers], [Int8, Int16]>; defm OptNoneINTEL : CapabilityOperand<6094, 0, 0, [SPV_INTEL_optnone], []>; +defm OptNoneEXT : CapabilityOperand<6094, 0, 0, [SPV_EXT_optnone], []>; defm BitInstructions : CapabilityOperand<6025, 0, 0, [SPV_KHR_bit_instructions], []>; defm ExpectAssumeKHR : CapabilityOperand<5629, 0, 0, [SPV_KHR_expect_assume], []>; defm FunctionPointersINTEL : CapabilityOperand<5603, 0, 0, [SPV_INTEL_function_pointers], []>; @@ -1433,6 +1435,7 @@ defm Inline : FunctionControlOperand<0x1>; defm DontInline : FunctionControlOperand<0x2>; defm Pure : FunctionControlOperand<0x4>; defm Const : FunctionControlOperand<0x8>; +defm OptNoneEXT : FunctionControlOperand<0x10000>; //===----------------------------------------------------------------------===// // Multiclass used to define MemorySemantics enum values and at the same time diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp index 50338f5df9028..7a1914aac8ceb 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp @@ -108,6 +108,16 @@ void buildOpName(Register Target, const StringRef &Name, } } +void buildOpName(Register Target, const StringRef &Name, MachineInstr &I, + const SPIRVInstrInfo &TII) { + if (!Name.empty()) { + auto MIB = + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpName)) + .addUse(Target); + addStringImm(Name, MIB); + } +} + static void finishBuildOpDecorate(MachineInstrBuilder &MIB, const std::vector &DecArgs, StringRef StrImm) { diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h index 6fefe63f44dec..cc77e0afa275a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.h +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h @@ -129,6 +129,8 @@ void addNumImm(const APInt &Imm, MachineInstrBuilder &MIB); // Add an OpName instruction for the given target register. 
void buildOpName(Register Target, const StringRef &Name, MachineIRBuilder &MIRBuilder); +void buildOpName(Register Target, const StringRef &Name, MachineInstr &I, + const SPIRVInstrInfo &TII); // Add an OpDecorate instruction for the given Reg. void buildOpDecorate(Register Reg, MachineIRBuilder &MIRBuilder, diff --git a/llvm/test/CodeGen/SPIRV/bitcast.ll b/llvm/test/CodeGen/SPIRV/bitcast.ll index 242c5a46583c2..d6c985dbadcc4 100644 --- a/llvm/test/CodeGen/SPIRV/bitcast.ll +++ b/llvm/test/CodeGen/SPIRV/bitcast.ll @@ -6,7 +6,7 @@ ; CHECK-SPIRV-DAG: %[[#TyHalf:]] = OpTypeFloat 16 ; CHECK-SPIRV-DAG: %[[#Arg32:]] = OpFunctionParameter %[[#TyInt32]] ; CHECK-SPIRV-DAG: %[[#Arg16:]] = OpUConvert %[[#TyInt16]] %[[#Arg32]] -; CHECK-SPIRV-DAG: %[[#ValHalf:]] = OpBitcast %[[#TyHalf]] %8 +; CHECK-SPIRV-DAG: %[[#ValHalf:]] = OpBitcast %[[#TyHalf]] %[[#Arg16:]] ; CHECK-SPIRV-DAG: %[[#ValHalf2:]] = OpFMul %[[#TyHalf]] %[[#ValHalf]] %[[#ValHalf]] ; CHECK-SPIRV-DAG: %[[#Res16:]] = OpBitcast %[[#TyInt16]] %[[#ValHalf2]] ; CHECK-SPIRV-DAG: OpReturnValue %[[#Res16]] diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_optnone.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_optnone.ll new file mode 100644 index 0000000000000..e21d99badea06 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_optnone.ll @@ -0,0 +1,20 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_EXT_optnone %s -o - | FileCheck %s --check-prefixes=CHECK-EXTENSION +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK-NO-EXTENSION + +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_EXT_optnone %s -o - -filetype=obj | spirv-val %} +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-EXTENSION: OpCapability OptNoneEXT +; CHECK-EXTENSION: OpExtension "SPV_EXT_optnone" +; CHECK-NO-EXTENSION-NOT: OpCapability OptNoneINTEL +; CHECK-NO-EXTENSION-NOT: OpCapability OptNoneEXT +; CHECK-NO-EXTENSION-NOT: OpExtension "SPV_INTEL_optnone" +; CHECK-NO-EXTENSION-NOT: OpExtension "SPV_EXT_optnone" + +define spir_func void @foo() #0 { +; CHECK-EXTENSION: %[[#]] = OpFunction %[[#]] DontInline|OptNoneEXT %[[#]] +entry: + ret void +} + +attributes #0 = { nounwind optnone noinline } diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_optnone.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_optnone.ll index 1744ec9680401..9830b8b4cd2d8 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_optnone.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_optnone.ll @@ -1,25 +1,20 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_optnone %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-EXTENSION -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NO-EXTENSION +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_optnone %s -o - | FileCheck %s --check-prefixes=CHECK-EXTENSION +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK-NO-EXTENSION -; CHECK-EXTENSION: OpCapability OptNoneINTEL +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_optnone %s -o - -filetype=obj | spirv-val %} +; RUN: %if 
spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-EXTENSION: OpCapability OptNoneEXT ; CHECK-EXTENSION: OpExtension "SPV_INTEL_optnone" ; CHECK-NO-EXTENSION-NOT: OpCapability OptNoneINTEL +; CHECK-NO-EXTENSION-NOT: OpCapability OptNoneEXT ; CHECK-NO-EXTENSION-NOT: OpExtension "SPV_INTEL_optnone" +; CHECK-NO-EXTENSION-NOT: OpExtension "SPV_EXT_optnone" -; Function Attrs: nounwind optnone noinline define spir_func void @_Z3foov() #0 { -; CHECK-LABEL: _Z3foov -; CHECK: %4 = OpFunction %2 DontInline %3 -; CHECK-NEXT: %5 = OpLabel -; CHECK-NEXT: OpReturn -; CHECK-NEXT: OpFunctionEnd +; CHECK-EXTENSION: %[[#]] = OpFunction %[[#]] DontInline|OptNoneEXT %[[#]] entry: ret void } attributes #0 = { nounwind optnone noinline } - -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CHECK-EXTENSION: {{.*}} -; CHECK-NO-EXTENSION: {{.*}} diff --git a/llvm/test/CodeGen/SPIRV/pointers/PtrCast-in-OpSpecConstantOp.ll b/llvm/test/CodeGen/SPIRV/pointers/PtrCast-in-OpSpecConstantOp.ll index cd1a1b0080c62..55d638f80cc55 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/PtrCast-in-OpSpecConstantOp.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/PtrCast-in-OpSpecConstantOp.ll @@ -15,7 +15,7 @@ ; CHECK-DAG: %[[GenPtrChar:.*]] = OpTypePointer Generic %[[Char]] ; CHECK-DAG: %[[CWPtrChar:.*]] = OpTypePointer CrossWorkgroup %[[Char]] ; CHECK-DAG: %[[Arr1:.*]] = OpTypeArray %[[CWPtrChar]] %[[#]] -; CHECK-DAG: %[[Struct1:.*]] = OpTypeStruct %8 +; CHECK-DAG: %[[Struct1:.*]] = OpTypeStruct %[[Arr1]] ; CHECK-DAG: %[[Arr2:.*]] = OpTypeArray %[[GenPtrChar]] %[[#]] ; CHECK-DAG: %[[Struct2:.*]] = OpTypeStruct %[[Arr2]] ; CHECK-DAG: %[[GenPtr:.*]] = OpTypePointer Generic %[[Int]] diff --git a/llvm/test/CodeGen/SPIRV/pointers/global-ptrtoint.ll b/llvm/test/CodeGen/SPIRV/pointers/global-ptrtoint.ll index 7982893a0a913..16c20f9067e6e 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/global-ptrtoint.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/global-ptrtoint.ll @@ -11,7 +11,7 @@ ; CHECK-DAG: %[[TyStruct:.*]] = OpTypeStruct %[[TyI64]] %[[TyI64]] ; CHECK-DAG: %[[Const128:.*]] = OpConstant %[[TyI64]] 128 ; CHECK-DAG: %[[GlobalValue]] = OpVariable -; CHECK-DAG: %[[PtrToInt:.*]] = OpSpecConstantOp %[[TyI64]] 117 %12 +; CHECK-DAG: %[[PtrToInt:.*]] = OpSpecConstantOp %[[TyI64]] 117 %[[GlobalValue]] ; TODO: The following bitcast line looks unneeded and we may expect it to be removed in future ; CHECK-DAG: %[[UseGlobalValue:.*]] = OpSpecConstantOp %[[TyI64]] 124 %[[PtrToInt]] ; CHECK-DAG: %[[ConstComposite:.*]] = OpConstantComposite %[[TyStruct]] %[[Const128]] %[[UseGlobalValue]] From af35e21cfe3f7cfc7ddd7a2f535e775e9205f61d Mon Sep 17 00:00:00 2001 From: Kelvin Li Date: Tue, 3 Dec 2024 10:27:14 -0500 Subject: [PATCH 081/191] [flang] Update CommandTest for AIX (NFC) (#118403) With the change in commit e335563, the behavior for `ECLGeneralErrorCommandErrorSync` on AIX is the same as on Linux. 
--- flang/unittests/Runtime/CommandTest.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/flang/unittests/Runtime/CommandTest.cpp b/flang/unittests/Runtime/CommandTest.cpp index 05287d80e14f5..ecb325330f1ad 100644 --- a/flang/unittests/Runtime/CommandTest.cpp +++ b/flang/unittests/Runtime/CommandTest.cpp @@ -352,9 +352,6 @@ TEST_F(ZeroArguments, ECLGeneralErrorCommandErrorSync) { #if defined(_WIN32) CheckDescriptorEqInt(cmdStat.get(), 6); CheckDescriptorEqStr(cmdMsg.get(), "Invalid command lineXXXXXXXXX"); -#elif defined(_AIX) - CheckDescriptorEqInt(cmdStat.get(), 6); - CheckDescriptorEqStr(cmdMsg.get(), "Invalid command lineXXXXXXXXX"); #else CheckDescriptorEqInt(cmdStat.get(), 3); CheckDescriptorEqStr(cmdMsg.get(), "Command line execution failed"); From 9ad09b2930ef2e95bf8772c91f623881d1c14733 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Tue, 3 Dec 2024 15:31:25 +0000 Subject: [PATCH 082/191] [AMDGPU] Refine AMDGPUCodeGenPrepareImpl class. NFC. (#118461) Use references instead of pointers for most state, initialize it all in the constructor, and common up some of the initialization between the legacy and new pass manager paths. --- .../Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 198 ++++++++---------- 1 file changed, 91 insertions(+), 107 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index 7257b53afe69d..75e20c7930168 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -95,32 +95,45 @@ static cl::opt DisableFDivExpand( cl::ReallyHidden, cl::init(false)); +static bool hasUnsafeFPMath(const Function &F) { + return F.getFnAttribute("unsafe-fp-math").getValueAsBool(); +} + class AMDGPUCodeGenPrepareImpl : public InstVisitor { public: - const GCNSubtarget *ST = nullptr; - const AMDGPUTargetMachine *TM = nullptr; - const TargetLibraryInfo *TLInfo = nullptr; - AssumptionCache *AC = nullptr; - DominatorTree *DT = nullptr; - UniformityInfo *UA = nullptr; - Module *Mod = nullptr; - const DataLayout *DL = nullptr; - bool HasUnsafeFPMath = false; - bool HasFP32DenormalFlush = false; + Function &F; + const GCNSubtarget &ST; + const AMDGPUTargetMachine &TM; + const TargetLibraryInfo *TLI; + AssumptionCache *AC; + const DominatorTree *DT; + const UniformityInfo &UA; + const DataLayout &DL; + const bool HasUnsafeFPMath; + const bool HasFP32DenormalFlush; bool FlowChanged = false; mutable Function *SqrtF32 = nullptr; mutable Function *LdexpF32 = nullptr; DenseMap BreakPhiNodesCache; + AMDGPUCodeGenPrepareImpl(Function &F, const AMDGPUTargetMachine &TM, + const TargetLibraryInfo *TLI, AssumptionCache *AC, + const DominatorTree *DT, const UniformityInfo &UA) + : F(F), ST(TM.getSubtarget(F)), TM(TM), TLI(TLI), AC(AC), + DT(DT), UA(UA), DL(F.getDataLayout()), + HasUnsafeFPMath(hasUnsafeFPMath(F)), + HasFP32DenormalFlush(SIModeRegisterDefaults(F, ST).FP32Denormals == + DenormalMode::getPreserveSign()) {} + Function *getSqrtF32() const { if (SqrtF32) return SqrtF32; - LLVMContext &Ctx = Mod->getContext(); - SqrtF32 = Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::amdgcn_sqrt, - {Type::getFloatTy(Ctx)}); + LLVMContext &Ctx = F.getContext(); + SqrtF32 = Intrinsic::getOrInsertDeclaration( + F.getParent(), Intrinsic::amdgcn_sqrt, {Type::getFloatTy(Ctx)}); return SqrtF32; } @@ -128,9 +141,10 @@ class AMDGPUCodeGenPrepareImpl if (LdexpF32) return LdexpF32; - LLVMContext &Ctx = Mod->getContext(); + LLVMContext &Ctx = F.getContext(); LdexpF32 = 
Intrinsic::getOrInsertDeclaration( - Mod, Intrinsic::ldexp, {Type::getFloatTy(Ctx), Type::getInt32Ty(Ctx)}); + F.getParent(), Intrinsic::ldexp, + {Type::getFloatTy(Ctx), Type::getInt32Ty(Ctx)}); return LdexpF32; } @@ -166,8 +180,7 @@ class AMDGPUCodeGenPrepareImpl /// Wrapper to pass all the arguments to computeKnownFPClass KnownFPClass computeKnownFPClass(const Value *V, FPClassTest Interested, const Instruction *CtxI) const { - return llvm::computeKnownFPClass(V, *DL, Interested, 0, TLInfo, AC, CtxI, - DT); + return llvm::computeKnownFPClass(V, DL, Interested, 0, TLI, AC, CtxI, DT); } bool canIgnoreDenormalInput(const Value *V, const Instruction *CtxI) const { @@ -317,13 +330,10 @@ class AMDGPUCodeGenPrepareImpl bool visitBitreverseIntrinsicInst(IntrinsicInst &I); bool visitMinNum(IntrinsicInst &I); bool visitSqrt(IntrinsicInst &I); - bool run(Function &F); + bool run(); }; class AMDGPUCodeGenPrepare : public FunctionPass { -private: - AMDGPUCodeGenPrepareImpl Impl; - public: static char ID; AMDGPUCodeGenPrepare() : FunctionPass(ID) { @@ -339,13 +349,12 @@ class AMDGPUCodeGenPrepare : public FunctionPass { AU.setPreservesAll(); } bool runOnFunction(Function &F) override; - bool doInitialization(Module &M) override; StringRef getPassName() const override { return "AMDGPU IR optimizations"; } }; } // end anonymous namespace -bool AMDGPUCodeGenPrepareImpl::run(Function &F) { +bool AMDGPUCodeGenPrepareImpl::run() { BreakPhiNodesCache.clear(); bool MadeChange = false; @@ -411,7 +420,7 @@ bool AMDGPUCodeGenPrepareImpl::needsPromotionToI32(const Type *T) const { if (const VectorType *VT = dyn_cast(T)) { // TODO: The set of packed operations is more limited, so may want to // promote some anyway. - if (ST->hasVOP3PInsts()) + if (ST.hasVOP3PInsts()) return false; return needsPromotionToI32(VT->getElementType()); @@ -422,7 +431,7 @@ bool AMDGPUCodeGenPrepareImpl::needsPromotionToI32(const Type *T) const { bool AMDGPUCodeGenPrepareImpl::isLegalFloatingTy(const Type *Ty) const { return Ty->isFloatTy() || Ty->isDoubleTy() || - (Ty->isHalfTy() && ST->has16BitInsts()); + (Ty->isHalfTy() && ST.has16BitInsts()); } // Return true if the op promoted to i32 should have nsw set. 
@@ -455,11 +464,10 @@ static bool promotedOpIsNUW(const Instruction &I) { bool AMDGPUCodeGenPrepareImpl::canWidenScalarExtLoad(LoadInst &I) const { Type *Ty = I.getType(); - const DataLayout &DL = Mod->getDataLayout(); int TySize = DL.getTypeSizeInBits(Ty); Align Alignment = DL.getValueOrABITypeAlignment(I.getAlign(), Ty); - return I.isSimple() && TySize < 32 && Alignment >= 4 && UA->isUniform(&I); + return I.isSimple() && TySize < 32 && Alignment >= 4 && UA.isUniform(&I); } bool AMDGPUCodeGenPrepareImpl::promoteUniformOpToI32(BinaryOperator &I) const { @@ -591,11 +599,11 @@ bool AMDGPUCodeGenPrepareImpl::promoteUniformBitreverseToI32( } unsigned AMDGPUCodeGenPrepareImpl::numBitsUnsigned(Value *Op) const { - return computeKnownBits(Op, *DL, 0, AC).countMaxActiveBits(); + return computeKnownBits(Op, DL, 0, AC).countMaxActiveBits(); } unsigned AMDGPUCodeGenPrepareImpl::numBitsSigned(Value *Op) const { - return ComputeMaxSignificantBits(Op, *DL, 0, AC); + return ComputeMaxSignificantBits(Op, DL, 0, AC); } static void extractValues(IRBuilder<> &Builder, @@ -631,11 +639,11 @@ bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const { Type *Ty = I.getType(); unsigned Size = Ty->getScalarSizeInBits(); - if (Size <= 16 && ST->has16BitInsts()) + if (Size <= 16 && ST.has16BitInsts()) return false; // Prefer scalar if this could be s_mul_i32 - if (UA->isUniform(&I)) + if (UA.isUniform(&I)) return false; Value *LHS = I.getOperand(0); @@ -646,11 +654,11 @@ bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const { unsigned LHSBits = 0, RHSBits = 0; bool IsSigned = false; - if (ST->hasMulU24() && (LHSBits = numBitsUnsigned(LHS)) <= 24 && + if (ST.hasMulU24() && (LHSBits = numBitsUnsigned(LHS)) <= 24 && (RHSBits = numBitsUnsigned(RHS)) <= 24) { IsSigned = false; - } else if (ST->hasMulI24() && (LHSBits = numBitsSigned(LHS)) <= 24 && + } else if (ST.hasMulI24() && (LHSBits = numBitsSigned(LHS)) <= 24 && (RHSBits = numBitsSigned(RHS)) <= 24) { IsSigned = true; @@ -730,21 +738,21 @@ bool AMDGPUCodeGenPrepareImpl::foldBinOpIntoSelect(BinaryOperator &BO) const { if (CastOp) { if (!CastOp->hasOneUse()) return false; - CT = ConstantFoldCastOperand(CastOp->getOpcode(), CT, BO.getType(), *DL); - CF = ConstantFoldCastOperand(CastOp->getOpcode(), CF, BO.getType(), *DL); + CT = ConstantFoldCastOperand(CastOp->getOpcode(), CT, BO.getType(), DL); + CF = ConstantFoldCastOperand(CastOp->getOpcode(), CF, BO.getType(), DL); } // TODO: Handle special 0/-1 cases DAG combine does, although we only really // need to handle divisions here. - Constant *FoldedT = SelOpNo ? - ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CT, *DL) : - ConstantFoldBinaryOpOperands(BO.getOpcode(), CT, CBO, *DL); + Constant *FoldedT = + SelOpNo ? ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CT, DL) + : ConstantFoldBinaryOpOperands(BO.getOpcode(), CT, CBO, DL); if (!FoldedT || isa(FoldedT)) return false; - Constant *FoldedF = SelOpNo ? - ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CF, *DL) : - ConstantFoldBinaryOpOperands(BO.getOpcode(), CF, CBO, *DL); + Constant *FoldedF = + SelOpNo ? ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CF, DL) + : ConstantFoldBinaryOpOperands(BO.getOpcode(), CF, CBO, DL); if (!FoldedF || isa(FoldedF)) return false; @@ -777,7 +785,7 @@ AMDGPUCodeGenPrepareImpl::getFrexpResults(IRBuilder<> &Builder, // result? It's unspecified by the spec. Value *FrexpExp = - ST->hasFractBug() + ST.hasFractBug() ? 
Builder.CreateIntrinsic(Intrinsic::amdgcn_frexp_exp, {Builder.getInt32Ty(), Ty}, Src) : Builder.CreateExtractValue(Frexp, {1}); @@ -815,7 +823,7 @@ Value *AMDGPUCodeGenPrepareImpl::emitFrexpDiv(IRBuilder<> &Builder, Value *LHS, // If we have have to work around the fract/frexp bug, we're worse off than // using the fdiv.fast expansion. The full safe expansion is faster if we have // fast FMA. - if (HasFP32DenormalFlush && ST->hasFractBug() && !ST->hasFastFMAF32() && + if (HasFP32DenormalFlush && ST.hasFractBug() && !ST.hasFastFMAF32() && (!FMF.noNaNs() || !FMF.noInfs())) return nullptr; @@ -1157,17 +1165,12 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) { if (NewVal) { FDiv.replaceAllUsesWith(NewVal); NewVal->takeName(&FDiv); - RecursivelyDeleteTriviallyDeadInstructions(&FDiv, TLInfo); + RecursivelyDeleteTriviallyDeadInstructions(&FDiv, TLI); } return true; } -static bool hasUnsafeFPMath(const Function &F) { - Attribute Attr = F.getFnAttribute("unsafe-fp-math"); - return Attr.getValueAsBool(); -} - static std::pair getMul64(IRBuilder<> &Builder, Value *LHS, Value *RHS) { Type *I32Ty = Builder.getInt32Ty(); @@ -1192,7 +1195,6 @@ static Value* getMulHu(IRBuilder<> &Builder, Value *LHS, Value *RHS) { int AMDGPUCodeGenPrepareImpl::getDivNumBits(BinaryOperator &I, Value *Num, Value *Den, unsigned AtLeast, bool IsSigned) const { - const DataLayout &DL = Mod->getDataLayout(); unsigned LHSSignBits = ComputeNumSignBits(Num, DL, 0, AC, &I); if (LHSSignBits < AtLeast) return -1; @@ -1271,7 +1273,7 @@ Value *AMDGPUCodeGenPrepareImpl::expandDivRem24Impl( Value *FQNeg = Builder.CreateFNeg(FQ); // float fr = mad(fqneg, fb, fa); - auto FMAD = !ST->hasMadMacF32Insts() + auto FMAD = !ST.hasMadMacF32Insts() ? Intrinsic::fma : (Intrinsic::ID)Intrinsic::amdgcn_fmad_ftz; Value *FR = Builder.CreateIntrinsic(FMAD, @@ -1338,7 +1340,7 @@ bool AMDGPUCodeGenPrepareImpl::divHasSpecialOptimization(BinaryOperator &I, // If there's no wider mulhi, there's only a better expansion for powers of // two. // TODO: Should really know for each vector element. - if (isKnownToBeAPowerOfTwo(C, *DL, true, 0, AC, &I, DT)) + if (isKnownToBeAPowerOfTwo(C, DL, true, 0, AC, &I, DT)) return true; return false; @@ -1348,8 +1350,8 @@ bool AMDGPUCodeGenPrepareImpl::divHasSpecialOptimization(BinaryOperator &I, // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 if (BinOpDen->getOpcode() == Instruction::Shl && isa(BinOpDen->getOperand(0)) && - isKnownToBeAPowerOfTwo(BinOpDen->getOperand(0), *DL, true, - 0, AC, &I, DT)) { + isKnownToBeAPowerOfTwo(BinOpDen->getOperand(0), DL, true, 0, AC, &I, + DT)) { return true; } } @@ -1357,9 +1359,9 @@ bool AMDGPUCodeGenPrepareImpl::divHasSpecialOptimization(BinaryOperator &I, return false; } -static Value *getSign32(Value *V, IRBuilder<> &Builder, const DataLayout *DL) { +static Value *getSign32(Value *V, IRBuilder<> &Builder, const DataLayout DL) { // Check whether the sign can be determined statically. 
- KnownBits Known = computeKnownBits(V, *DL); + KnownBits Known = computeKnownBits(V, DL); if (Known.isNegative()) return Constant::getAllOnesValue(V->getType()); if (Known.isNonNegative()) @@ -1542,8 +1544,8 @@ bool AMDGPUCodeGenPrepareImpl::visitBinaryOperator(BinaryOperator &I) { if (foldBinOpIntoSelect(I)) return true; - if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) && - UA->isUniform(&I) && promoteUniformOpToI32(I)) + if (ST.has16BitInsts() && needsPromotionToI32(I.getType()) && + UA.isUniform(&I) && promoteUniformOpToI32(I)) return true; if (UseMul24Intrin && replaceMulWithMul24(I)) @@ -1655,11 +1657,11 @@ bool AMDGPUCodeGenPrepareImpl::visitLoadInst(LoadInst &I) { }; WidenLoad->setMetadata(LLVMContext::MD_range, - MDNode::get(Mod->getContext(), LowAndHigh)); + MDNode::get(F.getContext(), LowAndHigh)); } } - int TySize = Mod->getDataLayout().getTypeSizeInBits(I.getType()); + int TySize = DL.getTypeSizeInBits(I.getType()); Type *IntNTy = Builder.getIntNTy(TySize); Value *ValTrunc = Builder.CreateTrunc(WidenLoad, IntNTy); Value *ValOrig = Builder.CreateBitCast(ValTrunc, I.getType()); @@ -1674,8 +1676,8 @@ bool AMDGPUCodeGenPrepareImpl::visitLoadInst(LoadInst &I) { bool AMDGPUCodeGenPrepareImpl::visitICmpInst(ICmpInst &I) { bool Changed = false; - if (ST->has16BitInsts() && needsPromotionToI32(I.getOperand(0)->getType()) && - UA->isUniform(&I)) + if (ST.has16BitInsts() && needsPromotionToI32(I.getOperand(0)->getType()) && + UA.isUniform(&I)) Changed |= promoteUniformOpToI32(I); return Changed; @@ -1688,8 +1690,8 @@ bool AMDGPUCodeGenPrepareImpl::visitSelectInst(SelectInst &I) { Value *CmpVal; FCmpInst::Predicate Pred; - if (ST->has16BitInsts() && needsPromotionToI32(I.getType())) { - if (UA->isUniform(&I)) + if (ST.has16BitInsts() && needsPromotionToI32(I.getType())) { + if (UA.isUniform(&I)) return promoteUniformOpToI32(I); return false; } @@ -1722,7 +1724,7 @@ bool AMDGPUCodeGenPrepareImpl::visitSelectInst(SelectInst &I) { Fract->takeName(&I); I.replaceAllUsesWith(Fract); - RecursivelyDeleteTriviallyDeadInstructions(&I, TLInfo); + RecursivelyDeleteTriviallyDeadInstructions(&I, TLI); return true; } @@ -1947,7 +1949,7 @@ bool AMDGPUCodeGenPrepareImpl::visitPHINode(PHINode &I) { FixedVectorType *FVT = dyn_cast(I.getType()); if (!FVT || FVT->getNumElements() == 1 || - DL->getTypeSizeInBits(FVT) <= BreakLargePHIsThreshold) + DL.getTypeSizeInBits(FVT) <= BreakLargePHIsThreshold) return false; if (!ForceBreakLargePHIs && !canBreakPHINode(I)) @@ -1960,7 +1962,7 @@ bool AMDGPUCodeGenPrepareImpl::visitPHINode(PHINode &I) { unsigned Idx = 0; // For 8/16 bits type, don't scalarize fully but break it up into as many // 32-bit slices as we can, and scalarize the tail. 
- const unsigned EltSize = DL->getTypeSizeInBits(EltTy); + const unsigned EltSize = DL.getTypeSizeInBits(EltTy); const unsigned NumElts = FVT->getNumElements(); if (EltSize == 8 || EltSize == 16) { const unsigned SubVecSize = (32 / EltSize); @@ -2079,7 +2081,7 @@ bool AMDGPUCodeGenPrepareImpl::visitAddrSpaceCastInst(AddrSpaceCastInst &I) { SmallVector WorkList; getUnderlyingObjects(I.getOperand(0), WorkList); if (!all_of(WorkList, [&](const Value *V) { - return isPtrKnownNeverNull(V, *DL, *TM, SrcAS); + return isPtrKnownNeverNull(V, DL, TM, SrcAS); })) return false; @@ -2107,8 +2109,8 @@ bool AMDGPUCodeGenPrepareImpl::visitIntrinsicInst(IntrinsicInst &I) { bool AMDGPUCodeGenPrepareImpl::visitBitreverseIntrinsicInst(IntrinsicInst &I) { bool Changed = false; - if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) && - UA->isUniform(&I)) + if (ST.has16BitInsts() && needsPromotionToI32(I.getType()) && + UA.isUniform(&I)) Changed |= promoteUniformBitreverseToI32(I); return Changed; @@ -2120,7 +2122,7 @@ bool AMDGPUCodeGenPrepareImpl::visitBitreverseIntrinsicInst(IntrinsicInst &I) { /// If fract is a useful instruction for the subtarget. Does not account for the /// nan handling; the instruction has a nan check on the input value. Value *AMDGPUCodeGenPrepareImpl::matchFractPat(IntrinsicInst &I) { - if (ST->hasFractBug()) + if (ST.hasFractBug()) return nullptr; if (I.getIntrinsicID() != Intrinsic::minnum) @@ -2177,7 +2179,7 @@ bool AMDGPUCodeGenPrepareImpl::visitMinNum(IntrinsicInst &I) { // Match pattern for fract intrinsic in contexts where the nan check has been // optimized out (and hope the knowledge the source can't be nan wasn't lost). if (!I.hasNoNaNs() && - !isKnownNeverNaN(FractArg, /*Depth=*/0, SimplifyQuery(*DL, TLInfo))) + !isKnownNeverNaN(FractArg, /*Depth=*/0, SimplifyQuery(DL, TLI))) return false; IRBuilder<> Builder(&I); @@ -2189,7 +2191,7 @@ bool AMDGPUCodeGenPrepareImpl::visitMinNum(IntrinsicInst &I) { Fract->takeName(&I); I.replaceAllUsesWith(Fract); - RecursivelyDeleteTriviallyDeadInstructions(&I, TLInfo); + RecursivelyDeleteTriviallyDeadInstructions(&I, TLI); return true; } @@ -2201,7 +2203,7 @@ static bool isOneOrNegOne(const Value *Val) { // Expand llvm.sqrt.f32 calls with !fpmath metadata in a semi-fast way. bool AMDGPUCodeGenPrepareImpl::visitSqrt(IntrinsicInst &Sqrt) { Type *Ty = Sqrt.getType()->getScalarType(); - if (!Ty->isFloatTy() && (!Ty->isHalfTy() || ST->has16BitInsts())) + if (!Ty->isFloatTy() && (!Ty->isHalfTy() || ST.has16BitInsts())) return false; const FPMathOperator *FPOp = cast(&Sqrt); @@ -2257,14 +2259,6 @@ bool AMDGPUCodeGenPrepareImpl::visitSqrt(IntrinsicInst &Sqrt) { return true; } -bool AMDGPUCodeGenPrepare::doInitialization(Module &M) { - Impl.Mod = &M; - Impl.DL = &Impl.Mod->getDataLayout(); - Impl.SqrtF32 = nullptr; - Impl.LdexpF32 = nullptr; - return false; -} - bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) { if (skipFunction(F)) return false; @@ -2274,36 +2268,26 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) { return false; const AMDGPUTargetMachine &TM = TPC->getTM(); - Impl.TM = &TM; - Impl.TLInfo = &getAnalysis().getTLI(F); - Impl.ST = &TM.getSubtarget(F); - Impl.AC = &getAnalysis().getAssumptionCache(F); - Impl.UA = &getAnalysis().getUniformityInfo(); + const TargetLibraryInfo *TLI = + &getAnalysis().getTLI(F); + AssumptionCache *AC = + &getAnalysis().getAssumptionCache(F); auto *DTWP = getAnalysisIfAvailable(); - Impl.DT = DTWP ? 
&DTWP->getDomTree() : nullptr; - Impl.HasUnsafeFPMath = hasUnsafeFPMath(F); - SIModeRegisterDefaults Mode(F, *Impl.ST); - Impl.HasFP32DenormalFlush = - Mode.FP32Denormals == DenormalMode::getPreserveSign(); - return Impl.run(F); + const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; + const UniformityInfo &UA = + getAnalysis().getUniformityInfo(); + return AMDGPUCodeGenPrepareImpl(F, TM, TLI, AC, DT, UA).run(); } PreservedAnalyses AMDGPUCodeGenPreparePass::run(Function &F, FunctionAnalysisManager &FAM) { - AMDGPUCodeGenPrepareImpl Impl; - Impl.Mod = F.getParent(); - Impl.DL = &Impl.Mod->getDataLayout(); - Impl.TM = static_cast(&TM); - Impl.TLInfo = &FAM.getResult(F); - Impl.ST = &TM.getSubtarget(F); - Impl.AC = &FAM.getResult(F); - Impl.UA = &FAM.getResult(F); - Impl.DT = FAM.getCachedResult(F); - Impl.HasUnsafeFPMath = hasUnsafeFPMath(F); - SIModeRegisterDefaults Mode(F, *Impl.ST); - Impl.HasFP32DenormalFlush = - Mode.FP32Denormals == DenormalMode::getPreserveSign(); - if (!Impl.run(F)) + const AMDGPUTargetMachine &ATM = static_cast(TM); + const TargetLibraryInfo *TLI = &FAM.getResult(F); + AssumptionCache *AC = &FAM.getResult(F); + const DominatorTree *DT = FAM.getCachedResult(F); + const UniformityInfo &UA = FAM.getResult(F); + AMDGPUCodeGenPrepareImpl Impl(F, ATM, TLI, AC, DT, UA); + if (!Impl.run()) return PreservedAnalyses::all(); PreservedAnalyses PA = PreservedAnalyses::none(); if (!Impl.FlowChanged) From c7babfa6a375d1094cf5e5d7ce4b36b2b9a6b717 Mon Sep 17 00:00:00 2001 From: Jan Patrick Lehr Date: Tue, 3 Dec 2024 16:37:57 +0100 Subject: [PATCH 083/191] [Offload] Find libc relative to DeviceRTL path (#118497) This was discussed as a potential solution in https://github.com/llvm/llvm-project/pull/118173 --- offload/DeviceRTL/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt index 3da83e5c30713..b52305b9516fb 100644 --- a/offload/DeviceRTL/CMakeLists.txt +++ b/offload/DeviceRTL/CMakeLists.txt @@ -131,7 +131,7 @@ set(bc_flags -c -foffload-lto -std=c++17 -fvisibility=hidden -DOMPTARGET_DEVICE_RUNTIME -I${include_directory} -I${devicertl_base_directory}/../include - -I${LLVM_MAIN_SRC_DIR}/../libc + -I${devicertl_base_directory}/../../libc ${LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL} ) @@ -276,7 +276,7 @@ function(compileDeviceRTLLibrary target_cpu target_name target_triple) target_compile_definitions(${ide_target_name} PRIVATE SHARED_SCRATCHPAD_SIZE=512) target_include_directories(${ide_target_name} PRIVATE ${include_directory} - ${LLVM_MAIN_SRC_DIR}/../libc + ${devicertl_base_directory}/../../libc ${devicertl_base_directory}/../include ${LIBOMPTARGET_LLVM_INCLUDE_DIRS} ) From 866b9f43a0772faee64aa421886eb101118f3167 Mon Sep 17 00:00:00 2001 From: Dominik Steenken Date: Tue, 3 Dec 2024 17:08:51 +0100 Subject: [PATCH 084/191] [SystemZ] Add realistic cost estimates for vector reduction intrinsics (#118319) This PR adds more realistic cost estimates for these reduction intrinsics - `llvm.vector.reduce.umax` - `llvm.vector.reduce.umin` - `llvm.vector.reduce.smax` - `llvm.vector.reduce.smin` - `llvm.vector.reduce.fadd` - `llvm.vector.reduce.fmul` - `llvm.vector.reduce.fmax` - `llvm.vector.reduce.fmin` - `llvm.vector.reduce.fmaximum` - `llvm.vector.reduce.fminimum` - `llvm.vector.reduce.mul ` The pre-existing cost estimates for `llvm.vector.reduce.add` are moved to `getArithmeticReductionCosts` to reduce complexity in `getVectorIntrinsicInstrCost` and enable other passes, like the 
SLP vectorizer, to benefit from these updated calculations.

These are not expected to provide noticeable performance improvements and are
rather provided for the sake of completeness and correctness. This PR is in
draft mode pending benchmark confirmation of this.

This also provides and/or updates cost tests for all of these intrinsics.

This PR was co-authored by me and @JonPsson1.
---
 .../SystemZ/SystemZTargetTransformInfo.cpp    |  93 +++-
 .../SystemZ/SystemZTargetTransformInfo.h      |   7 +
 .../Analysis/CostModel/SystemZ/reduce-add.ll  | 128 ------
 .../CostModel/SystemZ/vector-reductions.ll    | 376 ++++++++++++++++
 .../SLPVectorizer/SystemZ/reductions-fadd.ll  | 188 ++++++++
 .../SystemZ/reductions-fmin-fmax.ll           | 411 ++++++++++++++++++
 .../SLPVectorizer/SystemZ/reductions-fmul.ll  | 188 ++++++++
 7 files changed, 1245 insertions(+), 146 deletions(-)
 delete mode 100644 llvm/test/Analysis/CostModel/SystemZ/reduce-add.ll
 create mode 100644 llvm/test/Analysis/CostModel/SystemZ/vector-reductions.ll
 create mode 100644 llvm/test/Transforms/SLPVectorizer/SystemZ/reductions-fadd.ll
 create mode 100644 llvm/test/Transforms/SLPVectorizer/SystemZ/reductions-fmin-fmax.ll
 create mode 100644 llvm/test/Transforms/SLPVectorizer/SystemZ/reductions-fmul.ll

diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 83b42f6d1794d..772efcdf8f9fc 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -21,6 +21,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/InstructionCost.h"
 #include "llvm/Support/MathExtras.h"

 using namespace llvm;
@@ -1396,30 +1397,86 @@ InstructionCost SystemZTTIImpl::getInterleavedMemoryOpCost(
   return NumVectorMemOps + NumPermutes;
 }

+InstructionCost getIntAddReductionCost(unsigned NumVec, unsigned ScalarBits) {
+  InstructionCost Cost = 0;
+  // Binary Tree of N/2 + N/4 + ... operations yields N - 1 operations total.
+  Cost += NumVec - 1;
+  // For integer adds, VSUM creates shorter reductions on the final vector.
+  Cost += (ScalarBits < 32) ? 3 : 2;
+  return Cost;
+}
+
+InstructionCost getFastReductionCost(unsigned NumVec, unsigned NumElems,
+                                     unsigned ScalarBits) {
+  unsigned NumEltsPerVecReg = (SystemZ::VectorBits / ScalarBits);
+  InstructionCost Cost = 0;
+  // Binary Tree of N/2 + N/4 + ... operations yields N - 1 operations total.
+  Cost += NumVec - 1;
+  // For each shuffle / arithmetic layer, we need 2 instructions, and we need
+  // log2(Elements in Last Vector) layers.
+  Cost += 2 * Log2_32_Ceil(std::min(NumElems, NumEltsPerVecReg));
+  return Cost;
+}
+
+inline bool customCostReductions(unsigned Opcode) {
+  return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
+         Opcode == Instruction::Add || Opcode == Instruction::Mul;
+}
+
+InstructionCost
+SystemZTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
+                                           std::optional<FastMathFlags> FMF,
+                                           TTI::TargetCostKind CostKind) {
+  unsigned ScalarBits = Ty->getScalarSizeInBits();
+  // The following is only for subtargets with vector math, non-ordered
+  // reductions, and reasonable scalar sizes for int and fp add/mul.
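+  // Illustrative worked examples (an informal sketch assuming a 128-bit
+  // vector register subtarget; the numbers mirror the costs checked in
+  // vector-reductions.ll rather than coming from a separate source):
+  //  * fast fadd over <8 x float>: 2 vector regs -> 2 - 1 = 1 vector fadd for
+  //    the binary tree, then 2 * log2(4) = 4 shuffle/fadd instructions to
+  //    reduce the last register, plus 1 op to fold in the scalar start value,
+  //    for a total cost of 6.
+  //  * add over <16 x i32>: 4 vector regs -> 4 - 1 = 3 vector adds, plus 2
+  //    instructions for the final VSUM-based step, for a total cost of 5.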
+  if (customCostReductions(Opcode) && ST->hasVector() &&
+      !TTI::requiresOrderedReduction(FMF) &&
+      ScalarBits <= SystemZ::VectorBits) {
+    unsigned NumVectors = getNumVectorRegs(Ty);
+    unsigned NumElems = ((FixedVectorType *)Ty)->getNumElements();
+    // Integer Add uses custom code gen that needs to be accounted for.
+    if (Opcode == Instruction::Add)
+      return getIntAddReductionCost(NumVectors, ScalarBits);
+    // The base cost is the same across all other arithmetic instructions.
+    InstructionCost Cost =
+        getFastReductionCost(NumVectors, NumElems, ScalarBits);
+    // But we need to account for the final op involving the scalar operand.
+    if ((Opcode == Instruction::FAdd) || (Opcode == Instruction::FMul))
+      Cost += 1;
+    return Cost;
+  }
+  // Otherwise, fall back to the standard implementation.
+  return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
+}
+
+InstructionCost
+SystemZTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
+                                       FastMathFlags FMF,
+                                       TTI::TargetCostKind CostKind) {
+  // Return custom costs only on subtargets with vector enhancements.
+  if (ST->hasVectorEnhancements1()) {
+    unsigned NumVectors = getNumVectorRegs(Ty);
+    unsigned NumElems = ((FixedVectorType *)Ty)->getNumElements();
+    unsigned ScalarBits = Ty->getScalarSizeInBits();
+    InstructionCost Cost = 0;
+    // Binary Tree of N/2 + N/4 + ... operations yields N - 1 operations total.
+    Cost += NumVectors - 1;
+    // For the final vector, we need shuffle + min/max operations, and
+    // we need #Elements - 1 of them.
+    Cost += 2 * (std::min(NumElems, SystemZ::VectorBits / ScalarBits) - 1);
+    return Cost;
+  }
+  // For other targets, fall back to the standard implementation.
+  return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind);
+}
+
 static int getVectorIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                        const SmallVectorImpl<Type *> &ParamTys) {
   if (RetTy->isVectorTy() && ID == Intrinsic::bswap)
     return getNumVectorRegs(RetTy); // VPERM

-  if (ID == Intrinsic::vector_reduce_add) {
-    // Retrieve number and size of elements for the vector op.
-    auto *VTy = cast<FixedVectorType>(ParamTys.front());
-    unsigned ScalarSize = VTy->getScalarSizeInBits();
-    // For scalar sizes >128 bits, we fall back to the generic cost estimate.
-    if (ScalarSize > SystemZ::VectorBits)
-      return -1;
-    // This many vector regs are needed to represent the input elements (V).
-    unsigned VectorRegsNeeded = getNumVectorRegs(VTy);
-    // This many instructions are needed for the final sum of vector elems (S).
-    unsigned LastVectorHandling = (ScalarSize < 32) ? 3 : 2;
-    // We use vector adds to create a sum vector, which takes
-    // V/2 + V/4 + ... = V - 1 operations.
-    // Then, we need S operations to sum up the elements of that sum vector,
-    // for a total of V + S - 1 operations.
- int Cost = VectorRegsNeeded + LastVectorHandling - 1; - return Cost; - } return -1; } diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h index 6795da59bf5b1..512fcc854d532 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -130,6 +130,13 @@ class SystemZTTIImpl : public BasicTTIImplBase { Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond = false, bool UseMaskForGaps = false); + InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, + std::optional FMF, + TTI::TargetCostKind CostKind); + InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, + FastMathFlags FMF, + TTI::TargetCostKind CostKind); + InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind); diff --git a/llvm/test/Analysis/CostModel/SystemZ/reduce-add.ll b/llvm/test/Analysis/CostModel/SystemZ/reduce-add.ll deleted file mode 100644 index 90b5b746c914a..0000000000000 --- a/llvm/test/Analysis/CostModel/SystemZ/reduce-add.ll +++ /dev/null @@ -1,128 +0,0 @@ -; RUN: opt < %s -mtriple=systemz-unknown -mcpu=z13 -passes="print" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s - -define void @reduce(ptr %src, ptr %dst) { -; CHECK-LABEL: 'reduce' -; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %R2_64 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %V2_64) -; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %R4_64 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %V4_64) -; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %R8_64 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %V8_64) -; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %R16_64 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %V16_64) -; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %R2_32 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %V2_32) -; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %R4_32 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %V4_32) -; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %R8_32 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %V8_32) -; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %R16_32 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %V16_32) -; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %R2_16 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %V2_16) -; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %R4_16 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %V4_16) -; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %R8_16 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %V8_16) -; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %R16_16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %V16_16) -; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %R2_8 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %V2_8) -; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %R4_8 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %V4_8) -; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %R8_8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %V8_8) -; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %R16_8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %V16_8) -; -; CHECK: 
Cost Model: Found an estimated cost of 10 for instruction: %R128_8 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> %V128_8) -; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %R4_256 = call i256 @llvm.vector.reduce.add.v4i256(<4 x i256> %V4_256) - - ; REDUCEADD64 - - %V2_64 = load <2 x i64>, ptr %src, align 8 - %R2_64 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %V2_64) - store volatile i64 %R2_64, ptr %dst, align 4 - - %V4_64 = load <4 x i64>, ptr %src, align 8 - %R4_64 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %V4_64) - store volatile i64 %R4_64, ptr %dst, align 4 - - %V8_64 = load <8 x i64>, ptr %src, align 8 - %R8_64 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %V8_64) - store volatile i64 %R8_64, ptr %dst, align 4 - - %V16_64 = load <16 x i64>, ptr %src, align 8 - %R16_64 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %V16_64) - store volatile i64 %R16_64, ptr %dst, align 4 - - ; REDUCEADD32 - - %V2_32 = load <2 x i32>, ptr %src, align 8 - %R2_32 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %V2_32) - store volatile i32 %R2_32, ptr %dst, align 4 - - %V4_32 = load <4 x i32>, ptr %src, align 8 - %R4_32 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %V4_32) - store volatile i32 %R4_32, ptr %dst, align 4 - - %V8_32 = load <8 x i32>, ptr %src, align 8 - %R8_32 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %V8_32) - store volatile i32 %R8_32, ptr %dst, align 4 - - %V16_32 = load <16 x i32>, ptr %src, align 8 - %R16_32 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %V16_32) - store volatile i32 %R16_32, ptr %dst, align 4 - - ; REDUCEADD16 - - %V2_16 = load <2 x i16>, ptr %src, align 8 - %R2_16 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %V2_16) - store volatile i16 %R2_16, ptr %dst, align 4 - - %V4_16 = load <4 x i16>, ptr %src, align 8 - %R4_16 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %V4_16) - store volatile i16 %R4_16, ptr %dst, align 4 - - %V8_16 = load <8 x i16>, ptr %src, align 8 - %R8_16 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %V8_16) - store volatile i16 %R8_16, ptr %dst, align 4 - - %V16_16 = load <16 x i16>, ptr %src, align 8 - %R16_16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %V16_16) - store volatile i16 %R16_16, ptr %dst, align 4 - - ; REDUCEADD8 - - %V2_8 = load <2 x i8>, ptr %src, align 8 - %R2_8 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %V2_8) - store volatile i8 %R2_8, ptr %dst, align 4 - - %V4_8 = load <4 x i8>, ptr %src, align 8 - %R4_8 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %V4_8) - store volatile i8 %R4_8, ptr %dst, align 4 - - %V8_8 = load <8 x i8>, ptr %src, align 8 - %R8_8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %V8_8) - store volatile i8 %R8_8, ptr %dst, align 4 - - %V16_8 = load <16 x i8>, ptr %src, align 8 - %R16_8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %V16_8) - store volatile i8 %R16_8, ptr %dst, align 4 - - ; EXTREME VALUES - - %V128_8 = load <128 x i8>, ptr %src, align 8 - %R128_8 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> %V128_8) - store volatile i8 %R128_8, ptr %dst, align 4 - - %V4_256 = load <4 x i256>, ptr %src, align 8 - %R4_256 = call i256 @llvm.vector.reduce.add.v4i256(<4 x i256> %V4_256) - store volatile i256 %R4_256, ptr %dst, align 8 - - ret void -} - -declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) -declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>) -declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>) -declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>) -declare i32 
@llvm.vector.reduce.add.v2i32(<2 x i32>) -declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) -declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) -declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) -declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>) -declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>) -declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) -declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) -declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>) -declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>) -declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>) -declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) - -declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>) -declare i256 @llvm.vector.reduce.add.v4i256(<4 x i256>) diff --git a/llvm/test/Analysis/CostModel/SystemZ/vector-reductions.ll b/llvm/test/Analysis/CostModel/SystemZ/vector-reductions.ll new file mode 100644 index 0000000000000..0def20215e988 --- /dev/null +++ b/llvm/test/Analysis/CostModel/SystemZ/vector-reductions.ll @@ -0,0 +1,376 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -passes='print' -disable-output -mtriple=s390x-unknown-linux \ +; RUN: -mcpu=z15 < %s 2>&1 | FileCheck %s --check-prefix=Z15 + +define void @fadd_reductions() { +; Z15-LABEL: 'fadd_reductions' +; Z15-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef) + %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef) + %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef) + %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef) + %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef) + ret void +} + +define void @fast_fadd_reductions(ptr %src, ptr %dst) { +; Z15-LABEL: 'fast_fadd_reductions' +; Z15-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: 
%fadd_v4f128 = call fast fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef) + %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef) + %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef) + %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef) + %fadd_v4f128 = call fast fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef) + ret void +} + +define void @fmul_reductions() { +; Z15-LABEL: 'fmul_reductions' +; Z15-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef) + %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef) + %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef) + %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef) + %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef) + ret void +} + +define void @fast_fmul_reductions() { +; Z15-LABEL: 'fast_fmul_reductions' +; Z15-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fmul_v4f32 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fmul_v8f32 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fmul_v2f64 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmul_v4f64 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v4f128 = call fast fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %fmul_v4f32 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef) + %fmul_v8f32 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef) + %fmul_v2f64 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef) + %fmul_v4f64 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef) + %fadd_v4f128 = call fast fp128 
@llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef) + + ret void +} + +define void @fmin_reductions() { +; Z15-LABEL: 'fmin_reductions' +; Z15-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef) + %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef) + %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef) + %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef) + %V4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef) + ret void +} + +define void @fmax_reductions() { +; Z15-LABEL: 'fmax_reductions' +; Z15-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef) + %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef) + %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef) + %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef) + %V4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef) + ret void +} + +define void @reduceumin() { +; Z15-LABEL: 'reduceumin' +; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) +; Z15-NEXT Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) +; Z15-NEXT Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef) +; Z15-NEXT Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef) +; Z15-NEXT Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef) +; Z15-NEXT Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.umin.v4i128(<4 x i128> undef) +; + %V2_64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) + %V4_64 = call i64 
@llvm.vector.reduce.umin.v4i64(<4 x i64> undef) + %V4_32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef) + %V8_32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef) + + %V128_8 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef) + %V4_128 = call i128 @llvm.vector.reduce.umin.v4i128(<4 x i128> undef) + + ret void +} + +define void @reduceumax() { +; Z15-LABEL: 'reduceumax' +; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef) +; Z15-NEXT Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef) +; Z15-NEXT Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef) +; Z15-NEXT Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef) +; Z15-NEXT Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef) +; Z15-NEXT Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.umax.v4i128(<4 x i128> undef) +; + %V2_64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef) + %V4_64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef) + %V4_32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef) + %V8_32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef) + + %V128_8 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef) + %V4_128 = call i128 @llvm.vector.reduce.umax.v4i128(<4 x i128> undef) + + ret void +} + +define void @reducesmin() { +; Z15-LABEL: 'reducesmin' +; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) +; Z15-NEXT Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) +; Z15-NEXT Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) +; Z15-NEXT Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) +; Z15-NEXT Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef) +; Z15-NEXT Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.smin.v4i128(<4 x i128> undef) +; + %V2_64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) + %V4_64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) + %V4_32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) + %V8_32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) + + %V128_8 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef) + %V4_128 = call i128 @llvm.vector.reduce.smin.v4i128(<4 x i128> undef) + + ret void +} + +define void @reducesmax() { +; Z15-LABEL: 'reducesmax' +; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) +; Z15-NEXT Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) +; Z15-NEXT Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) +; Z15-NEXT Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 
@llvm.vector.reduce.smax.v8i32(<8 x i32> undef) +; Z15-NEXT Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) +; Z15-NEXT Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.smax.v4i128(<4 x i128> undef) +; + %V2_64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) + %V4_64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) + %V4_32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) + %V8_32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) + + %V128_8 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) + %V4_128 = call i128 @llvm.vector.reduce.smax.v4i128(<4 x i128> undef) + + ret void +} + +define void @reduceadd() { +; Z15-LABEL: 'reduceadd' +; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_64 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16_64 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_32 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4_32 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_32 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16_32 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_16 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_16 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_16 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16_16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_8 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_8 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) +; +; Z15-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_8 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef) +; Z15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4_256 = call i256 @llvm.vector.reduce.add.v4i256(<4 x i256> undef) + + ; REDUCEADD64 + %V2_64 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef) + %V4_64 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef) + %V8_64 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef) + %V16_64 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> 
undef) + ; REDUCEADD32 + %V2_32 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef) + %V4_32 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef) + %V8_32 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef) + %V16_32 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef) + ; REDUCEADD16 + %V2_16 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef) + %V4_16 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef) + %V8_16 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef) + %V16_16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef) + ; REDUCEADD8 + %V2_8 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef) + %V4_8 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) + %V8_8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) + %V16_8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) + ; EXTREME VALUES + %V128_8 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef) + %V4_256 = call i256 @llvm.vector.reduce.add.v4i256(<4 x i256> undef) + + ret void +} + +define void @reducemul() { +; CHECK-LABEL: 'reducemul' +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef) +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef) +; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %V8_64 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef) +; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %V16_64 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef) +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %V2_32 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef) +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %V4_32 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef) +; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %V8_32 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef) +; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %V16_32 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef) +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %V2_16 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef) +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %V4_16 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef) +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %V8_16 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef) +; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %V16_16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef) +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %V2_8 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef) +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %V4_8 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef) +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %V8_8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef) +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %V16_8 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef) +; +; CHECK: Cost Model: Found an estimated cost of 15 for instruction: %V128_8 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef) +; CHECK: Cost Model: Found an estimated cost of 28 for instruction: %V4_256 = call i256 @llvm.vector.reduce.mul.v4i256(<4 x i256> undef) + + ; REDUCEADD64 + %V2_64 = call i64 
@llvm.vector.reduce.mul.v2i64(<2 x i64> undef) + %V4_64 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef) + %V8_64 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef) + %V16_64 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef) + ; REDUCEADD32 + %V2_32 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef) + %V4_32 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef) + %V8_32 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef) + %V16_32 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef) + ; REDUCEADD16 + %V2_16 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef) + %V4_16 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef) + %V8_16 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef) + %V16_16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef) + ; REDUCEADD8 + %V2_8 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef) + %V4_8 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef) + %V8_8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef) + %V16_8 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef) + ; EXTREME VALUES + %V128_8 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef) + %V4_256 = call i256 @llvm.vector.reduce.mul.v4i256(<4 x i256> undef) + + ret void +} + +declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>) +declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>) +declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>) +declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>) +declare fp128 @llvm.vector.reduce.fadd.v4f128(fp128, <4 x fp128>) + +declare float @llvm.vector.reduce.fmul.v4f32(float, <4 x float>) +declare float @llvm.vector.reduce.fmul.v8f32(float, <8 x float>) +declare double @llvm.vector.reduce.fmul.v2f64(double, <2 x double>) +declare double @llvm.vector.reduce.fmul.v4f64(double, <4 x double>) +declare fp128 @llvm.vector.reduce.fmul.v4f128(fp128, <4 x fp128>) + +declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>) +declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>) +declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>) +declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>) +declare fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128>) + +declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) +declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>) +declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>) +declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>) +declare fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128>) + +declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>) +declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>) +declare i8 @llvm.vector.reduce.umin.v128i8(<128 x i8>) +declare i128 @llvm.vector.reduce.umin.v4i128(<4 x i128>) + +declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>) +declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>) +declare i8 @llvm.vector.reduce.umax.v128i8(<128 x i8>) +declare i128 @llvm.vector.reduce.umax.v4i128(<4 x i128>) + +declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>) +declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>) +declare i8 @llvm.vector.reduce.smin.v128i8(<128 x i8>) +declare i128 @llvm.vector.reduce.smin.v4i128(<4 x 
i128>) + +declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>) +declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>) +declare i8 @llvm.vector.reduce.smax.v128i8(<128 x i8>) +declare i128 @llvm.vector.reduce.smax.v4i128(<4 x i128>) + +declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>) +declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>) +declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) +declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) +declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) + +declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>) +declare i256 @llvm.vector.reduce.add.v4i256(<4 x i256>) + +declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.mul.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.mul.v8i64(<8 x i64>) +declare i64 @llvm.vector.reduce.mul.v16i64(<16 x i64>) +declare i32 @llvm.vector.reduce.mul.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.mul.v16i32(<16 x i32>) +declare i16 @llvm.vector.reduce.mul.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.mul.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.mul.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.mul.v16i16(<16 x i16>) +declare i8 @llvm.vector.reduce.mul.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.mul.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.mul.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.mul.v16i8(<16 x i8>) + +declare i8 @llvm.vector.reduce.mul.v128i8(<128 x i8>) +declare i256 @llvm.vector.reduce.mul.v4i256(<4 x i256>) diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/reductions-fadd.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/reductions-fadd.ll new file mode 100644 index 0000000000000..fa0587f1da931 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/reductions-fadd.ll @@ -0,0 +1,188 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -mtriple=s390x-unknown-linux -mcpu=z15 -passes=slp-vectorizer %s -S -o - \ +; RUN: | FileCheck %s +; +; Test vectorization and reassociation of fadd operations. If the loads can +; be vectorized, cases of fewer operands are also profitable to vectorize. 
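+;
+; Illustrative note (a hedged sketch, not autogenerated checker output): for
+; the sequential 4 x double case below, the SLP vectorizer can replace four
+; scalar loads and three scalar fadds with one <4 x double> load plus a single
+; llvm.vector.reduce.fadd, which the new SystemZ getArithmeticReductionCost
+; hook estimates at 4 (1 vector fadd to combine the two vector registers, 2
+; instructions for the in-register shuffle/fadd step, and 1 op to fold in the
+; scalar start value).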
+ +define double @fadd_double_4_addends_seq(ptr nocapture noundef readonly %x) { +; CHECK-LABEL: define double @fadd_double_4_addends_seq( +; CHECK-SAME: ptr nocapture noundef readonly [[X:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr [[X]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = call reassoc nsz arcp contract afn double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[TMP0]]) +; CHECK-NEXT: ret double [[TMP1]] +; +entry: + %0 = load double, ptr %x, align 8 + %arrayidx1 = getelementptr inbounds double, ptr %x, i64 1 + %1 = load double, ptr %arrayidx1, align 8 + %add = fadd reassoc nsz arcp contract afn double %1, %0 + %arrayidx2 = getelementptr inbounds double, ptr %x, i64 2 + %2 = load double, ptr %arrayidx2, align 8 + %add3 = fadd reassoc nsz arcp contract afn double %add, %2 + %arrayidx4 = getelementptr inbounds double, ptr %x, i64 3 + %3 = load double, ptr %arrayidx4, align 8 + %add5 = fadd reassoc nsz arcp contract afn double %add3, %3 + ret double %add5 +} + +define double @fadd_double_8_addends_nonseq(ptr nocapture noundef readonly %x) { +; CHECK-LABEL: define double @fadd_double_8_addends_nonseq( +; CHECK-SAME: ptr nocapture noundef readonly [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[X]], align 8 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2 +; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARRAYIDX1]], align 8 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 4 +; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[ARRAYIDX2]], align 8 +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds double, ptr [[X]], i64 6 +; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr [[ARRAYIDX4]], align 8 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[ARRAYIDX6]], align 8 +; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[X]], i64 10 +; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[X]], i64 12 +; CHECK-NEXT: [[TMP6:%.*]] = load double, ptr [[ARRAYIDX10]], align 8 +; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[X]], i64 14 +; CHECK-NEXT: [[TMP7:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x double> poison, double [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x double> [[TMP10]], double [[TMP3]], i32 3 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <8 x double> [[TMP11]], double [[TMP4]], i32 4 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x double> [[TMP12]], double [[TMP5]], i32 5 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[TMP6]], i32 6 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[TMP7]], i32 7 +; CHECK-NEXT: [[TMP16:%.*]] = call reassoc nsz arcp contract afn double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> [[TMP15]]) +; CHECK-NEXT: ret double [[TMP16]] +; +entry: + %0 = load double, ptr %x, align 8 + %arrayidx1 = getelementptr inbounds double, ptr %x, i64 2 + %1 = load double, ptr %arrayidx1, align 8 + %add = fadd reassoc nsz arcp contract afn 
double %1, %0 + %arrayidx2 = getelementptr inbounds double, ptr %x, i64 4 + %2 = load double, ptr %arrayidx2, align 8 + %add3 = fadd reassoc nsz arcp contract afn double %add, %2 + %arrayidx4 = getelementptr inbounds double, ptr %x, i64 6 + %3 = load double, ptr %arrayidx4, align 8 + %add5 = fadd reassoc nsz arcp contract afn double %add3, %3 + %arrayidx6 = getelementptr inbounds double, ptr %x, i64 8 + %4 = load double, ptr %arrayidx6, align 8 + %add7 = fadd reassoc nsz arcp contract afn double %add5, %4 + %arrayidx8 = getelementptr inbounds double, ptr %x, i64 10 + %5 = load double, ptr %arrayidx8, align 8 + %add9 = fadd reassoc nsz arcp contract afn double %add7, %5 + %arrayidx10 = getelementptr inbounds double, ptr %x, i64 12 + %6 = load double, ptr %arrayidx10, align 8 + %add11 = fadd reassoc nsz arcp contract afn double %add9, %6 + %arrayidx12 = getelementptr inbounds double, ptr %x, i64 14 + %7 = load double, ptr %arrayidx12, align 8 + %add13 = fadd reassoc nsz arcp contract afn double %add11, %7 + ret double %add13 +} + +define float @fadd_float_16_addends_nonseq(ptr nocapture noundef readonly %x) { +; CHECK-LABEL: define float @fadd_float_16_addends_nonseq( +; CHECK-SAME: ptr nocapture noundef readonly [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[X]], align 4 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2 +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 4 +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[X]], i64 6 +; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[X]], i64 10 +; CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[ARRAYIDX8]], align 4 +; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[X]], i64 12 +; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX10]], align 4 +; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, ptr [[X]], i64 14 +; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX12]], align 4 +; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX14]], align 4 +; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, ptr [[X]], i64 18 +; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX16]], align 4 +; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, ptr [[X]], i64 20 +; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX18]], align 4 +; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds float, ptr [[X]], i64 22 +; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX20]], align 4 +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, ptr [[X]], i64 24 +; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX22]], align 4 +; CHECK-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds float, ptr [[X]], i64 26 +; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX24]], align 4 +; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds float, ptr [[X]], i64 28 +; CHECK-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX26]], align 4 +; CHECK-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds 
float, ptr [[X]], i64 30 +; CHECK-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX28]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <16 x float> poison, float [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <16 x float> [[TMP16]], float [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <16 x float> [[TMP17]], float [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <16 x float> [[TMP18]], float [[TMP3]], i32 3 +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <16 x float> [[TMP19]], float [[TMP4]], i32 4 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <16 x float> [[TMP20]], float [[TMP5]], i32 5 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <16 x float> [[TMP21]], float [[TMP6]], i32 6 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <16 x float> [[TMP22]], float [[TMP7]], i32 7 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <16 x float> [[TMP23]], float [[TMP8]], i32 8 +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <16 x float> [[TMP24]], float [[TMP9]], i32 9 +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <16 x float> [[TMP25]], float [[TMP10]], i32 10 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <16 x float> [[TMP26]], float [[TMP11]], i32 11 +; CHECK-NEXT: [[TMP28:%.*]] = insertelement <16 x float> [[TMP27]], float [[TMP12]], i32 12 +; CHECK-NEXT: [[TMP29:%.*]] = insertelement <16 x float> [[TMP28]], float [[TMP13]], i32 13 +; CHECK-NEXT: [[TMP30:%.*]] = insertelement <16 x float> [[TMP29]], float [[TMP14]], i32 14 +; CHECK-NEXT: [[TMP31:%.*]] = insertelement <16 x float> [[TMP30]], float [[TMP15]], i32 15 +; CHECK-NEXT: [[TMP32:%.*]] = call reassoc nsz arcp contract afn float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP31]]) +; CHECK-NEXT: ret float [[TMP32]] +; +entry: + %0 = load float, ptr %x, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %x, i64 2 + %1 = load float, ptr %arrayidx1, align 4 + %add = fadd reassoc nsz arcp contract afn float %1, %0 + %arrayidx2 = getelementptr inbounds float, ptr %x, i64 4 + %2 = load float, ptr %arrayidx2, align 4 + %add3 = fadd reassoc nsz arcp contract afn float %add, %2 + %arrayidx4 = getelementptr inbounds float, ptr %x, i64 6 + %3 = load float, ptr %arrayidx4, align 4 + %add5 = fadd reassoc nsz arcp contract afn float %add3, %3 + %arrayidx6 = getelementptr inbounds float, ptr %x, i64 8 + %4 = load float, ptr %arrayidx6, align 4 + %add7 = fadd reassoc nsz arcp contract afn float %add5, %4 + %arrayidx8 = getelementptr inbounds float, ptr %x, i64 10 + %5 = load float, ptr %arrayidx8, align 4 + %add9 = fadd reassoc nsz arcp contract afn float %add7, %5 + %arrayidx10 = getelementptr inbounds float, ptr %x, i64 12 + %6 = load float, ptr %arrayidx10, align 4 + %add11 = fadd reassoc nsz arcp contract afn float %add9, %6 + %arrayidx12 = getelementptr inbounds float, ptr %x, i64 14 + %7 = load float, ptr %arrayidx12, align 4 + %add13 = fadd reassoc nsz arcp contract afn float %add11, %7 + %arrayidx14 = getelementptr inbounds float, ptr %x, i64 16 + %8 = load float, ptr %arrayidx14, align 4 + %add15 = fadd reassoc nsz arcp contract afn float %add13, %8 + %arrayidx16 = getelementptr inbounds float, ptr %x, i64 18 + %9 = load float, ptr %arrayidx16, align 4 + %add17 = fadd reassoc nsz arcp contract afn float %add15, %9 + %arrayidx18 = getelementptr inbounds float, ptr %x, i64 20 + %10 = load float, ptr %arrayidx18, align 4 + %add19 = fadd reassoc nsz arcp contract afn float %add17, %10 + %arrayidx20 = getelementptr inbounds float, ptr %x, i64 22 + %11 = load float, ptr %arrayidx20, align 4 + 
%add21 = fadd reassoc nsz arcp contract afn float %add19, %11 + %arrayidx22 = getelementptr inbounds float, ptr %x, i64 24 + %12 = load float, ptr %arrayidx22, align 4 + %add23 = fadd reassoc nsz arcp contract afn float %add21, %12 + %arrayidx24 = getelementptr inbounds float, ptr %x, i64 26 + %13 = load float, ptr %arrayidx24, align 4 + %add25 = fadd reassoc nsz arcp contract afn float %add23, %13 + %arrayidx26 = getelementptr inbounds float, ptr %x, i64 28 + %14 = load float, ptr %arrayidx26, align 4 + %add27 = fadd reassoc nsz arcp contract afn float %add25, %14 + %arrayidx28 = getelementptr inbounds float, ptr %x, i64 30 + %15 = load float, ptr %arrayidx28, align 4 + %add29 = fadd reassoc nsz arcp contract afn float %add27, %15 + ret float %add29 +} diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/reductions-fmin-fmax.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/reductions-fmin-fmax.ll new file mode 100644 index 0000000000000..5ea777e1c9a10 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/reductions-fmin-fmax.ll @@ -0,0 +1,411 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -mtriple=s390x-unknown-linux -mcpu=z15 -passes=slp-vectorizer %s -S -o - \ +; RUN: | FileCheck %s + +; Test vectorization and reassociation of fmin/fmax operations. Vectorization +; is more profitable if the loads are also vectorizable. + +define double @fmin_double_4_nums_seq(ptr nocapture noundef readonly %x) { +; CHECK-LABEL: define double @fmin_double_4_nums_seq( +; CHECK-SAME: ptr nocapture noundef readonly [[X:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, ptr [[X]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> [[TMP1]]) +; CHECK-NEXT: ret double [[TMP2]] +; + %g1 = getelementptr inbounds double, ptr %x, i64 1 + %g2 = getelementptr inbounds double, ptr %x, i64 2 + %g3 = getelementptr inbounds double, ptr %x, i64 3 + %t0 = load double, ptr %x, align 4 + %t1 = load double, ptr %g1, align 4 + %t2 = load double, ptr %g2, align 4 + %t3 = load double, ptr %g3, align 4 + %m1 = tail call fast double @llvm.minnum.f64(double %t1, double %t0) + %m2 = tail call fast double @llvm.minnum.f64(double %t2, double %m1) + %m3 = tail call fast double @llvm.minnum.f64(double %t3, double %m2) + ret double %m3 +} + +define double @fmin_double_16_nums_nonseq(ptr nocapture noundef readonly %x) { +; CHECK-LABEL: define double @fmin_double_16_nums_nonseq( +; CHECK-SAME: ptr nocapture noundef readonly [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 4 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 6 +; CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds double, ptr [[X]], i64 8 +; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds double, ptr [[X]], i64 10 +; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 12 +; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds double, ptr [[X]], i64 14 +; CHECK-NEXT: [[G8:%.*]] = getelementptr inbounds double, ptr [[X]], i64 16 +; CHECK-NEXT: [[G9:%.*]] = getelementptr inbounds double, ptr [[X]], i64 18 +; CHECK-NEXT: [[G10:%.*]] = getelementptr inbounds double, ptr [[X]], i64 20 +; CHECK-NEXT: [[G11:%.*]] = getelementptr inbounds double, ptr [[X]], i64 22 +; CHECK-NEXT: [[G12:%.*]] = getelementptr inbounds double, ptr [[X]], i64 24 +; CHECK-NEXT: [[G13:%.*]] = 
getelementptr inbounds double, ptr [[X]], i64 26 +; CHECK-NEXT: [[G14:%.*]] = getelementptr inbounds double, ptr [[X]], i64 28 +; CHECK-NEXT: [[G15:%.*]] = getelementptr inbounds double, ptr [[X]], i64 30 +; CHECK-NEXT: [[T0:%.*]] = load double, ptr [[X]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load double, ptr [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load double, ptr [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load double, ptr [[G3]], align 4 +; CHECK-NEXT: [[T4:%.*]] = load double, ptr [[G4]], align 4 +; CHECK-NEXT: [[T5:%.*]] = load double, ptr [[G5]], align 4 +; CHECK-NEXT: [[T6:%.*]] = load double, ptr [[G6]], align 4 +; CHECK-NEXT: [[T7:%.*]] = load double, ptr [[G7]], align 4 +; CHECK-NEXT: [[T8:%.*]] = load double, ptr [[G8]], align 4 +; CHECK-NEXT: [[T9:%.*]] = load double, ptr [[G9]], align 4 +; CHECK-NEXT: [[T10:%.*]] = load double, ptr [[G10]], align 4 +; CHECK-NEXT: [[T11:%.*]] = load double, ptr [[G11]], align 4 +; CHECK-NEXT: [[T12:%.*]] = load double, ptr [[G12]], align 4 +; CHECK-NEXT: [[T13:%.*]] = load double, ptr [[G13]], align 4 +; CHECK-NEXT: [[T14:%.*]] = load double, ptr [[G14]], align 4 +; CHECK-NEXT: [[T15:%.*]] = load double, ptr [[G15]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x double> poison, double [[T1]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x double> [[TMP1]], double [[T0]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x double> [[TMP2]], double [[T2]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x double> [[TMP3]], double [[T3]], i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x double> [[TMP4]], double [[T4]], i32 4 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x double> [[TMP5]], double [[T5]], i32 5 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x double> [[TMP6]], double [[T6]], i32 6 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x double> [[TMP7]], double [[T7]], i32 7 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x double> [[TMP8]], double [[T8]], i32 8 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x double> [[TMP9]], double [[T9]], i32 9 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x double> [[TMP10]], double [[T10]], i32 10 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <16 x double> [[TMP11]], double [[T11]], i32 11 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <16 x double> [[TMP12]], double [[T12]], i32 12 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <16 x double> [[TMP13]], double [[T13]], i32 13 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <16 x double> [[TMP14]], double [[T14]], i32 14 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <16 x double> [[TMP15]], double [[T15]], i32 15 +; CHECK-NEXT: [[TMP17:%.*]] = call fast double @llvm.vector.reduce.fmin.v16f64(<16 x double> [[TMP16]]) +; CHECK-NEXT: ret double [[TMP17]] +; + %g1 = getelementptr inbounds double, ptr %x, i64 2 + %g2 = getelementptr inbounds double, ptr %x, i64 4 + %g3 = getelementptr inbounds double, ptr %x, i64 6 + %g4 = getelementptr inbounds double, ptr %x, i64 8 + %g5 = getelementptr inbounds double, ptr %x, i64 10 + %g6 = getelementptr inbounds double, ptr %x, i64 12 + %g7 = getelementptr inbounds double, ptr %x, i64 14 + %g8 = getelementptr inbounds double, ptr %x, i64 16 + %g9 = getelementptr inbounds double, ptr %x, i64 18 + %g10 = getelementptr inbounds double, ptr %x, i64 20 + %g11 = getelementptr inbounds double, ptr %x, i64 22 + %g12 = getelementptr inbounds double, ptr %x, i64 24 + %g13 = getelementptr inbounds double, ptr %x, i64 26 + %g14 = getelementptr inbounds double, ptr %x, i64 28 + %g15 = 
getelementptr inbounds double, ptr %x, i64 30 + %t0 = load double, ptr %x, align 4 + %t1 = load double, ptr %g1, align 4 + %t2 = load double, ptr %g2, align 4 + %t3 = load double, ptr %g3, align 4 + %t4 = load double, ptr %g4, align 4 + %t5 = load double, ptr %g5, align 4 + %t6 = load double, ptr %g6, align 4 + %t7 = load double, ptr %g7, align 4 + %t8 = load double, ptr %g8, align 4 + %t9 = load double, ptr %g9, align 4 + %t10 = load double, ptr %g10, align 4 + %t11 = load double, ptr %g11, align 4 + %t12 = load double, ptr %g12, align 4 + %t13 = load double, ptr %g13, align 4 + %t14 = load double, ptr %g14, align 4 + %t15 = load double, ptr %g15, align 4 + %m1 = tail call fast double @llvm.minnum.f64(double %t1, double %t0) + %m2 = tail call fast double @llvm.minnum.f64(double %t2, double %m1) + %m3 = tail call fast double @llvm.minnum.f64(double %t3, double %m2) + %m4 = tail call fast double @llvm.minnum.f64(double %t4, double %m3) + %m5 = tail call fast double @llvm.minnum.f64(double %t5, double %m4) + %m6 = tail call fast double @llvm.minnum.f64(double %t6, double %m5) + %m7 = tail call fast double @llvm.minnum.f64(double %t7, double %m6) + %m8 = tail call fast double @llvm.minnum.f64(double %t8, double %m7) + %m9 = tail call fast double @llvm.minnum.f64(double %t9, double %m8) + %m10 = tail call fast double @llvm.minnum.f64(double %t10, double %m9) + %m11 = tail call fast double @llvm.minnum.f64(double %t11, double %m10) + %m12 = tail call fast double @llvm.minnum.f64(double %t12, double %m11) + %m13 = tail call fast double @llvm.minnum.f64(double %t13, double %m12) + %m14 = tail call fast double @llvm.minnum.f64(double %t14, double %m13) + %m15 = tail call fast double @llvm.minnum.f64(double %t15, double %m14) + ret double %m15 +} + +define float @fmin_float_12_nums_nonseq(ptr nocapture noundef readonly %x) { +; CHECK-LABEL: define float @fmin_float_12_nums_nonseq( +; CHECK-SAME: ptr nocapture noundef readonly [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 4 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 6 +; CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds float, ptr [[X]], i64 8 +; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds float, ptr [[X]], i64 10 +; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds float, ptr [[X]], i64 12 +; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds float, ptr [[X]], i64 14 +; CHECK-NEXT: [[G8:%.*]] = getelementptr inbounds float, ptr [[X]], i64 16 +; CHECK-NEXT: [[G9:%.*]] = getelementptr inbounds float, ptr [[X]], i64 18 +; CHECK-NEXT: [[G10:%.*]] = getelementptr inbounds float, ptr [[X]], i64 20 +; CHECK-NEXT: [[G11:%.*]] = getelementptr inbounds float, ptr [[X]], i64 22 +; CHECK-NEXT: [[T0:%.*]] = load float, ptr [[X]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load float, ptr [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load float, ptr [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load float, ptr [[G3]], align 4 +; CHECK-NEXT: [[T4:%.*]] = load float, ptr [[G4]], align 4 +; CHECK-NEXT: [[T5:%.*]] = load float, ptr [[G5]], align 4 +; CHECK-NEXT: [[T6:%.*]] = load float, ptr [[G6]], align 4 +; CHECK-NEXT: [[T7:%.*]] = load float, ptr [[G7]], align 4 +; CHECK-NEXT: [[T8:%.*]] = load float, ptr [[G8]], align 4 +; CHECK-NEXT: [[T9:%.*]] = load float, ptr [[G9]], align 4 +; CHECK-NEXT: [[T10:%.*]] = load float, ptr [[G10]], align 4 +; CHECK-NEXT: [[T11:%.*]] = load float, ptr [[G11]], align 4 +; CHECK-NEXT: 
[[TMP1:%.*]] = insertelement <12 x float> poison, float [[T1]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <12 x float> [[TMP1]], float [[T0]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <12 x float> [[TMP2]], float [[T2]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <12 x float> [[TMP3]], float [[T3]], i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <12 x float> [[TMP4]], float [[T4]], i32 4 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <12 x float> [[TMP5]], float [[T5]], i32 5 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <12 x float> [[TMP6]], float [[T6]], i32 6 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <12 x float> [[TMP7]], float [[T7]], i32 7 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <12 x float> [[TMP8]], float [[T8]], i32 8 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <12 x float> [[TMP9]], float [[T9]], i32 9 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <12 x float> [[TMP10]], float [[T10]], i32 10 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <12 x float> [[TMP11]], float [[T11]], i32 11 +; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fmin.v12f32(<12 x float> [[TMP12]]) +; CHECK-NEXT: ret float [[TMP13]] +; + %g1 = getelementptr inbounds float, ptr %x, i64 2 + %g2 = getelementptr inbounds float, ptr %x, i64 4 + %g3 = getelementptr inbounds float, ptr %x, i64 6 + %g4 = getelementptr inbounds float, ptr %x, i64 8 + %g5 = getelementptr inbounds float, ptr %x, i64 10 + %g6 = getelementptr inbounds float, ptr %x, i64 12 + %g7 = getelementptr inbounds float, ptr %x, i64 14 + %g8 = getelementptr inbounds float, ptr %x, i64 16 + %g9 = getelementptr inbounds float, ptr %x, i64 18 + %g10 = getelementptr inbounds float, ptr %x, i64 20 + %g11 = getelementptr inbounds float, ptr %x, i64 22 + %t0 = load float, ptr %x, align 4 + %t1 = load float, ptr %g1, align 4 + %t2 = load float, ptr %g2, align 4 + %t3 = load float, ptr %g3, align 4 + %t4 = load float, ptr %g4, align 4 + %t5 = load float, ptr %g5, align 4 + %t6 = load float, ptr %g6, align 4 + %t7 = load float, ptr %g7, align 4 + %t8 = load float, ptr %g8, align 4 + %t9 = load float, ptr %g9, align 4 + %t10 = load float, ptr %g10, align 4 + %t11 = load float, ptr %g11, align 4 + %m1 = tail call fast float @llvm.minnum.f32(float %t1, float %t0) + %m2 = tail call fast float @llvm.minnum.f32(float %t2, float %m1) + %m3 = tail call fast float @llvm.minnum.f32(float %t3, float %m2) + %m4 = tail call fast float @llvm.minnum.f32(float %t4, float %m3) + %m5 = tail call fast float @llvm.minnum.f32(float %t5, float %m4) + %m6 = tail call fast float @llvm.minnum.f32(float %t6, float %m5) + %m7 = tail call fast float @llvm.minnum.f32(float %t7, float %m6) + %m8 = tail call fast float @llvm.minnum.f32(float %t8, float %m7) + %m9 = tail call fast float @llvm.minnum.f32(float %t9, float %m8) + %m10 = tail call fast float @llvm.minnum.f32(float %t10, float %m9) + %m11 = tail call fast float @llvm.minnum.f32(float %t11, float %m10) + ret float %m11 +} + +define double @fmax_double_4_nums_seq(ptr nocapture noundef readonly %x) { +; CHECK-LABEL: define double @fmax_double_4_nums_seq( +; CHECK-SAME: ptr nocapture noundef readonly [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, ptr [[X]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> [[TMP1]]) +; CHECK-NEXT: ret double [[TMP2]] +; + %g1 = getelementptr inbounds double, ptr %x, i64 1 + %g2 = getelementptr inbounds double, ptr %x, i64 2 + %g3 = getelementptr inbounds double, ptr %x, i64 3 + %t0 = 
load double, ptr %x, align 4 + %t1 = load double, ptr %g1, align 4 + %t2 = load double, ptr %g2, align 4 + %t3 = load double, ptr %g3, align 4 + %m1 = tail call fast double @llvm.maxnum.f64(double %t1, double %t0) + %m2 = tail call fast double @llvm.maxnum.f64(double %t2, double %m1) + %m3 = tail call fast double @llvm.maxnum.f64(double %t3, double %m2) + ret double %m3 +} + +define double @fmax_double_16_nums_nonseq(ptr nocapture noundef readonly %x) { +; CHECK-LABEL: define double @fmax_double_16_nums_nonseq( +; CHECK-SAME: ptr nocapture noundef readonly [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 4 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 6 +; CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds double, ptr [[X]], i64 8 +; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds double, ptr [[X]], i64 10 +; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 12 +; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds double, ptr [[X]], i64 14 +; CHECK-NEXT: [[G8:%.*]] = getelementptr inbounds double, ptr [[X]], i64 16 +; CHECK-NEXT: [[G9:%.*]] = getelementptr inbounds double, ptr [[X]], i64 18 +; CHECK-NEXT: [[G10:%.*]] = getelementptr inbounds double, ptr [[X]], i64 20 +; CHECK-NEXT: [[G11:%.*]] = getelementptr inbounds double, ptr [[X]], i64 22 +; CHECK-NEXT: [[G12:%.*]] = getelementptr inbounds double, ptr [[X]], i64 24 +; CHECK-NEXT: [[G13:%.*]] = getelementptr inbounds double, ptr [[X]], i64 26 +; CHECK-NEXT: [[G14:%.*]] = getelementptr inbounds double, ptr [[X]], i64 28 +; CHECK-NEXT: [[G15:%.*]] = getelementptr inbounds double, ptr [[X]], i64 30 +; CHECK-NEXT: [[T0:%.*]] = load double, ptr [[X]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load double, ptr [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load double, ptr [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load double, ptr [[G3]], align 4 +; CHECK-NEXT: [[T4:%.*]] = load double, ptr [[G4]], align 4 +; CHECK-NEXT: [[T5:%.*]] = load double, ptr [[G5]], align 4 +; CHECK-NEXT: [[T6:%.*]] = load double, ptr [[G6]], align 4 +; CHECK-NEXT: [[T7:%.*]] = load double, ptr [[G7]], align 4 +; CHECK-NEXT: [[T8:%.*]] = load double, ptr [[G8]], align 4 +; CHECK-NEXT: [[T9:%.*]] = load double, ptr [[G9]], align 4 +; CHECK-NEXT: [[T10:%.*]] = load double, ptr [[G10]], align 4 +; CHECK-NEXT: [[T11:%.*]] = load double, ptr [[G11]], align 4 +; CHECK-NEXT: [[T12:%.*]] = load double, ptr [[G12]], align 4 +; CHECK-NEXT: [[T13:%.*]] = load double, ptr [[G13]], align 4 +; CHECK-NEXT: [[T14:%.*]] = load double, ptr [[G14]], align 4 +; CHECK-NEXT: [[T15:%.*]] = load double, ptr [[G15]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x double> poison, double [[T1]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x double> [[TMP1]], double [[T0]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x double> [[TMP2]], double [[T2]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x double> [[TMP3]], double [[T3]], i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x double> [[TMP4]], double [[T4]], i32 4 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x double> [[TMP5]], double [[T5]], i32 5 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x double> [[TMP6]], double [[T6]], i32 6 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x double> [[TMP7]], double [[T7]], i32 7 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x double> [[TMP8]], double [[T8]], i32 8 +; CHECK-NEXT: 
[[TMP10:%.*]] = insertelement <16 x double> [[TMP9]], double [[T9]], i32 9 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x double> [[TMP10]], double [[T10]], i32 10 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <16 x double> [[TMP11]], double [[T11]], i32 11 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <16 x double> [[TMP12]], double [[T12]], i32 12 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <16 x double> [[TMP13]], double [[T13]], i32 13 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <16 x double> [[TMP14]], double [[T14]], i32 14 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <16 x double> [[TMP15]], double [[T15]], i32 15 +; CHECK-NEXT: [[TMP17:%.*]] = call fast double @llvm.vector.reduce.fmax.v16f64(<16 x double> [[TMP16]]) +; CHECK-NEXT: ret double [[TMP17]] +; + %g1 = getelementptr inbounds double, ptr %x, i64 2 + %g2 = getelementptr inbounds double, ptr %x, i64 4 + %g3 = getelementptr inbounds double, ptr %x, i64 6 + %g4 = getelementptr inbounds double, ptr %x, i64 8 + %g5 = getelementptr inbounds double, ptr %x, i64 10 + %g6 = getelementptr inbounds double, ptr %x, i64 12 + %g7 = getelementptr inbounds double, ptr %x, i64 14 + %g8 = getelementptr inbounds double, ptr %x, i64 16 + %g9 = getelementptr inbounds double, ptr %x, i64 18 + %g10 = getelementptr inbounds double, ptr %x, i64 20 + %g11 = getelementptr inbounds double, ptr %x, i64 22 + %g12 = getelementptr inbounds double, ptr %x, i64 24 + %g13 = getelementptr inbounds double, ptr %x, i64 26 + %g14 = getelementptr inbounds double, ptr %x, i64 28 + %g15 = getelementptr inbounds double, ptr %x, i64 30 + %t0 = load double, ptr %x, align 4 + %t1 = load double, ptr %g1, align 4 + %t2 = load double, ptr %g2, align 4 + %t3 = load double, ptr %g3, align 4 + %t4 = load double, ptr %g4, align 4 + %t5 = load double, ptr %g5, align 4 + %t6 = load double, ptr %g6, align 4 + %t7 = load double, ptr %g7, align 4 + %t8 = load double, ptr %g8, align 4 + %t9 = load double, ptr %g9, align 4 + %t10 = load double, ptr %g10, align 4 + %t11 = load double, ptr %g11, align 4 + %t12 = load double, ptr %g12, align 4 + %t13 = load double, ptr %g13, align 4 + %t14 = load double, ptr %g14, align 4 + %t15 = load double, ptr %g15, align 4 + %m1 = tail call fast double @llvm.maxnum.f64(double %t1, double %t0) + %m2 = tail call fast double @llvm.maxnum.f64(double %t2, double %m1) + %m3 = tail call fast double @llvm.maxnum.f64(double %t3, double %m2) + %m4 = tail call fast double @llvm.maxnum.f64(double %t4, double %m3) + %m5 = tail call fast double @llvm.maxnum.f64(double %t5, double %m4) + %m6 = tail call fast double @llvm.maxnum.f64(double %t6, double %m5) + %m7 = tail call fast double @llvm.maxnum.f64(double %t7, double %m6) + %m8 = tail call fast double @llvm.maxnum.f64(double %t8, double %m7) + %m9 = tail call fast double @llvm.maxnum.f64(double %t9, double %m8) + %m10 = tail call fast double @llvm.maxnum.f64(double %t10, double %m9) + %m11 = tail call fast double @llvm.maxnum.f64(double %t11, double %m10) + %m12 = tail call fast double @llvm.maxnum.f64(double %t12, double %m11) + %m13 = tail call fast double @llvm.maxnum.f64(double %t13, double %m12) + %m14 = tail call fast double @llvm.maxnum.f64(double %t14, double %m13) + %m15 = tail call fast double @llvm.maxnum.f64(double %t15, double %m14) + ret double %m15 +} + +define float @fmax_float_12_nums_nonseq(ptr nocapture noundef readonly %x) { +; CHECK-LABEL: define float @fmax_float_12_nums_nonseq( +; CHECK-SAME: ptr nocapture noundef readonly [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[G1:%.*]] = 
getelementptr inbounds float, ptr [[X]], i64 2 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 4 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 6 +; CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds float, ptr [[X]], i64 8 +; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds float, ptr [[X]], i64 10 +; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds float, ptr [[X]], i64 12 +; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds float, ptr [[X]], i64 14 +; CHECK-NEXT: [[G8:%.*]] = getelementptr inbounds float, ptr [[X]], i64 16 +; CHECK-NEXT: [[G9:%.*]] = getelementptr inbounds float, ptr [[X]], i64 18 +; CHECK-NEXT: [[G10:%.*]] = getelementptr inbounds float, ptr [[X]], i64 20 +; CHECK-NEXT: [[G11:%.*]] = getelementptr inbounds float, ptr [[X]], i64 22 +; CHECK-NEXT: [[T0:%.*]] = load float, ptr [[X]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load float, ptr [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load float, ptr [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load float, ptr [[G3]], align 4 +; CHECK-NEXT: [[T4:%.*]] = load float, ptr [[G4]], align 4 +; CHECK-NEXT: [[T5:%.*]] = load float, ptr [[G5]], align 4 +; CHECK-NEXT: [[T6:%.*]] = load float, ptr [[G6]], align 4 +; CHECK-NEXT: [[T7:%.*]] = load float, ptr [[G7]], align 4 +; CHECK-NEXT: [[T8:%.*]] = load float, ptr [[G8]], align 4 +; CHECK-NEXT: [[T9:%.*]] = load float, ptr [[G9]], align 4 +; CHECK-NEXT: [[T10:%.*]] = load float, ptr [[G10]], align 4 +; CHECK-NEXT: [[T11:%.*]] = load float, ptr [[G11]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <12 x float> poison, float [[T1]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <12 x float> [[TMP1]], float [[T0]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <12 x float> [[TMP2]], float [[T2]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <12 x float> [[TMP3]], float [[T3]], i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <12 x float> [[TMP4]], float [[T4]], i32 4 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <12 x float> [[TMP5]], float [[T5]], i32 5 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <12 x float> [[TMP6]], float [[T6]], i32 6 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <12 x float> [[TMP7]], float [[T7]], i32 7 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <12 x float> [[TMP8]], float [[T8]], i32 8 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <12 x float> [[TMP9]], float [[T9]], i32 9 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <12 x float> [[TMP10]], float [[T10]], i32 10 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <12 x float> [[TMP11]], float [[T11]], i32 11 +; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fmax.v12f32(<12 x float> [[TMP12]]) +; CHECK-NEXT: ret float [[TMP13]] +; + %g1 = getelementptr inbounds float, ptr %x, i64 2 + %g2 = getelementptr inbounds float, ptr %x, i64 4 + %g3 = getelementptr inbounds float, ptr %x, i64 6 + %g4 = getelementptr inbounds float, ptr %x, i64 8 + %g5 = getelementptr inbounds float, ptr %x, i64 10 + %g6 = getelementptr inbounds float, ptr %x, i64 12 + %g7 = getelementptr inbounds float, ptr %x, i64 14 + %g8 = getelementptr inbounds float, ptr %x, i64 16 + %g9 = getelementptr inbounds float, ptr %x, i64 18 + %g10 = getelementptr inbounds float, ptr %x, i64 20 + %g11 = getelementptr inbounds float, ptr %x, i64 22 + %t0 = load float, ptr %x, align 4 + %t1 = load float, ptr %g1, align 4 + %t2 = load float, ptr %g2, align 4 + %t3 = load float, ptr %g3, align 4 + %t4 = load float, ptr %g4, align 4 + %t5 = load float, ptr %g5, align 4 + %t6 = load float, ptr %g6, 
align 4 + %t7 = load float, ptr %g7, align 4 + %t8 = load float, ptr %g8, align 4 + %t9 = load float, ptr %g9, align 4 + %t10 = load float, ptr %g10, align 4 + %t11 = load float, ptr %g11, align 4 + %m1 = tail call fast float @llvm.maxnum.f32(float %t1, float %t0) + %m2 = tail call fast float @llvm.maxnum.f32(float %t2, float %m1) + %m3 = tail call fast float @llvm.maxnum.f32(float %t3, float %m2) + %m4 = tail call fast float @llvm.maxnum.f32(float %t4, float %m3) + %m5 = tail call fast float @llvm.maxnum.f32(float %t5, float %m4) + %m6 = tail call fast float @llvm.maxnum.f32(float %t6, float %m5) + %m7 = tail call fast float @llvm.maxnum.f32(float %t7, float %m6) + %m8 = tail call fast float @llvm.maxnum.f32(float %t8, float %m7) + %m9 = tail call fast float @llvm.maxnum.f32(float %t9, float %m8) + %m10 = tail call fast float @llvm.maxnum.f32(float %t10, float %m9) + %m11 = tail call fast float @llvm.maxnum.f32(float %t11, float %m10) + ret float %m11 +} + +declare float @llvm.minnum.f32(float, float) +declare double @llvm.minnum.f64(double, double) +declare float @llvm.maxnum.f32(float, float) +declare double @llvm.maxnum.f64(double, double) diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/reductions-fmul.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/reductions-fmul.ll new file mode 100644 index 0000000000000..e08b38c69a840 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/reductions-fmul.ll @@ -0,0 +1,188 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -mtriple=s390x-unknown-linux -mcpu=z15 -passes=slp-vectorizer %s -S -o - \ +; RUN: | FileCheck %s + +; Test vectorization and reassociation of fmul operations. If the loads can +; be vectorized, cases of fewer operands are also profitable to vectorize. 
+ +define double @fmul_double_4_factors_seq(ptr nocapture noundef readonly %x) { +; CHECK-LABEL: define double @fmul_double_4_factors_seq( +; CHECK-SAME: ptr nocapture noundef readonly [[X:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr [[X]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = call reassoc nsz arcp contract afn double @llvm.vector.reduce.fmul.v4f64(double 1.000000e+00, <4 x double> [[TMP0]]) +; CHECK-NEXT: ret double [[TMP1]] +; +entry: + %0 = load double, ptr %x, align 8 + %arrayidx1 = getelementptr inbounds double, ptr %x, i64 1 + %1 = load double, ptr %arrayidx1, align 8 + %mul = fmul reassoc nsz arcp contract afn double %1, %0 + %arrayidx2 = getelementptr inbounds double, ptr %x, i64 2 + %2 = load double, ptr %arrayidx2, align 8 + %mul3 = fmul reassoc nsz arcp contract afn double %mul, %2 + %arrayidx4 = getelementptr inbounds double, ptr %x, i64 3 + %3 = load double, ptr %arrayidx4, align 8 + %mul5 = fmul reassoc nsz arcp contract afn double %mul3, %3 + ret double %mul5 +} + +define double @fmul_double_8_factors_nonseq(ptr nocapture noundef readonly %x) { +; CHECK-LABEL: define double @fmul_double_8_factors_nonseq( +; CHECK-SAME: ptr nocapture noundef readonly [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[X]], align 8 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2 +; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARRAYIDX1]], align 8 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 4 +; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[ARRAYIDX2]], align 8 +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds double, ptr [[X]], i64 6 +; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr [[ARRAYIDX4]], align 8 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[ARRAYIDX6]], align 8 +; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[X]], i64 10 +; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[X]], i64 12 +; CHECK-NEXT: [[TMP6:%.*]] = load double, ptr [[ARRAYIDX10]], align 8 +; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[X]], i64 14 +; CHECK-NEXT: [[TMP7:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x double> poison, double [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x double> [[TMP10]], double [[TMP3]], i32 3 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <8 x double> [[TMP11]], double [[TMP4]], i32 4 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x double> [[TMP12]], double [[TMP5]], i32 5 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[TMP6]], i32 6 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[TMP7]], i32 7 +; CHECK-NEXT: [[TMP16:%.*]] = call reassoc nsz arcp contract afn double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> [[TMP15]]) +; CHECK-NEXT: ret double [[TMP16]] +; +entry: + %0 = load double, ptr %x, align 8 + %arrayidx1 = getelementptr inbounds double, ptr %x, i64 2 + %1 = load double, ptr %arrayidx1, align 8 + %mul = fmul reassoc nsz arcp contract afn 
double %1, %0 + %arrayidx2 = getelementptr inbounds double, ptr %x, i64 4 + %2 = load double, ptr %arrayidx2, align 8 + %mul3 = fmul reassoc nsz arcp contract afn double %mul, %2 + %arrayidx4 = getelementptr inbounds double, ptr %x, i64 6 + %3 = load double, ptr %arrayidx4, align 8 + %mul5 = fmul reassoc nsz arcp contract afn double %mul3, %3 + %arrayidx6 = getelementptr inbounds double, ptr %x, i64 8 + %4 = load double, ptr %arrayidx6, align 8 + %mul7 = fmul reassoc nsz arcp contract afn double %mul5, %4 + %arrayidx8 = getelementptr inbounds double, ptr %x, i64 10 + %5 = load double, ptr %arrayidx8, align 8 + %mul9 = fmul reassoc nsz arcp contract afn double %mul7, %5 + %arrayidx10 = getelementptr inbounds double, ptr %x, i64 12 + %6 = load double, ptr %arrayidx10, align 8 + %mul11 = fmul reassoc nsz arcp contract afn double %mul9, %6 + %arrayidx12 = getelementptr inbounds double, ptr %x, i64 14 + %7 = load double, ptr %arrayidx12, align 8 + %mul13 = fmul reassoc nsz arcp contract afn double %mul11, %7 + ret double %mul13 +} + +define float @fmul_float_16_factors_nonseq(float noundef %m, ptr nocapture noundef readonly %x) { +; CHECK-LABEL: define float @fmul_float_16_factors_nonseq( +; CHECK-SAME: float noundef [[M:%.*]], ptr nocapture noundef readonly [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[X]], align 4 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2 +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 4 +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[X]], i64 6 +; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[X]], i64 10 +; CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[ARRAYIDX8]], align 4 +; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[X]], i64 12 +; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX10]], align 4 +; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, ptr [[X]], i64 14 +; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX12]], align 4 +; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX14]], align 4 +; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, ptr [[X]], i64 18 +; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX16]], align 4 +; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, ptr [[X]], i64 20 +; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX18]], align 4 +; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds float, ptr [[X]], i64 22 +; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX20]], align 4 +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, ptr [[X]], i64 24 +; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX22]], align 4 +; CHECK-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds float, ptr [[X]], i64 26 +; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX24]], align 4 +; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds float, ptr [[X]], i64 28 +; CHECK-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX26]], align 4 +; CHECK-NEXT: 
[[ARRAYIDX28:%.*]] = getelementptr inbounds float, ptr [[X]], i64 30 +; CHECK-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX28]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <16 x float> poison, float [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <16 x float> [[TMP16]], float [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <16 x float> [[TMP17]], float [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <16 x float> [[TMP18]], float [[TMP3]], i32 3 +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <16 x float> [[TMP19]], float [[TMP4]], i32 4 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <16 x float> [[TMP20]], float [[TMP5]], i32 5 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <16 x float> [[TMP21]], float [[TMP6]], i32 6 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <16 x float> [[TMP22]], float [[TMP7]], i32 7 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <16 x float> [[TMP23]], float [[TMP8]], i32 8 +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <16 x float> [[TMP24]], float [[TMP9]], i32 9 +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <16 x float> [[TMP25]], float [[TMP10]], i32 10 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <16 x float> [[TMP26]], float [[TMP11]], i32 11 +; CHECK-NEXT: [[TMP28:%.*]] = insertelement <16 x float> [[TMP27]], float [[TMP12]], i32 12 +; CHECK-NEXT: [[TMP29:%.*]] = insertelement <16 x float> [[TMP28]], float [[TMP13]], i32 13 +; CHECK-NEXT: [[TMP30:%.*]] = insertelement <16 x float> [[TMP29]], float [[TMP14]], i32 14 +; CHECK-NEXT: [[TMP31:%.*]] = insertelement <16 x float> [[TMP30]], float [[TMP15]], i32 15 +; CHECK-NEXT: [[TMP32:%.*]] = call reassoc nsz arcp contract afn float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> [[TMP31]]) +; CHECK-NEXT: ret float [[TMP32]] +; +entry: + %0 = load float, ptr %x, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %x, i64 2 + %1 = load float, ptr %arrayidx1, align 4 + %mul = fmul reassoc nsz arcp contract afn float %1, %0 + %arrayidx2 = getelementptr inbounds float, ptr %x, i64 4 + %2 = load float, ptr %arrayidx2, align 4 + %mul3 = fmul reassoc nsz arcp contract afn float %mul, %2 + %arrayidx4 = getelementptr inbounds float, ptr %x, i64 6 + %3 = load float, ptr %arrayidx4, align 4 + %mul5 = fmul reassoc nsz arcp contract afn float %mul3, %3 + %arrayidx6 = getelementptr inbounds float, ptr %x, i64 8 + %4 = load float, ptr %arrayidx6, align 4 + %mul7 = fmul reassoc nsz arcp contract afn float %mul5, %4 + %arrayidx8 = getelementptr inbounds float, ptr %x, i64 10 + %5 = load float, ptr %arrayidx8, align 4 + %mul9 = fmul reassoc nsz arcp contract afn float %mul7, %5 + %arrayidx10 = getelementptr inbounds float, ptr %x, i64 12 + %6 = load float, ptr %arrayidx10, align 4 + %mul11 = fmul reassoc nsz arcp contract afn float %mul9, %6 + %arrayidx12 = getelementptr inbounds float, ptr %x, i64 14 + %7 = load float, ptr %arrayidx12, align 4 + %mul13 = fmul reassoc nsz arcp contract afn float %mul11, %7 + %arrayidx14 = getelementptr inbounds float, ptr %x, i64 16 + %8 = load float, ptr %arrayidx14, align 4 + %mul15 = fmul reassoc nsz arcp contract afn float %mul13, %8 + %arrayidx16 = getelementptr inbounds float, ptr %x, i64 18 + %9 = load float, ptr %arrayidx16, align 4 + %mul17 = fmul reassoc nsz arcp contract afn float %mul15, %9 + %arrayidx18 = getelementptr inbounds float, ptr %x, i64 20 + %10 = load float, ptr %arrayidx18, align 4 + %mul19 = fmul reassoc nsz arcp contract afn float %mul17, %10 + %arrayidx20 = getelementptr inbounds float, ptr %x, i64 22 + %11 = 
load float, ptr %arrayidx20, align 4 + %mul21 = fmul reassoc nsz arcp contract afn float %mul19, %11 + %arrayidx22 = getelementptr inbounds float, ptr %x, i64 24 + %12 = load float, ptr %arrayidx22, align 4 + %mul23 = fmul reassoc nsz arcp contract afn float %mul21, %12 + %arrayidx24 = getelementptr inbounds float, ptr %x, i64 26 + %13 = load float, ptr %arrayidx24, align 4 + %mul25 = fmul reassoc nsz arcp contract afn float %mul23, %13 + %arrayidx26 = getelementptr inbounds float, ptr %x, i64 28 + %14 = load float, ptr %arrayidx26, align 4 + %mul27 = fmul reassoc nsz arcp contract afn float %mul25, %14 + %arrayidx28 = getelementptr inbounds float, ptr %x, i64 30 + %15 = load float, ptr %arrayidx28, align 4 + %mul29 = fmul reassoc nsz arcp contract afn float %mul27, %15 + ret float %mul29 +} From 17cfd016b4621b8ae52bc7cc27570dd6fa8c0129 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Tue, 3 Dec 2024 11:17:22 -0500 Subject: [PATCH 085/191] [AMDGPU][Doc] Add `gfx950` to `gfx9-4-generic` in the document --- llvm/docs/AMDGPUUsage.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 411a1209ef947..c076b877bc8a1 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -586,7 +586,7 @@ Generic processor code objects are versioned. See :ref:`amdgpu-generic-processor ``gfx9-4-generic`` ``amdgcn`` - ``gfx940`` - xnack - Absolute flat FP8 and BF8 instructions, - ``gfx941`` - sramecc scratch FP8 and BF8 conversion instructions, - ``gfx942`` as well as instructions with XF32 format support - are not available. + - ``gfx950`` are not available. ``gfx10-1-generic`` ``amdgcn`` - ``gfx1010`` - xnack - Absolute flat - The following instructions are From c9fbabfdc92f12b2b0148762e6e789157a172e4d Mon Sep 17 00:00:00 2001 From: mandymi <131526806+mandymimi@users.noreply.github.com> Date: Wed, 4 Dec 2024 00:22:56 +0800 Subject: [PATCH 086/191] [ASTMatcher] Fix redundant macro expansion checks in getExpansionLocOfMacro (#117143) A performance issue was described in #114521. **Root Cause**: The function getExpansionLocOfMacro is responsible for finding the expansion location of macros. When dealing with macro parameters, it recursively calls itself to check the expansion of macro arguments. This recursive logic redundantly checks previous macro expansions, leading to significant performance degradation when macros are heavily nested. **Solution**: Track already-processed macros during recursion. **Implementation Details**: Introduced a data structure to record processed macros. Before each recursive call, check if the macro has already been processed to avoid redundant calculations. **Testing**: 1. Refer to #114521: with Finder->addMatcher(expr(isExpandedFromMacro("NULL")).bind("E"), this); run clang-tidy on freecad/src/Mod/Path/App/AreaPyImp.cpp (clang-tidy src/Mod/Path/App/AreaPyImp.cpp -checks=-*,testchecker -p=build/compile_commands.json); the checker runs normally. 2. check-clang-unit passes. --- clang/docs/ReleaseNotes.rst | 2 ++ clang/lib/ASTMatchers/ASTMatchersInternal.cpp | 28 +++++++++++++++---- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 395da768f7c32..922f49c453e15 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -971,6 +971,8 @@ AST Matchers - Ensure ``hasName`` matches template specializations across inline namespaces, making `matchesNodeFullSlow` and `matchesNodeFullFast` consistent.
+- Improved the performance of the ``getExpansionLocOfMacro`` by tracking already processed macros during recursion. + - Add ``exportDecl`` matcher to match export declaration. clang-format diff --git a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp index cdbdb65195409..84a7fa4d36b48 100644 --- a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp +++ b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp @@ -21,6 +21,7 @@ #include "clang/Basic/LLVM.h" #include "clang/Lex/Lexer.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -697,20 +698,27 @@ static bool isTokenAtLoc(const SourceManager &SM, const LangOptions &LangOpts, return !Invalid && Text == TokenText; } -std::optional -getExpansionLocOfMacro(StringRef MacroName, SourceLocation Loc, - const ASTContext &Context) { +static std::optional getExpansionLocOfMacroRecursive( + StringRef MacroName, SourceLocation Loc, const ASTContext &Context, + llvm::DenseSet &CheckedLocations) { auto &SM = Context.getSourceManager(); const LangOptions &LangOpts = Context.getLangOpts(); while (Loc.isMacroID()) { + if (CheckedLocations.count(Loc)) + return std::nullopt; + CheckedLocations.insert(Loc); SrcMgr::ExpansionInfo Expansion = SM.getSLocEntry(SM.getFileID(Loc)).getExpansion(); - if (Expansion.isMacroArgExpansion()) + if (Expansion.isMacroArgExpansion()) { // Check macro argument for an expansion of the given macro. For example, // `F(G(3))`, where `MacroName` is `G`. - if (std::optional ArgLoc = getExpansionLocOfMacro( - MacroName, Expansion.getSpellingLoc(), Context)) + if (std::optional ArgLoc = + getExpansionLocOfMacroRecursive(MacroName, + Expansion.getSpellingLoc(), + Context, CheckedLocations)) { return ArgLoc; + } + } Loc = Expansion.getExpansionLocStart(); if (isTokenAtLoc(SM, LangOpts, MacroName, Loc)) return Loc; @@ -718,6 +726,14 @@ getExpansionLocOfMacro(StringRef MacroName, SourceLocation Loc, return std::nullopt; } +std::optional +getExpansionLocOfMacro(StringRef MacroName, SourceLocation Loc, + const ASTContext &Context) { + llvm::DenseSet CheckedLocations; + return getExpansionLocOfMacroRecursive(MacroName, Loc, Context, + CheckedLocations); +} + std::shared_ptr createAndVerifyRegex(StringRef Regex, llvm::Regex::RegexFlags Flags, StringRef MatcherID) { From 8da490320f6dcb99b4efef2cdb3d21002db1d2f7 Mon Sep 17 00:00:00 2001 From: Callum Fare Date: Tue, 3 Dec 2024 16:28:35 +0000 Subject: [PATCH 087/191] Reland of #108413: [Offload] Introduce offload-tblgen and initial new API implementation (#118503) This is another attempt to reland the changes from #108413 The previous two attempts introduced regressions and were reverted. This PR has been more thoroughly tested with various configurations so shouldn't cause any problems this time. If anyone is aware of any likely remaining problems then please let me know. The changes are identical other than the fixes contained in the last 5 commits. --- ### New API Previous discussions at the LLVM/Offload meeting have brought up the need for a new API for exposing the functionality of the plugins. This change introduces a very small subset of a new API, which is primarily for testing the offload tooling and demonstrating how a new API can fit into the existing code base without being too disruptive. Exact designs for these entry points and future additions can be worked out over time. 
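To give a feel for the shape of this initial subset, here is a rough sketch (illustrative only, not part of this patch's tests) of how a client might enumerate devices through the generated `OffloadAPI.h` header. The `olGetPlatformCount`/`olGetPlatform` signatures and the `OL_DEVICE_INFO_NAME` spelling are assumptions based on the device entry points and enum definitions elsewhere in this change, and may not match the final generated API exactly:

```cpp
// Illustrative sketch only. Assumes the generated C header, and that the
// platform entry points mirror the device entry points defined in Device.td.
#include <OffloadAPI.h>

#include <cstdio>
#include <vector>

int main() {
  // Entry points return an ol_result_t; OL_SUCCESS is NULL.
  if (olInit() != OL_SUCCESS)
    return 1;

  uint32_t NumPlatforms = 0;
  olGetPlatformCount(&NumPlatforms); // assumed signature
  std::vector<ol_platform_handle_t> Platforms(NumPlatforms);
  olGetPlatform(NumPlatforms, Platforms.data()); // assumed signature

  for (ol_platform_handle_t Platform : Platforms) {
    uint32_t NumDevices = 0;
    olGetDeviceCount(Platform, &NumDevices);
    if (NumDevices == 0)
      continue;
    std::vector<ol_device_handle_t> Devices(NumDevices);
    olGetDevice(Platform, NumDevices, Devices.data());

    for (ol_device_handle_t Device : Devices) {
      // Query the size of the device name, then fetch and print it.
      size_t Size = 0;
      olGetDeviceInfoSize(Device, OL_DEVICE_INFO_NAME, &Size);
      std::vector<char> Name(Size);
      olGetDeviceInfo(Device, OL_DEVICE_INFO_NAME, Size, Name.data());
      std::printf("Found device: %s\n", Name.data());
    }
  }

  olShutDown();
  return 0;
}
```

Error handling is omitted for brevity; each entry point returns an `ol_result_t`, which can be inspected for the error code and a detail string.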
The new API does however introduce the bare minimum functionality to implement device discovery for Unified Runtime and SYCL. This means that the `urinfo` and `sycl-ls` tools can be used on top of Offload. A (rough) implementation of a Unified Runtime adapter (aka plugin) for Offload is available [here](https://github.com/callumfare/unified-runtime/tree/offload_adapter). Our intention is to maintain this and use it to implement and test Offload API changes with SYCL. ### Demoing the new API ```sh # From the runtime build directory $ ninja LibomptUnitTests $ OFFLOAD_TRACE=1 ./offload/unittests/OffloadAPI/offload.unittests ``` ### Open questions and future work * Only some of the available device info is exposed, and not all the possible device queries needed for SYCL are implemented by the plugins. A sensible next step would be to refactor and extend the existing device info queries in the plugins. The existing info queries are all strings, but the new API introduces the ability to return any arbitrary type. * It may be sensible at some point for the plugins to implement the new API directly, and the higher level code on top of it could be made generic, but this is more of a long-term possibility. --- offload/CMakeLists.txt | 3 + offload/cmake/OpenMPTesting.cmake | 12 + offload/liboffload/API/APIDefs.td | 212 ++++++ offload/liboffload/API/CMakeLists.txt | 25 + offload/liboffload/API/Common.td | 141 ++++ offload/liboffload/API/Device.td | 106 +++ offload/liboffload/API/OffloadAPI.td | 15 + offload/liboffload/API/Platform.td | 112 ++++ offload/liboffload/API/README.md | 150 +++++ offload/liboffload/CMakeLists.txt | 37 ++ offload/liboffload/README.md | 8 + offload/liboffload/exports | 6 + offload/liboffload/include/OffloadImpl.hpp | 94 +++ .../liboffload/include/generated/OffloadAPI.h | 610 ++++++++++++++++++ .../include/generated/OffloadEntryPoints.inc | 441 +++++++++++++ .../include/generated/OffloadFuncs.inc | 34 + .../generated/OffloadImplFuncDecls.inc | 38 ++ .../include/generated/OffloadPrint.hpp | 428 ++++++++++++ offload/liboffload/src/Helpers.hpp | 95 +++ offload/liboffload/src/OffloadImpl.cpp | 247 +++++++ offload/liboffload/src/OffloadLib.cpp | 44 ++ .../common/include/PluginInterface.h | 4 + offload/test/lit.cfg | 3 +- offload/test/lit.site.cfg.in | 1 + .../tools/offload-tblgen/default_returns.td | 40 ++ .../test/tools/offload-tblgen/entry_points.td | 37 ++ .../tools/offload-tblgen/functions_basic.td | 39 ++ .../offload-tblgen/functions_code_loc.td | 26 + .../offload-tblgen/functions_ranged_param.td | 36 ++ .../test/tools/offload-tblgen/print_enum.td | 34 + .../tools/offload-tblgen/print_function.td | 38 ++ .../tools/offload-tblgen/type_tagged_enum.td | 76 +++ offload/tools/offload-tblgen/APIGen.cpp | 229 +++++++ offload/tools/offload-tblgen/CMakeLists.txt | 24 + .../tools/offload-tblgen/EntryPointGen.cpp | 138 ++++ offload/tools/offload-tblgen/FuncsGen.cpp | 74 +++ offload/tools/offload-tblgen/GenCommon.hpp | 67 ++ offload/tools/offload-tblgen/Generators.hpp | 23 + offload/tools/offload-tblgen/PrintGen.cpp | 226 +++++++ offload/tools/offload-tblgen/RecordTypes.hpp | 227 +++++++ .../tools/offload-tblgen/offload-tblgen.cpp | 101 +++ offload/unittests/CMakeLists.txt | 3 +- offload/unittests/OffloadAPI/CMakeLists.txt | 16 + .../OffloadAPI/common/Environment.cpp | 96 +++ .../OffloadAPI/common/Environment.hpp | 17 + .../unittests/OffloadAPI/common/Fixtures.hpp | 64 ++ .../OffloadAPI/device/olDeviceInfo.hpp | 21 + .../OffloadAPI/device/olGetDevice.cpp | 39 ++ 
.../OffloadAPI/device/olGetDeviceCount.cpp | 28 + .../OffloadAPI/device/olGetDeviceInfo.cpp | 76 +++ .../OffloadAPI/device/olGetDeviceInfoSize.cpp | 58 ++ .../OffloadAPI/platform/olGetPlatform.cpp | 28 + .../platform/olGetPlatformCount.cpp | 22 + .../OffloadAPI/platform/olGetPlatformInfo.cpp | 76 +++ .../platform/olGetPlatformInfoSize.cpp | 57 ++ .../OffloadAPI/platform/olPlatformInfo.hpp | 20 + 56 files changed, 4920 insertions(+), 2 deletions(-) create mode 100644 offload/liboffload/API/APIDefs.td create mode 100644 offload/liboffload/API/CMakeLists.txt create mode 100644 offload/liboffload/API/Common.td create mode 100644 offload/liboffload/API/Device.td create mode 100644 offload/liboffload/API/OffloadAPI.td create mode 100644 offload/liboffload/API/Platform.td create mode 100644 offload/liboffload/API/README.md create mode 100644 offload/liboffload/CMakeLists.txt create mode 100644 offload/liboffload/README.md create mode 100644 offload/liboffload/exports create mode 100644 offload/liboffload/include/OffloadImpl.hpp create mode 100644 offload/liboffload/include/generated/OffloadAPI.h create mode 100644 offload/liboffload/include/generated/OffloadEntryPoints.inc create mode 100644 offload/liboffload/include/generated/OffloadFuncs.inc create mode 100644 offload/liboffload/include/generated/OffloadImplFuncDecls.inc create mode 100644 offload/liboffload/include/generated/OffloadPrint.hpp create mode 100644 offload/liboffload/src/Helpers.hpp create mode 100644 offload/liboffload/src/OffloadImpl.cpp create mode 100644 offload/liboffload/src/OffloadLib.cpp create mode 100644 offload/test/tools/offload-tblgen/default_returns.td create mode 100644 offload/test/tools/offload-tblgen/entry_points.td create mode 100644 offload/test/tools/offload-tblgen/functions_basic.td create mode 100644 offload/test/tools/offload-tblgen/functions_code_loc.td create mode 100644 offload/test/tools/offload-tblgen/functions_ranged_param.td create mode 100644 offload/test/tools/offload-tblgen/print_enum.td create mode 100644 offload/test/tools/offload-tblgen/print_function.td create mode 100644 offload/test/tools/offload-tblgen/type_tagged_enum.td create mode 100644 offload/tools/offload-tblgen/APIGen.cpp create mode 100644 offload/tools/offload-tblgen/CMakeLists.txt create mode 100644 offload/tools/offload-tblgen/EntryPointGen.cpp create mode 100644 offload/tools/offload-tblgen/FuncsGen.cpp create mode 100644 offload/tools/offload-tblgen/GenCommon.hpp create mode 100644 offload/tools/offload-tblgen/Generators.hpp create mode 100644 offload/tools/offload-tblgen/PrintGen.cpp create mode 100644 offload/tools/offload-tblgen/RecordTypes.hpp create mode 100644 offload/tools/offload-tblgen/offload-tblgen.cpp create mode 100644 offload/unittests/OffloadAPI/CMakeLists.txt create mode 100644 offload/unittests/OffloadAPI/common/Environment.cpp create mode 100644 offload/unittests/OffloadAPI/common/Environment.hpp create mode 100644 offload/unittests/OffloadAPI/common/Fixtures.hpp create mode 100644 offload/unittests/OffloadAPI/device/olDeviceInfo.hpp create mode 100644 offload/unittests/OffloadAPI/device/olGetDevice.cpp create mode 100644 offload/unittests/OffloadAPI/device/olGetDeviceCount.cpp create mode 100644 offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp create mode 100644 offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp create mode 100644 offload/unittests/OffloadAPI/platform/olGetPlatform.cpp create mode 100644 offload/unittests/OffloadAPI/platform/olGetPlatformCount.cpp create mode 100644 
offload/unittests/OffloadAPI/platform/olGetPlatformInfo.cpp create mode 100644 offload/unittests/OffloadAPI/platform/olGetPlatformInfoSize.cpp create mode 100644 offload/unittests/OffloadAPI/platform/olPlatformInfo.hpp diff --git a/offload/CMakeLists.txt b/offload/CMakeLists.txt index e24f0faa91211..deae8d1a6b50c 100644 --- a/offload/CMakeLists.txt +++ b/offload/CMakeLists.txt @@ -353,6 +353,9 @@ add_subdirectory(tools) # Build target agnostic offloading library. add_subdirectory(src) +add_subdirectory(tools/offload-tblgen) +add_subdirectory(liboffload) + # Add tests. add_subdirectory(test) diff --git a/offload/cmake/OpenMPTesting.cmake b/offload/cmake/OpenMPTesting.cmake index 6609d6301d0f9..f97def2c52eba 100644 --- a/offload/cmake/OpenMPTesting.cmake +++ b/offload/cmake/OpenMPTesting.cmake @@ -48,6 +48,17 @@ function(find_standalone_test_dependencies) return() endif() + find_program(OFFLOAD_TBLGEN_EXECUTABLE + NAMES offload-tblgen + PATHS ${OPENMP_LLVM_TOOLS_DIR}) + if (NOT OFFLOAD_TBLGEN_EXECUTABLE) + message(STATUS "Cannot find 'offload-tblgen'.") + message(STATUS "Please put 'not' in your PATH, set OFFLOAD_TBLGEN_EXECUTABLE to its full path, or point OPENMP_LLVM_TOOLS_DIR to its directory.") + message(WARNING "The check targets will not be available!") + set(ENABLE_CHECK_TARGETS FALSE PARENT_SCOPE) + return() + endif() + find_program(OPENMP_NOT_EXECUTABLE NAMES not PATHS ${OPENMP_LLVM_TOOLS_DIR}) @@ -82,6 +93,7 @@ else() set(OPENMP_FILECHECK_EXECUTABLE ${LLVM_RUNTIME_OUTPUT_INTDIR}/FileCheck) endif() set(OPENMP_NOT_EXECUTABLE ${LLVM_RUNTIME_OUTPUT_INTDIR}/not) + set(OFFLOAD_TBLGEN_EXECUTABLE ${LLVM_RUNTIME_OUTPUT_INTDIR}/offload-tblgen) set(OFFLOAD_DEVICE_INFO_EXECUTABLE ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-offload-device-info) endif() diff --git a/offload/liboffload/API/APIDefs.td b/offload/liboffload/API/APIDefs.td new file mode 100644 index 0000000000000..60c1b85d26911 --- /dev/null +++ b/offload/liboffload/API/APIDefs.td @@ -0,0 +1,212 @@ +//===-- APIDefs.td - Base definitions for Offload tablegen -*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the class definitions used to implement the Offload API, +// as well as helper functions used to help populate relevant records. +// See offload/API/README.md for more detailed documentation. +// +//===----------------------------------------------------------------------===// + +// Prefix for API naming. This could be hard-coded in the future when a value +// is agreed upon. +defvar PREFIX = "OL"; +defvar prefix = !tolower(PREFIX); + +// Parameter flags +defvar PARAM_IN = 0x1; +defvar PARAM_OUT = 0x2; +defvar PARAM_OPTIONAL = 0x4; +defvar PARAM_IN_OPTIONAL = !or(PARAM_IN, PARAM_OPTIONAL); +defvar PARAM_OUT_OPTIONAL = !or(PARAM_OUT, PARAM_OPTIONAL); + +// Does the type end with '_handle_t'? +class IsHandleType { + // size("_handle_t") == 9 + bit ret = !if(!lt(!size(Type), 9), 0, + !ne(!find(Type, "_handle_t", !sub(!size(Type), 9)), -1)); +} + +// Does the type end with '*'? 
+class IsPointerType { + bit ret = !ne(!find(Type, "*", !sub(!size(Type), 1)), -1); +} + +// Describes the valid range of a pointer parameter that reperesents an array +class Range { + string begin = Begin; + string end = End; +} + +// Names the parameters that indicate the type and size of the data pointed to +// by an opaque pointer parameter +class TypeInfo { + string enum = TypeEnum; + string size = TypeSize; +} + +class Param Flags = 0> { + string type = Type; + string name = Name; + string desc = Desc; + bits<3> flags = Flags; + Range range = Range<"", "">; + TypeInfo type_info = TypeInfo<"", "">; + bit IsHandle = IsHandleType.ret; + bit IsPointer = IsPointerType.ret; +} + +// A parameter whose range is described by other parameters in the function. +class RangedParam Flags, Range ParamRange> : Param { + let range = ParamRange; +} + +// A parameter (normally of type void*) which has its pointee type and size +// described by other parameters in the function. +class TypeTaggedParam Flags, TypeInfo ParamTypeInfo> : Param { + let type_info = ParamTypeInfo; +} + +class Return Conditions = []> { + string value = Value; + list conditions = Conditions; +} + +class ShouldCheckHandle { + bit ret = !and(P.IsHandle, !eq(!and(PARAM_OPTIONAL, P.flags), 0)); +} + +class ShouldCheckPointer { + bit ret = !and(P.IsPointer, !eq(!and(PARAM_OPTIONAL, P.flags), 0)); +} + +// For a list of returns that contains a specific return code, find and append +// new conditions to that return +class AppendConditionsToReturn Returns, string ReturnValue, + list Conditions> { + list ret = + !foreach(Ret, Returns, + !if(!eq(Ret.value, ReturnValue), + Return, Ret)); +} + +// Add null handle checks to a function's return value descriptions +class AddHandleChecksToReturns Params, list Returns> { + list handle_params = + !foreach(P, Params, !if(ShouldCheckHandle
<P>
.ret, P.name, "")); + list handle_params_filt = + !filter(param, handle_params, !ne(param, "")); + list handle_param_conds = + !foreach(handle, handle_params_filt, "`NULL == "#handle#"`"); + + // Does the list of returns already contain ERROR_INVALID_NULL_HANDLE? + bit returns_has_inv_handle = !foldl( + 0, Returns, HasErr, Ret, + !or(HasErr, !eq(Ret.value, PREFIX#"_ERRC_INVALID_NULL_HANDLE"))); + + list returns_out = !if(returns_has_inv_handle, + AppendConditionsToReturn.ret, + !listconcat(Returns, [Return]) + ); +} + +// Add null pointer checks to a function's return value descriptions +class AddPointerChecksToReturns Params, list Returns> { + list ptr_params = + !foreach(P, Params, !if(ShouldCheckPointer
<P>
.ret, P.name, "")); + list ptr_params_filt = !filter(param, ptr_params, !ne(param, "")); + list ptr_param_conds = + !foreach(ptr, ptr_params_filt, "`NULL == "#ptr#"`"); + + // Does the list of returns already contain ERROR_INVALID_NULL_POINTER? + bit returns_has_inv_ptr = !foldl( + 0, Returns, HasErr, Ret, + !or(HasErr, !eq(Ret.value, PREFIX#"_ERRC_INVALID_NULL_POINTER"))); + list returns_out = !if(returns_has_inv_ptr, + AppendConditionsToReturn.ret, + !listconcat(Returns, [Return]) + ); +} + +defvar DefaultReturns = [Return, + Return, + Return]; + +class APIObject { + string name; + string desc; +} + +class Function : APIObject { + list params; + list returns; + list details = []; + list analogues = []; + + list returns_with_def = !listconcat(DefaultReturns, returns); + list all_returns = AddPointerChecksToReturns.returns_out>.returns_out; +} + +class Etor { + string name = Name; + string desc = Desc; + string tagged_type; +} + +class TaggedEtor : Etor { + let tagged_type = Type; +} + +class Enum : APIObject { + // This refers to whether the enumerator descriptions specify a return + // type for functions where this enum may be used as an output type. If set, + // all Etor values must be TaggedEtor records + bit is_typed = 0; + + list etors = []; +} + +class StructMember { + string type = Type; + string name = Name; + string desc = Desc; +} + +defvar DefaultPropStructMembers = + [StructMember, + StructMember<"void*", "pNext", "pointer to extension-specific structure">]; + +class StructHasInheritedMembers { + bit ret = !or(!eq(BaseClass, prefix#"_base_properties_t"), + !eq(BaseClass, prefix#"_base_desc_t")); +} + +class Struct : APIObject { + string base_class = ""; + list members; + list all_members = + !if(StructHasInheritedMembers.ret, + DefaultPropStructMembers, [])#members; +} + +class Typedef : APIObject { string value; } + +class FptrTypedef : APIObject { + list params; + list returns; +} + +class Macro : APIObject { + string value; + + string condition; + string alt_value; +} + +class Handle : APIObject; diff --git a/offload/liboffload/API/CMakeLists.txt b/offload/liboffload/API/CMakeLists.txt new file mode 100644 index 0000000000000..8fd6cb539374a --- /dev/null +++ b/offload/liboffload/API/CMakeLists.txt @@ -0,0 +1,25 @@ +# The OffloadGenerate target is used to regenerate the generated files in the +# include directory. These files are checked in with the rest of the source, +# therefore it is only needed when making changes to the API. + +find_program(CLANG_FORMAT clang-format PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH) +if (CLANG_FORMAT) + set(LLVM_TARGET_DEFINITIONS ${CMAKE_CURRENT_SOURCE_DIR}/OffloadAPI.td) + + tablegen(OFFLOAD OffloadAPI.h -gen-api) + tablegen(OFFLOAD OffloadEntryPoints.inc -gen-entry-points) + tablegen(OFFLOAD OffloadFuncs.inc -gen-func-names) + tablegen(OFFLOAD OffloadImplFuncDecls.inc -gen-impl-func-decls) + tablegen(OFFLOAD OffloadPrint.hpp -gen-print-header) + + set(OFFLOAD_GENERATED_FILES ${TABLEGEN_OUTPUT}) + add_public_tablegen_target(OffloadGenerate) + add_custom_command(TARGET OffloadGenerate POST_BUILD COMMAND ${CLANG_FORMAT} + -i ${OFFLOAD_GENERATED_FILES}) + add_custom_command(TARGET OffloadGenerate POST_BUILD COMMAND ${CMAKE_COMMAND} + -E copy_if_different ${OFFLOAD_GENERATED_FILES} "${CMAKE_CURRENT_SOURCE_DIR}/../include/generated") +else() + message(WARNING "clang-format was not found, so the OffloadGenerate target\ + will not be available. 
Offload will still build, but you will not be\ + able to make changes to the API.") +endif() diff --git a/offload/liboffload/API/Common.td b/offload/liboffload/API/Common.td new file mode 100644 index 0000000000000..5b19d1d47129e --- /dev/null +++ b/offload/liboffload/API/Common.td @@ -0,0 +1,141 @@ +def : Macro { + let name = "OL_VERSION_MAJOR"; + let desc = "Major version of the Offload API"; + let value = "0"; +} + +def : Macro { + let name = "OL_VERSION_MINOR"; + let desc = "Minor version of the Offload API"; + let value = "0"; +} + +def : Macro { + let name = "OL_VERSION_PATCH"; + let desc = "Patch version of the Offload API"; + let value = "1"; +} + +def : Macro { + let name = "OL_APICALL"; + let desc = "Calling convention for all API functions"; + let condition = "defined(_WIN32)"; + let value = "__cdecl"; + let alt_value = ""; +} + +def : Macro { + let name = "OL_APIEXPORT"; + let desc = "Microsoft-specific dllexport storage-class attribute"; + let condition = "defined(_WIN32)"; + let value = "__declspec(dllexport)"; + let alt_value = ""; +} + +def : Macro { + let name = "OL_DLLEXPORT"; + let desc = "Microsoft-specific dllexport storage-class attribute"; + let condition = "defined(_WIN32)"; + let value = "__declspec(dllexport)"; +} + +def : Macro { + let name = "OL_DLLEXPORT"; + let desc = "GCC-specific dllexport storage-class attribute"; + let condition = "__GNUC__ >= 4"; + let value = "__attribute__ ((visibility (\"default\")))"; + let alt_value = ""; +} + +def : Handle { + let name = "ol_platform_handle_t"; + let desc = "Handle of a platform instance"; +} + +def : Handle { + let name = "ol_device_handle_t"; + let desc = "Handle of platform's device object"; +} + +def : Handle { + let name = "ol_context_handle_t"; + let desc = "Handle of context object"; +} + +def : Enum { + let name = "ol_errc_t"; + let desc = "Defines Return/Error codes"; + let etors =[ + Etor<"SUCCESS", "Success">, + Etor<"INVALID_VALUE", "Invalid Value">, + Etor<"INVALID_PLATFORM", "Invalid platform">, + Etor<"DEVICE_NOT_FOUND", "Device not found">, + Etor<"INVALID_DEVICE", "Invalid device">, + Etor<"DEVICE_LOST", "Device hung, reset, was removed, or driver update occurred">, + Etor<"UNINITIALIZED", "plugin is not initialized or specific entry-point is not implemented">, + Etor<"OUT_OF_RESOURCES", "Out of resources">, + Etor<"UNSUPPORTED_VERSION", "generic error code for unsupported versions">, + Etor<"UNSUPPORTED_FEATURE", "generic error code for unsupported features">, + Etor<"INVALID_ARGUMENT", "generic error code for invalid arguments">, + Etor<"INVALID_NULL_HANDLE", "handle argument is not valid">, + Etor<"INVALID_NULL_POINTER", "pointer argument may not be nullptr">, + Etor<"INVALID_SIZE", "invalid size or dimensions (e.g., must not be zero, or is out of bounds)">, + Etor<"INVALID_ENUMERATION", "enumerator argument is not valid">, + Etor<"UNSUPPORTED_ENUMERATION", "enumerator argument is not supported by the device">, + Etor<"UNKNOWN", "Unknown or internal error"> + ]; +} + +def : Struct { + let name = "ol_error_struct_t"; + let desc = "Details of the error condition returned by an API call"; + let members = [ + StructMember<"ol_errc_t", "Code", "The error code">, + StructMember<"const char*", "Details", "String containing error details"> + ]; +} + +def : Typedef { + let name = "ol_result_t"; + let desc = "Result type returned by all entry points."; + let value = "const ol_error_struct_t*"; +} + +def : Macro { + let name = "OL_SUCCESS"; + let desc = "Success condition"; + let value = "NULL"; +} + +def : 
Struct { + let name = "ol_code_location_t"; + let desc = "Code location information that can optionally be associated with an API call"; + let members = [ + StructMember<"const char*", "FunctionName", "Function name">, + StructMember<"const char*", "SourceFile", "Source code file">, + StructMember<"uint32_t", "LineNumber", "Source code line number">, + StructMember<"uint32_t", "ColumnNumber", "Source code column number"> + ]; +} + +def : Function { + let name = "olInit"; + let desc = "Perform initialization of the Offload library and plugins"; + let details = [ + "This must be the first API call made by a user of the Offload library", + "Each call will increment an internal reference count that is decremented by `olShutDown`" + ]; + let params = []; + let returns = []; +} + +def : Function { + let name = "olShutDown"; + let desc = "Release the resources in use by Offload"; + let details = [ + "This decrements an internal reference count. When this reaches 0, all resources will be released", + "Subsequent API calls made after this are not valid" + ]; + let params = []; + let returns = []; +} diff --git a/offload/liboffload/API/Device.td b/offload/liboffload/API/Device.td new file mode 100644 index 0000000000000..30c0b71fe7b37 --- /dev/null +++ b/offload/liboffload/API/Device.td @@ -0,0 +1,106 @@ +//===-- Device.td - Device definitions for Offload ---------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains Offload API definitions related to the Device handle +// +//===----------------------------------------------------------------------===// + +def : Enum { + let name = "ol_device_type_t"; + let desc = "Supported device types"; + let etors =[ + Etor<"DEFAULT", "The default device type as preferred by the runtime">, + Etor<"ALL", "Devices of all types">, + Etor<"GPU", "GPU device type">, + Etor<"CPU", "CPU device type">, + ]; +} + +def : Enum { + let name = "ol_device_info_t"; + let desc = "Supported device info"; + let is_typed = 1; + let etors =[ + TaggedEtor<"TYPE", "ol_device_type_t", "type of the device">, + TaggedEtor<"PLATFORM", "ol_platform_handle_t", "the platform associated with the device">, + TaggedEtor<"NAME", "char[]", "Device name">, + TaggedEtor<"VENDOR", "char[]", "Device vendor">, + TaggedEtor<"DRIVER_VERSION", "char[]", "Driver version"> + ]; +} + +def : Function { + let name = "olGetDeviceCount"; + let desc = "Retrieves the number of available devices within a platform"; + let params = [ + Param<"ol_platform_handle_t", "Platform", "handle of the platform instance", PARAM_IN>, + Param<"uint32_t*", "NumDevices", "pointer to the number of devices.", PARAM_OUT> + ]; + let returns = []; +} + +def : Function { + let name = "olGetDevice"; + let desc = "Retrieves devices within a platform"; + let details = [ + "Multiple calls to this function will return identical device handles, in the same order.", + ]; + let params = [ + Param<"ol_platform_handle_t", "Platform", "handle of the platform instance", PARAM_IN>, + Param<"uint32_t", "NumEntries", "the number of devices to be added to phDevices, which must be greater than zero", PARAM_IN>, + RangedParam<"ol_device_handle_t*", "Devices", "Array of device handles. 
" + "If NumEntries is less than the number of devices available, then this function shall only retrieve that number of devices.", PARAM_OUT, + Range<"0", "NumEntries">> + ]; + let returns = [ + Return<"OL_ERRC_INVALID_SIZE", [ + "`NumEntries == 0`" + ]> + ]; +} + +def : Function { + let name = "olGetDeviceInfo"; + let desc = "Queries the given property of the device"; + let details = []; + let params = [ + Param<"ol_device_handle_t", "Device", "handle of the device instance", PARAM_IN>, + Param<"ol_device_info_t", "PropName", "type of the info to retrieve", PARAM_IN>, + Param<"size_t", "PropSize", "the number of bytes pointed to by PropValue.", PARAM_IN>, + TypeTaggedParam<"void*", "PropValue", "array of bytes holding the info. If PropSize is not equal to or greater than the real " + "number of bytes needed to return the info then the OL_ERRC_INVALID_SIZE error is returned and " + "PropValue is not used.", PARAM_OUT, TypeInfo<"PropName" , "PropSize">> + ]; + let returns = [ + Return<"OL_ERRC_UNSUPPORTED_ENUMERATION", [ + "If `PropName` is not supported by the device." + ]>, + Return<"OL_ERRC_INVALID_SIZE", [ + "`PropSize == 0`", + "If `PropSize` is less than the real number of bytes needed to return the info." + ]>, + Return<"OL_ERRC_INVALID_DEVICE"> + ]; +} + +def : Function { + let name = "olGetDeviceInfoSize"; + let desc = "Returns the storage size of the given device query"; + let details = []; + let params = [ + Param<"ol_device_handle_t", "Device", "handle of the device instance", PARAM_IN>, + Param<"ol_device_info_t", "PropName", "type of the info to retrieve", PARAM_IN>, + Param<"size_t*", "PropSizeRet", "pointer to the number of bytes required to store the query", PARAM_OUT> + ]; + let returns = [ + Return<"OL_ERRC_UNSUPPORTED_ENUMERATION", [ + "If `PropName` is not supported by the device." + ]>, + Return<"OL_ERRC_INVALID_DEVICE"> + ]; +} diff --git a/offload/liboffload/API/OffloadAPI.td b/offload/liboffload/API/OffloadAPI.td new file mode 100644 index 0000000000000..8a0c3c4058122 --- /dev/null +++ b/offload/liboffload/API/OffloadAPI.td @@ -0,0 +1,15 @@ +//===-- OffloadAPI.td - Root tablegen file for Offload -----*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Always include this file first +include "APIDefs.td" + +// Add API definition files here +include "Common.td" +include "Platform.td" +include "Device.td" diff --git a/offload/liboffload/API/Platform.td b/offload/liboffload/API/Platform.td new file mode 100644 index 0000000000000..03e70cf96ac94 --- /dev/null +++ b/offload/liboffload/API/Platform.td @@ -0,0 +1,112 @@ +//===-- Platform.td - Platform definitions for Offload -----*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains Offload API definitions related to the Platform handle +// +//===----------------------------------------------------------------------===// +def : Function { + let name = "olGetPlatform"; + let desc = "Retrieves all available platforms"; + let details = [ + "Multiple calls to this function will return identical platforms handles, in the same order.", + ]; + let params = [ + Param<"uint32_t", "NumEntries", + "The number of platforms to be added to Platforms. NumEntries must be " + "greater than zero.", + PARAM_IN>, + RangedParam<"ol_platform_handle_t*", "Platforms", + "Array of handle of platforms. If NumEntries is less than the number of " + "platforms available, then olGetPlatform shall only retrieve that " + "number of platforms.", + PARAM_OUT, Range<"0", "NumEntries">> + ]; + let returns = [ + Return<"OL_ERRC_INVALID_SIZE", [ + "`NumEntries == 0`" + ]> + ]; +} + +def : Function { + let name = "olGetPlatformCount"; + let desc = "Retrieves the number of available platforms"; + let params = [ + Param<"uint32_t*", + "NumPlatforms", "returns the total number of platforms available.", + PARAM_OUT> + ]; + let returns = []; +} + +def : Enum { + let name = "ol_platform_info_t"; + let desc = "Supported platform info"; + let is_typed = 1; + let etors = [ + TaggedEtor<"NAME", "char[]", "The string denoting name of the platform. The size of the info needs to be dynamically queried.">, + TaggedEtor<"VENDOR_NAME", "char[]", "The string denoting name of the vendor of the platform. The size of the info needs to be dynamically queried.">, + TaggedEtor<"VERSION", "char[]", "The string denoting the version of the platform. The size of the info needs to be dynamically queried.">, + TaggedEtor<"BACKEND", "ol_platform_backend_t", "The native backend of the platform."> + ]; +} + +def : Enum { + let name = "ol_platform_backend_t"; + let desc = "Identifies the native backend of the platform"; + let etors =[ + Etor<"UNKNOWN", "The backend is not recognized">, + Etor<"CUDA", "The backend is CUDA">, + Etor<"AMDGPU", "The backend is AMDGPU">, + ]; +} + +def : Function { + let name = "olGetPlatformInfo"; + let desc = "Queries the given property of the platform"; + let details = [ + "`olGetPlatformInfoSize` can be used to query the storage size " + "required for the given query." + ]; + let params = [ + Param<"ol_platform_handle_t", "Platform", "handle of the platform", PARAM_IN>, + Param<"ol_platform_info_t", "PropName", "type of the info to retrieve", PARAM_IN>, + Param<"size_t", "PropSize", "the number of bytes pointed to by pPlatformInfo.", PARAM_IN>, + TypeTaggedParam<"void*", "PropValue", "array of bytes holding the info. " + "If Size is not equal to or greater to the real number of bytes needed to return the info " + "then the OL_ERRC_INVALID_SIZE error is returned and pPlatformInfo is not used.", PARAM_OUT, + TypeInfo<"PropName" , "PropSize">> + ]; + let returns = [ + Return<"OL_ERRC_UNSUPPORTED_ENUMERATION", [ + "If `PropName` is not supported by the platform." + ]>, + Return<"OL_ERRC_INVALID_SIZE", [ + "`PropSize == 0`", + "If `PropSize` is less than the real number of bytes needed to return the info." 
+ ]>, + Return<"OL_ERRC_INVALID_PLATFORM"> + ]; +} + +def : Function { + let name = "olGetPlatformInfoSize"; + let desc = "Returns the storage size of the given platform query"; + let details = []; + let params = [ + Param<"ol_platform_handle_t", "Platform", "handle of the platform", PARAM_IN>, + Param<"ol_platform_info_t", "PropName", "type of the info to query", PARAM_IN>, + Param<"size_t*", "PropSizeRet", "pointer to the number of bytes required to store the query", PARAM_OUT> + ]; + let returns = [ + Return<"OL_ERRC_UNSUPPORTED_ENUMERATION", [ + "If `PropName` is not supported by the platform." + ]>, + Return<"OL_ERRC_INVALID_PLATFORM"> + ]; +} diff --git a/offload/liboffload/API/README.md b/offload/liboffload/API/README.md new file mode 100644 index 0000000000000..38a055811b2d0 --- /dev/null +++ b/offload/liboffload/API/README.md @@ -0,0 +1,150 @@ +# Offload API definitions + +**Note**: This is a work-in-progress. It is loosely based on equivalent +tooling in Unified Runtime. + +The Tablegen files in this directory are used to define the Offload API. They +are used with the `offload-tblgen` tool to generate API headers, print headers, +and other implementation details. + +The root file is `OffloadAPI.td` - additional `.td` files can be included in +this file to add them to the API. + +## API Objects +The API consists of a number of objects, which always have a *name* field and +*description* field, and are one of the following types: + +### Function +Represents an API entry point function. Has a list of returns and parameters. +Also has fields for details (representing a bullet-point list of +information about the function that would otherwise be too detailed for the +description), and analogues (equivalent functions in other APIs). + +#### Parameter +Represents a parameter to a function, has *type*, *name*, and *desc* fields. +Also has a *flags* field containing flags representing whether the parameter is +in, out, or optional. + +The *type* field is used to infer if the parameter is a pointer or handle type. +A *handle* type is a pointer to an opaque struct, used to abstract over +plugin-specific implementation details. + +There are two special variants of a *parameter*: +* **RangedParameter** - Represents a parameter that has a range described by other parameters. Generally these are pointers to an arbitrary number of objects. The range is used for generating validation and printing code. E.g, a range might be between `(0, NumDevices)` +* **TypeTaggedParameter** - Represents a parameter (usually of `void*` type) that has the type and size of its pointee data described by other function parameters. The type is usually described by a type-tagged enum. This allows functions (e.g. `olGetDeviceInfo`) to return data of an arbitrary type. + +#### Return +A return represents a possible return code from the function, and optionally a +list of conditions in which this value may be returned. The conditions list is +not expected to be exhaustive. A condition is considered free-form text, but +if it is wrapped in \`backticks\` then it is treated as literal code +representing an error condition (e.g. `someParam < 1`). These conditions are +used to automatically create validation checks by the `offload-tblgen` +validation generator. + +Returns are automatically generated for functions with pointer or handle +parameters, so API authors do not need to exhaustively add null checks for +these types of parameters. All functions also get a number of default return +values automatically. 
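+
+As an illustration (not itself part of the `.td` sources), the `NumEntries == 0` condition on `olGetPlatform`'s `OL_ERRC_INVALID_SIZE` return, together with the automatically generated null check on `Platforms`, becomes a validation wrapper roughly like the following simplified sketch, assuming the declarations from `OffloadAPI.h` and `OffloadImpl.hpp`:
+
+```cpp
+// Simplified sketch of the generated validation wrapper for olGetPlatform.
+ol_impl_result_t olGetPlatform_val(uint32_t NumEntries,
+                                   ol_platform_handle_t *Platforms) {
+  // Check generated from the `NumEntries == 0` condition on the
+  // OL_ERRC_INVALID_SIZE return.
+  if (NumEntries == 0)
+    return OL_ERRC_INVALID_SIZE;
+  // Null check generated automatically for the pointer parameter.
+  if (NULL == Platforms)
+    return OL_ERRC_INVALID_NULL_POINTER;
+  // Validation passed; forward to the hand-written implementation.
+  return olGetPlatform_impl(NumEntries, Platforms);
+}
+```
+
+The actual generated wrapper additionally guards these checks behind a parameter-validation flag and adds call tracing, as can be seen in `OffloadEntryPoints.inc` later in this patch.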
+ + +### Struct +Represents a struct. Contains a list of members, which each have a *type*, +*name*, and *desc*. + +Also optionally takes a *base_class* field. If this is either of the special +`offload_base_properties_t` or `offload_base_desc_t` structs, then the struct +will inherit members from those structs. The generated struct does **not** use +actual C++ inheritance, but instead explicitly has those members copied in, +which preserves ABI compatibility with C. + +### Enum +Represents a C-style enum. Contains a list of `etor` values, which have a name +and description. + +A `TaggedEtor` record type also exists which additionally takes a type. This type +is used when the enum is used as a parameter to a function with a type-tagged +function parameter (e.g. `olGetDeviceInfo`). + +All enums automatically get a `_FORCE_UINT32 = 0x7fffffff` value, +which forces the underlying type to be uint32. + +### Handle +Represents a pointer to an opaque struct, as described in the Parameter section. +It does not take any extra fields. + +### Typedef +Represents a typedef; contains only a *value* field. + +### Macro +Represents a C preprocessor `#define`. Contains a *value* field. Optionally +takes a *condition* field, which allows the macro to be conditionally defined, +and an *alt_value* field, which represents the value if the condition is false. + +Macro arguments are presented in the *name* field (e.g. name = `mymacro(arg)`). + +While there may seem little point in generating a macro from tablegen, doing this +allows the entire source of the header file to be generated from the tablegen +files, rather than requiring a mix of C source and tablegen. + +## Generation + +### API header +``` +./offload-tblgen -I /offload/API /offload/API/OffloadAPI.td --gen-api +``` +The comments in the generated header are in Doxygen format, although +generating documentation from them hasn't been implemented yet. + +The entirety of this header is generated by Tablegen, rather than having a predefined header file that includes one or more `.inc` files. This is because this header is expected to be part of the installation and distributed to end-users, so should be self-contained. + +### Entry Points +``` +./offload-tblgen -I /offload/API /offload/API/OffloadAPI.td --gen-entry-points +``` +These functions form the actual Offload interface, and are wrappers over the +functions that contain the actual implementation (see +'Adding a new entry point'). + +They implement automatically generated validation checks, and tracing of +function calls with arguments and results. The tracing can be enabled with the +`OFFLOAD_TRACE` environment variable. + +### Implementation function declarations +``` +./offload-tblgen -I /offload/API /offload/API/OffloadAPI.td --gen-impl-func-decls +``` +Generates declarations of the implementation functions for every entry point +in the API, e.g. `offloadDeviceFoo_impl` for `offloadDeviceFoo`. + +### Print header +``` +./offload-tblgen -I /offload/API /offload/API/OffloadAPI.td --gen-print-header +``` +This header contains `std::ostream &operator<<(std::ostream&)` definitions for +various API objects, including function parameters. + +As with the API header, it is expected that this header is part of the installed +package, so it is entirely generated by Tablegen. + +For ease of implementation, and since it is not strictly part of the API, this +is a C++ header file. If a C version is desirable it could be added.
+ +### Future Tablegen backends +`RecordTypes.hpp` contains wrappers for all of the API object types, which will +allow more backends to be easily added in future. + +## Adding to the API + +A new object can be added to the API by adding to one of the existing `.td` +files. It is also possible to add a new tablegen file to the API by adding it +to the includes in `OffloadAPI.td`. When the offload target is rebuilt, the +new definition will be included in the generated files. + +### Adding a new entry point + +When a new entry point is added (e.g. `offloadDeviceFoo`), the actual entry +point is automatically generated, which contains validation and tracing code. +It expects an implementation function (`offloadDeviceFoo_impl`) to be defined, +which it will call into. The definition of this implementation function should +be added to `src/offload_impl.cpp` diff --git a/offload/liboffload/CMakeLists.txt b/offload/liboffload/CMakeLists.txt new file mode 100644 index 0000000000000..f582d9e15fc6e --- /dev/null +++ b/offload/liboffload/CMakeLists.txt @@ -0,0 +1,37 @@ +add_subdirectory(API) + +add_llvm_library(LLVMOffload SHARED + src/OffloadLib.cpp + src/OffloadImpl.cpp) + +foreach(plugin IN LISTS LIBOMPTARGET_PLUGINS_TO_BUILD) + target_link_libraries(LLVMOffload PRIVATE omptarget.rtl.${plugin}) +endforeach() + +if(LIBOMP_HAVE_VERSION_SCRIPT_FLAG) + target_link_libraries(LLVMOffload PRIVATE "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports") +endif() + +target_include_directories(LLVMOffload PUBLIC + ${CMAKE_CURRENT_BINARY_DIR}/../include + ${CMAKE_CURRENT_SOURCE_DIR}/include + ${CMAKE_CURRENT_SOURCE_DIR}/include/generated + ${CMAKE_CURRENT_SOURCE_DIR}/../include + ${CMAKE_CURRENT_SOURCE_DIR}/../plugins-nextgen/common/include) + +target_compile_options(LLVMOffload PRIVATE ${offload_compile_flags}) +target_link_options(LLVMOffload PRIVATE ${offload_link_flags}) + +target_compile_definitions(LLVMOffload PRIVATE + TARGET_NAME="Liboffload" + DEBUG_PREFIX="Liboffload" +) + +set_target_properties(LLVMOffload PROPERTIES + POSITION_INDEPENDENT_CODE ON + INSTALL_RPATH "$ORIGIN" + BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/..") +install(TARGETS LLVMOffload LIBRARY COMPONENT LLVMOffload DESTINATION "${OFFLOAD_INSTALL_LIBDIR}") + +install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/include/generated/OffloadAPI.h DESTINATION ${CMAKE_INSTALL_PREFIX}/include/offload) +install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/include/generated/OffloadPrint.hpp DESTINATION ${CMAKE_INSTALL_PREFIX}/include/offload) diff --git a/offload/liboffload/README.md b/offload/liboffload/README.md new file mode 100644 index 0000000000000..95c9bf54d7bad --- /dev/null +++ b/offload/liboffload/README.md @@ -0,0 +1,8 @@ +# Offload New API + +This directory contains the implementation of the experimental work-in-progress +new API for Offload. It builds on top of the existing plugin implementations but +provides a single level of abstraction suitable for runtimes for languages other +than OpenMP to be built on top of. + +See the [API definition readme](API/README.md) for implementation details. 
\ No newline at end of file diff --git a/offload/liboffload/exports b/offload/liboffload/exports new file mode 100644 index 0000000000000..168341aa7d938 --- /dev/null +++ b/offload/liboffload/exports @@ -0,0 +1,6 @@ +VERS1.0 { +global: + ol*; +local: + *; +}; diff --git a/offload/liboffload/include/OffloadImpl.hpp b/offload/liboffload/include/OffloadImpl.hpp new file mode 100644 index 0000000000000..6d745095f3105 --- /dev/null +++ b/offload/liboffload/include/OffloadImpl.hpp @@ -0,0 +1,94 @@ +//===- offload_impl.hpp- Implementation helpers for the Offload library ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" + +struct OffloadConfig { + bool TracingEnabled = false; +}; + +OffloadConfig &offloadConfig(); + +// Use the StringSet container to efficiently deduplicate repeated error +// strings (e.g. if the same error is hit constantly in a long running program) +llvm::StringSet<> &errorStrs(); + +// Use an unordered_set to avoid duplicates of error structs themselves. +// We cannot store the structs directly as returned pointers to them must always +// be valid, and a rehash of the set may invalidate them. This requires +// custom hash and equal_to function objects. +using ErrPtrT = std::unique_ptr; +struct ErrPtrEqual { + bool operator()(const ErrPtrT &lhs, const ErrPtrT &rhs) const { + if (!lhs && !rhs) { + return true; + } + if (!lhs || !rhs) { + return false; + } + + bool StrsEqual = false; + if (lhs->Details == NULL && rhs->Details == NULL) { + StrsEqual = true; + } else if (lhs->Details != NULL && rhs->Details != NULL) { + StrsEqual = (std::strcmp(lhs->Details, rhs->Details) == 0); + } + return (lhs->Code == rhs->Code) && StrsEqual; + } +}; +struct ErrPtrHash { + size_t operator()(const ErrPtrT &e) const { + if (!e) { + // We shouldn't store empty errors (i.e. 
success), but just in case + return 0lu; + } else { + return std::hash{}(e->Code); + } + } +}; +using ErrSetT = std::unordered_set; +ErrSetT &errors(); + +struct ol_impl_result_t { + ol_impl_result_t(std::nullptr_t) : Result(OL_SUCCESS) {} + ol_impl_result_t(ol_errc_t Code) { + if (Code == OL_ERRC_SUCCESS) { + Result = nullptr; + } else { + auto Err = std::unique_ptr( + new ol_error_struct_t{Code, nullptr}); + Result = errors().emplace(std::move(Err)).first->get(); + } + } + + ol_impl_result_t(ol_errc_t Code, llvm::StringRef Details) { + assert(Code != OL_ERRC_SUCCESS); + Result = nullptr; + auto DetailsStr = errorStrs().insert(Details).first->getKeyData(); + auto Err = std::unique_ptr( + new ol_error_struct_t{Code, DetailsStr}); + Result = errors().emplace(std::move(Err)).first->get(); + } + + operator ol_result_t() { return Result; } + +private: + ol_result_t Result; +}; diff --git a/offload/liboffload/include/generated/OffloadAPI.h b/offload/liboffload/include/generated/OffloadAPI.h new file mode 100644 index 0000000000000..11fcc96625ab8 --- /dev/null +++ b/offload/liboffload/include/generated/OffloadAPI.h @@ -0,0 +1,610 @@ +//===- Auto-generated file, part of the LLVM/Offload project --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Auto-generated file, do not manually edit. + +#pragma once + +#include +#include + +#if defined(__cplusplus) +extern "C" { +#endif + +/////////////////////////////////////////////////////////////////////////////// +#ifndef OL_VERSION_MAJOR +/// @brief Major version of the Offload API +#define OL_VERSION_MAJOR 0 +#endif // OL_VERSION_MAJOR + +/////////////////////////////////////////////////////////////////////////////// +#ifndef OL_VERSION_MINOR +/// @brief Minor version of the Offload API +#define OL_VERSION_MINOR 0 +#endif // OL_VERSION_MINOR + +/////////////////////////////////////////////////////////////////////////////// +#ifndef OL_VERSION_PATCH +/// @brief Patch version of the Offload API +#define OL_VERSION_PATCH 1 +#endif // OL_VERSION_PATCH + +/////////////////////////////////////////////////////////////////////////////// +#ifndef OL_APICALL +#if defined(_WIN32) +/// @brief Calling convention for all API functions +#define OL_APICALL __cdecl +#else +#define OL_APICALL +#endif // defined(_WIN32) +#endif // OL_APICALL + +/////////////////////////////////////////////////////////////////////////////// +#ifndef OL_APIEXPORT +#if defined(_WIN32) +/// @brief Microsoft-specific dllexport storage-class attribute +#define OL_APIEXPORT __declspec(dllexport) +#else +#define OL_APIEXPORT +#endif // defined(_WIN32) +#endif // OL_APIEXPORT + +/////////////////////////////////////////////////////////////////////////////// +#ifndef OL_DLLEXPORT +#if defined(_WIN32) +/// @brief Microsoft-specific dllexport storage-class attribute +#define OL_DLLEXPORT __declspec(dllexport) +#endif // defined(_WIN32) +#endif // OL_DLLEXPORT + +/////////////////////////////////////////////////////////////////////////////// +#ifndef OL_DLLEXPORT +#if __GNUC__ >= 4 +/// @brief GCC-specific dllexport storage-class attribute +#define OL_DLLEXPORT __attribute__((visibility("default"))) +#else +#define OL_DLLEXPORT +#endif // __GNUC__ >= 4 +#endif // OL_DLLEXPORT + 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Handle of a platform instance +typedef struct ol_platform_handle_t_ *ol_platform_handle_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Handle of platform's device object +typedef struct ol_device_handle_t_ *ol_device_handle_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Handle of context object +typedef struct ol_context_handle_t_ *ol_context_handle_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines Return/Error codes +typedef enum ol_errc_t { + /// Success + OL_ERRC_SUCCESS = 0, + /// Invalid Value + OL_ERRC_INVALID_VALUE = 1, + /// Invalid platform + OL_ERRC_INVALID_PLATFORM = 2, + /// Device not found + OL_ERRC_DEVICE_NOT_FOUND = 3, + /// Invalid device + OL_ERRC_INVALID_DEVICE = 4, + /// Device hung, reset, was removed, or driver update occurred + OL_ERRC_DEVICE_LOST = 5, + /// plugin is not initialized or specific entry-point is not implemented + OL_ERRC_UNINITIALIZED = 6, + /// Out of resources + OL_ERRC_OUT_OF_RESOURCES = 7, + /// generic error code for unsupported versions + OL_ERRC_UNSUPPORTED_VERSION = 8, + /// generic error code for unsupported features + OL_ERRC_UNSUPPORTED_FEATURE = 9, + /// generic error code for invalid arguments + OL_ERRC_INVALID_ARGUMENT = 10, + /// handle argument is not valid + OL_ERRC_INVALID_NULL_HANDLE = 11, + /// pointer argument may not be nullptr + OL_ERRC_INVALID_NULL_POINTER = 12, + /// invalid size or dimensions (e.g., must not be zero, or is out of bounds) + OL_ERRC_INVALID_SIZE = 13, + /// enumerator argument is not valid + OL_ERRC_INVALID_ENUMERATION = 14, + /// enumerator argument is not supported by the device + OL_ERRC_UNSUPPORTED_ENUMERATION = 15, + /// Unknown or internal error + OL_ERRC_UNKNOWN = 16, + /// @cond + OL_ERRC_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ol_errc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Details of the error condition returned by an API call +typedef struct ol_error_struct_t { + ol_errc_t Code; /// The error code + const char *Details; /// String containing error details +} ol_error_struct_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Result type returned by all entry points. 
+typedef const ol_error_struct_t *ol_result_t; + +/////////////////////////////////////////////////////////////////////////////// +#ifndef OL_SUCCESS +/// @brief Success condition +#define OL_SUCCESS NULL +#endif // OL_SUCCESS + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Code location information that can optionally be associated with an +/// API call +typedef struct ol_code_location_t { + const char *FunctionName; /// Function name + const char *SourceFile; /// Source code file + uint32_t LineNumber; /// Source code line number + uint32_t ColumnNumber; /// Source code column number +} ol_code_location_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Perform initialization of the Offload library and plugins +/// +/// @details +/// - This must be the first API call made by a user of the Offload library +/// - Each call will increment an internal reference count that is +/// decremented by `olShutDown` +/// +/// @returns +/// - ::OL_RESULT_SUCCESS +/// - ::OL_ERRC_UNINITIALIZED +/// - ::OL_ERRC_DEVICE_LOST +/// - ::OL_ERRC_INVALID_NULL_HANDLE +/// - ::OL_ERRC_INVALID_NULL_POINTER +OL_APIEXPORT ol_result_t OL_APICALL olInit(); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Release the resources in use by Offload +/// +/// @details +/// - This decrements an internal reference count. When this reaches 0, all +/// resources will be released +/// - Subsequent API calls made after this are not valid +/// +/// @returns +/// - ::OL_RESULT_SUCCESS +/// - ::OL_ERRC_UNINITIALIZED +/// - ::OL_ERRC_DEVICE_LOST +/// - ::OL_ERRC_INVALID_NULL_HANDLE +/// - ::OL_ERRC_INVALID_NULL_POINTER +OL_APIEXPORT ol_result_t OL_APICALL olShutDown(); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Retrieves all available platforms +/// +/// @details +/// - Multiple calls to this function will return identical platforms +/// handles, in the same order. +/// +/// @returns +/// - ::OL_RESULT_SUCCESS +/// - ::OL_ERRC_UNINITIALIZED +/// - ::OL_ERRC_DEVICE_LOST +/// - ::OL_ERRC_INVALID_SIZE +/// + `NumEntries == 0` +/// - ::OL_ERRC_INVALID_NULL_HANDLE +/// - ::OL_ERRC_INVALID_NULL_POINTER +/// + `NULL == Platforms` +OL_APIEXPORT ol_result_t OL_APICALL olGetPlatform( + // [in] The number of platforms to be added to Platforms. NumEntries must be + // greater than zero. + uint32_t NumEntries, + // [out] Array of handle of platforms. If NumEntries is less than the number + // of platforms available, then olGetPlatform shall only retrieve that + // number of platforms. + ol_platform_handle_t *Platforms); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Retrieves the number of available platforms +/// +/// @details +/// +/// @returns +/// - ::OL_RESULT_SUCCESS +/// - ::OL_ERRC_UNINITIALIZED +/// - ::OL_ERRC_DEVICE_LOST +/// - ::OL_ERRC_INVALID_NULL_HANDLE +/// - ::OL_ERRC_INVALID_NULL_POINTER +/// + `NULL == NumPlatforms` +OL_APIEXPORT ol_result_t OL_APICALL olGetPlatformCount( + // [out] returns the total number of platforms available. + uint32_t *NumPlatforms); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Supported platform info +typedef enum ol_platform_info_t { + /// [char[]] The string denoting name of the platform. The size of the info + /// needs to be dynamically queried. 
+ OL_PLATFORM_INFO_NAME = 0, + /// [char[]] The string denoting name of the vendor of the platform. The size + /// of the info needs to be dynamically queried. + OL_PLATFORM_INFO_VENDOR_NAME = 1, + /// [char[]] The string denoting the version of the platform. The size of the + /// info needs to be dynamically queried. + OL_PLATFORM_INFO_VERSION = 2, + /// [ol_platform_backend_t] The native backend of the platform. + OL_PLATFORM_INFO_BACKEND = 3, + /// @cond + OL_PLATFORM_INFO_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ol_platform_info_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Identifies the native backend of the platform +typedef enum ol_platform_backend_t { + /// The backend is not recognized + OL_PLATFORM_BACKEND_UNKNOWN = 0, + /// The backend is CUDA + OL_PLATFORM_BACKEND_CUDA = 1, + /// The backend is AMDGPU + OL_PLATFORM_BACKEND_AMDGPU = 2, + /// @cond + OL_PLATFORM_BACKEND_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ol_platform_backend_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Queries the given property of the platform +/// +/// @details +/// - `olGetPlatformInfoSize` can be used to query the storage size required +/// for the given query. +/// +/// @returns +/// - ::OL_RESULT_SUCCESS +/// - ::OL_ERRC_UNINITIALIZED +/// - ::OL_ERRC_DEVICE_LOST +/// - ::OL_ERRC_UNSUPPORTED_ENUMERATION +/// + If `PropName` is not supported by the platform. +/// - ::OL_ERRC_INVALID_SIZE +/// + `PropSize == 0` +/// + If `PropSize` is less than the real number of bytes needed to +/// return the info. +/// - ::OL_ERRC_INVALID_PLATFORM +/// - ::OL_ERRC_INVALID_NULL_HANDLE +/// + `NULL == Platform` +/// - ::OL_ERRC_INVALID_NULL_POINTER +/// + `NULL == PropValue` +OL_APIEXPORT ol_result_t OL_APICALL olGetPlatformInfo( + // [in] handle of the platform + ol_platform_handle_t Platform, + // [in] type of the info to retrieve + ol_platform_info_t PropName, + // [in] the number of bytes pointed to by pPlatformInfo. + size_t PropSize, + // [out] array of bytes holding the info. If Size is not equal to or greater + // to the real number of bytes needed to return the info then the + // OL_ERRC_INVALID_SIZE error is returned and pPlatformInfo is not used. + void *PropValue); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Returns the storage size of the given platform query +/// +/// @details +/// +/// @returns +/// - ::OL_RESULT_SUCCESS +/// - ::OL_ERRC_UNINITIALIZED +/// - ::OL_ERRC_DEVICE_LOST +/// - ::OL_ERRC_UNSUPPORTED_ENUMERATION +/// + If `PropName` is not supported by the platform. 
+/// - ::OL_ERRC_INVALID_PLATFORM +/// - ::OL_ERRC_INVALID_NULL_HANDLE +/// + `NULL == Platform` +/// - ::OL_ERRC_INVALID_NULL_POINTER +/// + `NULL == PropSizeRet` +OL_APIEXPORT ol_result_t OL_APICALL olGetPlatformInfoSize( + // [in] handle of the platform + ol_platform_handle_t Platform, + // [in] type of the info to query + ol_platform_info_t PropName, + // [out] pointer to the number of bytes required to store the query + size_t *PropSizeRet); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Supported device types +typedef enum ol_device_type_t { + /// The default device type as preferred by the runtime + OL_DEVICE_TYPE_DEFAULT = 0, + /// Devices of all types + OL_DEVICE_TYPE_ALL = 1, + /// GPU device type + OL_DEVICE_TYPE_GPU = 2, + /// CPU device type + OL_DEVICE_TYPE_CPU = 3, + /// @cond + OL_DEVICE_TYPE_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ol_device_type_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Supported device info +typedef enum ol_device_info_t { + /// [ol_device_type_t] type of the device + OL_DEVICE_INFO_TYPE = 0, + /// [ol_platform_handle_t] the platform associated with the device + OL_DEVICE_INFO_PLATFORM = 1, + /// [char[]] Device name + OL_DEVICE_INFO_NAME = 2, + /// [char[]] Device vendor + OL_DEVICE_INFO_VENDOR = 3, + /// [char[]] Driver version + OL_DEVICE_INFO_DRIVER_VERSION = 4, + /// @cond + OL_DEVICE_INFO_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ol_device_info_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Retrieves the number of available devices within a platform +/// +/// @details +/// +/// @returns +/// - ::OL_RESULT_SUCCESS +/// - ::OL_ERRC_UNINITIALIZED +/// - ::OL_ERRC_DEVICE_LOST +/// - ::OL_ERRC_INVALID_NULL_HANDLE +/// + `NULL == Platform` +/// - ::OL_ERRC_INVALID_NULL_POINTER +/// + `NULL == NumDevices` +OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceCount( + // [in] handle of the platform instance + ol_platform_handle_t Platform, + // [out] pointer to the number of devices. + uint32_t *NumDevices); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Retrieves devices within a platform +/// +/// @details +/// - Multiple calls to this function will return identical device handles, +/// in the same order. +/// +/// @returns +/// - ::OL_RESULT_SUCCESS +/// - ::OL_ERRC_UNINITIALIZED +/// - ::OL_ERRC_DEVICE_LOST +/// - ::OL_ERRC_INVALID_SIZE +/// + `NumEntries == 0` +/// - ::OL_ERRC_INVALID_NULL_HANDLE +/// + `NULL == Platform` +/// - ::OL_ERRC_INVALID_NULL_POINTER +/// + `NULL == Devices` +OL_APIEXPORT ol_result_t OL_APICALL olGetDevice( + // [in] handle of the platform instance + ol_platform_handle_t Platform, + // [in] the number of devices to be added to phDevices, which must be + // greater than zero + uint32_t NumEntries, + // [out] Array of device handles. If NumEntries is less than the number of + // devices available, then this function shall only retrieve that number of + // devices. + ol_device_handle_t *Devices); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Queries the given property of the device +/// +/// @details +/// +/// @returns +/// - ::OL_RESULT_SUCCESS +/// - ::OL_ERRC_UNINITIALIZED +/// - ::OL_ERRC_DEVICE_LOST +/// - ::OL_ERRC_UNSUPPORTED_ENUMERATION +/// + If `PropName` is not supported by the device. 
+/// - ::OL_ERRC_INVALID_SIZE +/// + `PropSize == 0` +/// + If `PropSize` is less than the real number of bytes needed to +/// return the info. +/// - ::OL_ERRC_INVALID_DEVICE +/// - ::OL_ERRC_INVALID_NULL_HANDLE +/// + `NULL == Device` +/// - ::OL_ERRC_INVALID_NULL_POINTER +/// + `NULL == PropValue` +OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfo( + // [in] handle of the device instance + ol_device_handle_t Device, + // [in] type of the info to retrieve + ol_device_info_t PropName, + // [in] the number of bytes pointed to by PropValue. + size_t PropSize, + // [out] array of bytes holding the info. If PropSize is not equal to or + // greater than the real number of bytes needed to return the info then the + // OL_ERRC_INVALID_SIZE error is returned and PropValue is not used. + void *PropValue); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Returns the storage size of the given device query +/// +/// @details +/// +/// @returns +/// - ::OL_RESULT_SUCCESS +/// - ::OL_ERRC_UNINITIALIZED +/// - ::OL_ERRC_DEVICE_LOST +/// - ::OL_ERRC_UNSUPPORTED_ENUMERATION +/// + If `PropName` is not supported by the device. +/// - ::OL_ERRC_INVALID_DEVICE +/// - ::OL_ERRC_INVALID_NULL_HANDLE +/// + `NULL == Device` +/// - ::OL_ERRC_INVALID_NULL_POINTER +/// + `NULL == PropSizeRet` +OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfoSize( + // [in] handle of the device instance + ol_device_handle_t Device, + // [in] type of the info to retrieve + ol_device_info_t PropName, + // [out] pointer to the number of bytes required to store the query + size_t *PropSizeRet); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for olGetPlatform +/// @details Each entry is a pointer to the parameter passed to the function; +typedef struct ol_get_platform_params_t { + uint32_t *pNumEntries; + ol_platform_handle_t **pPlatforms; +} ol_get_platform_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for olGetPlatformCount +/// @details Each entry is a pointer to the parameter passed to the function; +typedef struct ol_get_platform_count_params_t { + uint32_t **pNumPlatforms; +} ol_get_platform_count_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for olGetPlatformInfo +/// @details Each entry is a pointer to the parameter passed to the function; +typedef struct ol_get_platform_info_params_t { + ol_platform_handle_t *pPlatform; + ol_platform_info_t *pPropName; + size_t *pPropSize; + void **pPropValue; +} ol_get_platform_info_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for olGetPlatformInfoSize +/// @details Each entry is a pointer to the parameter passed to the function; +typedef struct ol_get_platform_info_size_params_t { + ol_platform_handle_t *pPlatform; + ol_platform_info_t *pPropName; + size_t **pPropSizeRet; +} ol_get_platform_info_size_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for olGetDeviceCount +/// @details Each entry is a pointer to the parameter passed to the function; +typedef struct ol_get_device_count_params_t { + ol_platform_handle_t *pPlatform; + uint32_t **pNumDevices; +} ol_get_device_count_params_t; + +/////////////////////////////////////////////////////////////////////////////// 
+/// @brief Function parameters for olGetDevice +/// @details Each entry is a pointer to the parameter passed to the function; +typedef struct ol_get_device_params_t { + ol_platform_handle_t *pPlatform; + uint32_t *pNumEntries; + ol_device_handle_t **pDevices; +} ol_get_device_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for olGetDeviceInfo +/// @details Each entry is a pointer to the parameter passed to the function; +typedef struct ol_get_device_info_params_t { + ol_device_handle_t *pDevice; + ol_device_info_t *pPropName; + size_t *pPropSize; + void **pPropValue; +} ol_get_device_info_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for olGetDeviceInfoSize +/// @details Each entry is a pointer to the parameter passed to the function; +typedef struct ol_get_device_info_size_params_t { + ol_device_handle_t *pDevice; + ol_device_info_t *pPropName; + size_t **pPropSizeRet; +} ol_get_device_info_size_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Variant of olInit that also sets source code location information +/// @details See also ::olInit +OL_APIEXPORT ol_result_t OL_APICALL +olInitWithCodeLoc(ol_code_location_t *CodeLocation); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Variant of olShutDown that also sets source code location information +/// @details See also ::olShutDown +OL_APIEXPORT ol_result_t OL_APICALL +olShutDownWithCodeLoc(ol_code_location_t *CodeLocation); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Variant of olGetPlatform that also sets source code location +/// information +/// @details See also ::olGetPlatform +OL_APIEXPORT ol_result_t OL_APICALL +olGetPlatformWithCodeLoc(uint32_t NumEntries, ol_platform_handle_t *Platforms, + ol_code_location_t *CodeLocation); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Variant of olGetPlatformCount that also sets source code location +/// information +/// @details See also ::olGetPlatformCount +OL_APIEXPORT ol_result_t OL_APICALL olGetPlatformCountWithCodeLoc( + uint32_t *NumPlatforms, ol_code_location_t *CodeLocation); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Variant of olGetPlatformInfo that also sets source code location +/// information +/// @details See also ::olGetPlatformInfo +OL_APIEXPORT ol_result_t OL_APICALL olGetPlatformInfoWithCodeLoc( + ol_platform_handle_t Platform, ol_platform_info_t PropName, size_t PropSize, + void *PropValue, ol_code_location_t *CodeLocation); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Variant of olGetPlatformInfoSize that also sets source code location +/// information +/// @details See also ::olGetPlatformInfoSize +OL_APIEXPORT ol_result_t OL_APICALL olGetPlatformInfoSizeWithCodeLoc( + ol_platform_handle_t Platform, ol_platform_info_t PropName, + size_t *PropSizeRet, ol_code_location_t *CodeLocation); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Variant of olGetDeviceCount that also sets source code location +/// information +/// @details See also ::olGetDeviceCount +OL_APIEXPORT ol_result_t OL_APICALL +olGetDeviceCountWithCodeLoc(ol_platform_handle_t Platform, uint32_t *NumDevices, + 
ol_code_location_t *CodeLocation); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Variant of olGetDevice that also sets source code location +/// information +/// @details See also ::olGetDevice +OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceWithCodeLoc( + ol_platform_handle_t Platform, uint32_t NumEntries, + ol_device_handle_t *Devices, ol_code_location_t *CodeLocation); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Variant of olGetDeviceInfo that also sets source code location +/// information +/// @details See also ::olGetDeviceInfo +OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfoWithCodeLoc( + ol_device_handle_t Device, ol_device_info_t PropName, size_t PropSize, + void *PropValue, ol_code_location_t *CodeLocation); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Variant of olGetDeviceInfoSize that also sets source code location +/// information +/// @details See also ::olGetDeviceInfoSize +OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfoSizeWithCodeLoc( + ol_device_handle_t Device, ol_device_info_t PropName, size_t *PropSizeRet, + ol_code_location_t *CodeLocation); + +#if defined(__cplusplus) +} // extern "C" +#endif diff --git a/offload/liboffload/include/generated/OffloadEntryPoints.inc b/offload/liboffload/include/generated/OffloadEntryPoints.inc new file mode 100644 index 0000000000000..49c1c8169615e --- /dev/null +++ b/offload/liboffload/include/generated/OffloadEntryPoints.inc @@ -0,0 +1,441 @@ +//===- Auto-generated file, part of the LLVM/Offload project --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +/////////////////////////////////////////////////////////////////////////////// +ol_impl_result_t olInit_val() { + if (true /*enableParameterValidation*/) { + } + + return olInit_impl(); +} +OL_APIEXPORT ol_result_t OL_APICALL olInit() { + if (offloadConfig().TracingEnabled) { + std::cout << "---> olInit"; + } + + ol_result_t Result = olInit_val(); + + if (offloadConfig().TracingEnabled) { + std::cout << "()"; + std::cout << "-> " << Result << "\n"; + if (Result && Result->Details) { + std::cout << " *Error Details* " << Result->Details << " \n"; + } + } + return Result; +} +ol_result_t olInitWithCodeLoc(ol_code_location_t *CodeLocation) { + currentCodeLocation() = CodeLocation; + ol_result_t Result = olInit(); + + currentCodeLocation() = nullptr; + return Result; +} + +/////////////////////////////////////////////////////////////////////////////// +ol_impl_result_t olShutDown_val() { + if (true /*enableParameterValidation*/) { + } + + return olShutDown_impl(); +} +OL_APIEXPORT ol_result_t OL_APICALL olShutDown() { + if (offloadConfig().TracingEnabled) { + std::cout << "---> olShutDown"; + } + + ol_result_t Result = olShutDown_val(); + + if (offloadConfig().TracingEnabled) { + std::cout << "()"; + std::cout << "-> " << Result << "\n"; + if (Result && Result->Details) { + std::cout << " *Error Details* " << Result->Details << " \n"; + } + } + return Result; +} +ol_result_t olShutDownWithCodeLoc(ol_code_location_t *CodeLocation) { + currentCodeLocation() = CodeLocation; + ol_result_t Result = olShutDown(); + + currentCodeLocation() = nullptr; + return Result; +} + +/////////////////////////////////////////////////////////////////////////////// +ol_impl_result_t olGetPlatform_val(uint32_t NumEntries, + ol_platform_handle_t *Platforms) { + if (true /*enableParameterValidation*/) { + if (NumEntries == 0) { + return OL_ERRC_INVALID_SIZE; + } + + if (NULL == Platforms) { + return OL_ERRC_INVALID_NULL_POINTER; + } + } + + return olGetPlatform_impl(NumEntries, Platforms); +} +OL_APIEXPORT ol_result_t OL_APICALL +olGetPlatform(uint32_t NumEntries, ol_platform_handle_t *Platforms) { + if (offloadConfig().TracingEnabled) { + std::cout << "---> olGetPlatform"; + } + + ol_result_t Result = olGetPlatform_val(NumEntries, Platforms); + + if (offloadConfig().TracingEnabled) { + ol_get_platform_params_t Params = {&NumEntries, &Platforms}; + std::cout << "(" << &Params << ")"; + std::cout << "-> " << Result << "\n"; + if (Result && Result->Details) { + std::cout << " *Error Details* " << Result->Details << " \n"; + } + } + return Result; +} +ol_result_t olGetPlatformWithCodeLoc(uint32_t NumEntries, + ol_platform_handle_t *Platforms, + ol_code_location_t *CodeLocation) { + currentCodeLocation() = CodeLocation; + ol_result_t Result = olGetPlatform(NumEntries, Platforms); + + currentCodeLocation() = nullptr; + return Result; +} + +/////////////////////////////////////////////////////////////////////////////// +ol_impl_result_t olGetPlatformCount_val(uint32_t *NumPlatforms) { + if (true /*enableParameterValidation*/) { + if (NULL == NumPlatforms) { + return OL_ERRC_INVALID_NULL_POINTER; + } + } + + return olGetPlatformCount_impl(NumPlatforms); +} +OL_APIEXPORT ol_result_t OL_APICALL olGetPlatformCount(uint32_t *NumPlatforms) { + if (offloadConfig().TracingEnabled) { + std::cout << "---> olGetPlatformCount"; + } + + ol_result_t Result = 
olGetPlatformCount_val(NumPlatforms); + + if (offloadConfig().TracingEnabled) { + ol_get_platform_count_params_t Params = {&NumPlatforms}; + std::cout << "(" << &Params << ")"; + std::cout << "-> " << Result << "\n"; + if (Result && Result->Details) { + std::cout << " *Error Details* " << Result->Details << " \n"; + } + } + return Result; +} +ol_result_t olGetPlatformCountWithCodeLoc(uint32_t *NumPlatforms, + ol_code_location_t *CodeLocation) { + currentCodeLocation() = CodeLocation; + ol_result_t Result = olGetPlatformCount(NumPlatforms); + + currentCodeLocation() = nullptr; + return Result; +} + +/////////////////////////////////////////////////////////////////////////////// +ol_impl_result_t olGetPlatformInfo_val(ol_platform_handle_t Platform, + ol_platform_info_t PropName, + size_t PropSize, void *PropValue) { + if (true /*enableParameterValidation*/) { + if (PropSize == 0) { + return OL_ERRC_INVALID_SIZE; + } + + if (NULL == Platform) { + return OL_ERRC_INVALID_NULL_HANDLE; + } + + if (NULL == PropValue) { + return OL_ERRC_INVALID_NULL_POINTER; + } + } + + return olGetPlatformInfo_impl(Platform, PropName, PropSize, PropValue); +} +OL_APIEXPORT ol_result_t OL_APICALL +olGetPlatformInfo(ol_platform_handle_t Platform, ol_platform_info_t PropName, + size_t PropSize, void *PropValue) { + if (offloadConfig().TracingEnabled) { + std::cout << "---> olGetPlatformInfo"; + } + + ol_result_t Result = + olGetPlatformInfo_val(Platform, PropName, PropSize, PropValue); + + if (offloadConfig().TracingEnabled) { + ol_get_platform_info_params_t Params = {&Platform, &PropName, &PropSize, + &PropValue}; + std::cout << "(" << &Params << ")"; + std::cout << "-> " << Result << "\n"; + if (Result && Result->Details) { + std::cout << " *Error Details* " << Result->Details << " \n"; + } + } + return Result; +} +ol_result_t olGetPlatformInfoWithCodeLoc(ol_platform_handle_t Platform, + ol_platform_info_t PropName, + size_t PropSize, void *PropValue, + ol_code_location_t *CodeLocation) { + currentCodeLocation() = CodeLocation; + ol_result_t Result = + olGetPlatformInfo(Platform, PropName, PropSize, PropValue); + + currentCodeLocation() = nullptr; + return Result; +} + +/////////////////////////////////////////////////////////////////////////////// +ol_impl_result_t olGetPlatformInfoSize_val(ol_platform_handle_t Platform, + ol_platform_info_t PropName, + size_t *PropSizeRet) { + if (true /*enableParameterValidation*/) { + if (NULL == Platform) { + return OL_ERRC_INVALID_NULL_HANDLE; + } + + if (NULL == PropSizeRet) { + return OL_ERRC_INVALID_NULL_POINTER; + } + } + + return olGetPlatformInfoSize_impl(Platform, PropName, PropSizeRet); +} +OL_APIEXPORT ol_result_t OL_APICALL +olGetPlatformInfoSize(ol_platform_handle_t Platform, + ol_platform_info_t PropName, size_t *PropSizeRet) { + if (offloadConfig().TracingEnabled) { + std::cout << "---> olGetPlatformInfoSize"; + } + + ol_result_t Result = + olGetPlatformInfoSize_val(Platform, PropName, PropSizeRet); + + if (offloadConfig().TracingEnabled) { + ol_get_platform_info_size_params_t Params = {&Platform, &PropName, + &PropSizeRet}; + std::cout << "(" << &Params << ")"; + std::cout << "-> " << Result << "\n"; + if (Result && Result->Details) { + std::cout << " *Error Details* " << Result->Details << " \n"; + } + } + return Result; +} +ol_result_t olGetPlatformInfoSizeWithCodeLoc(ol_platform_handle_t Platform, + ol_platform_info_t PropName, + size_t *PropSizeRet, + ol_code_location_t *CodeLocation) { + currentCodeLocation() = CodeLocation; + ol_result_t Result = 
olGetPlatformInfoSize(Platform, PropName, PropSizeRet); + + currentCodeLocation() = nullptr; + return Result; +} + +/////////////////////////////////////////////////////////////////////////////// +ol_impl_result_t olGetDeviceCount_val(ol_platform_handle_t Platform, + uint32_t *NumDevices) { + if (true /*enableParameterValidation*/) { + if (NULL == Platform) { + return OL_ERRC_INVALID_NULL_HANDLE; + } + + if (NULL == NumDevices) { + return OL_ERRC_INVALID_NULL_POINTER; + } + } + + return olGetDeviceCount_impl(Platform, NumDevices); +} +OL_APIEXPORT ol_result_t OL_APICALL +olGetDeviceCount(ol_platform_handle_t Platform, uint32_t *NumDevices) { + if (offloadConfig().TracingEnabled) { + std::cout << "---> olGetDeviceCount"; + } + + ol_result_t Result = olGetDeviceCount_val(Platform, NumDevices); + + if (offloadConfig().TracingEnabled) { + ol_get_device_count_params_t Params = {&Platform, &NumDevices}; + std::cout << "(" << &Params << ")"; + std::cout << "-> " << Result << "\n"; + if (Result && Result->Details) { + std::cout << " *Error Details* " << Result->Details << " \n"; + } + } + return Result; +} +ol_result_t olGetDeviceCountWithCodeLoc(ol_platform_handle_t Platform, + uint32_t *NumDevices, + ol_code_location_t *CodeLocation) { + currentCodeLocation() = CodeLocation; + ol_result_t Result = olGetDeviceCount(Platform, NumDevices); + + currentCodeLocation() = nullptr; + return Result; +} + +/////////////////////////////////////////////////////////////////////////////// +ol_impl_result_t olGetDevice_val(ol_platform_handle_t Platform, + uint32_t NumEntries, + ol_device_handle_t *Devices) { + if (true /*enableParameterValidation*/) { + if (NumEntries == 0) { + return OL_ERRC_INVALID_SIZE; + } + + if (NULL == Platform) { + return OL_ERRC_INVALID_NULL_HANDLE; + } + + if (NULL == Devices) { + return OL_ERRC_INVALID_NULL_POINTER; + } + } + + return olGetDevice_impl(Platform, NumEntries, Devices); +} +OL_APIEXPORT ol_result_t OL_APICALL olGetDevice(ol_platform_handle_t Platform, + uint32_t NumEntries, + ol_device_handle_t *Devices) { + if (offloadConfig().TracingEnabled) { + std::cout << "---> olGetDevice"; + } + + ol_result_t Result = olGetDevice_val(Platform, NumEntries, Devices); + + if (offloadConfig().TracingEnabled) { + ol_get_device_params_t Params = {&Platform, &NumEntries, &Devices}; + std::cout << "(" << &Params << ")"; + std::cout << "-> " << Result << "\n"; + if (Result && Result->Details) { + std::cout << " *Error Details* " << Result->Details << " \n"; + } + } + return Result; +} +ol_result_t olGetDeviceWithCodeLoc(ol_platform_handle_t Platform, + uint32_t NumEntries, + ol_device_handle_t *Devices, + ol_code_location_t *CodeLocation) { + currentCodeLocation() = CodeLocation; + ol_result_t Result = olGetDevice(Platform, NumEntries, Devices); + + currentCodeLocation() = nullptr; + return Result; +} + +/////////////////////////////////////////////////////////////////////////////// +ol_impl_result_t olGetDeviceInfo_val(ol_device_handle_t Device, + ol_device_info_t PropName, size_t PropSize, + void *PropValue) { + if (true /*enableParameterValidation*/) { + if (PropSize == 0) { + return OL_ERRC_INVALID_SIZE; + } + + if (NULL == Device) { + return OL_ERRC_INVALID_NULL_HANDLE; + } + + if (NULL == PropValue) { + return OL_ERRC_INVALID_NULL_POINTER; + } + } + + return olGetDeviceInfo_impl(Device, PropName, PropSize, PropValue); +} +OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfo(ol_device_handle_t Device, + ol_device_info_t PropName, + size_t PropSize, + void *PropValue) { + if 
(offloadConfig().TracingEnabled) { + std::cout << "---> olGetDeviceInfo"; + } + + ol_result_t Result = + olGetDeviceInfo_val(Device, PropName, PropSize, PropValue); + + if (offloadConfig().TracingEnabled) { + ol_get_device_info_params_t Params = {&Device, &PropName, &PropSize, + &PropValue}; + std::cout << "(" << &Params << ")"; + std::cout << "-> " << Result << "\n"; + if (Result && Result->Details) { + std::cout << " *Error Details* " << Result->Details << " \n"; + } + } + return Result; +} +ol_result_t olGetDeviceInfoWithCodeLoc(ol_device_handle_t Device, + ol_device_info_t PropName, + size_t PropSize, void *PropValue, + ol_code_location_t *CodeLocation) { + currentCodeLocation() = CodeLocation; + ol_result_t Result = olGetDeviceInfo(Device, PropName, PropSize, PropValue); + + currentCodeLocation() = nullptr; + return Result; +} + +/////////////////////////////////////////////////////////////////////////////// +ol_impl_result_t olGetDeviceInfoSize_val(ol_device_handle_t Device, + ol_device_info_t PropName, + size_t *PropSizeRet) { + if (true /*enableParameterValidation*/) { + if (NULL == Device) { + return OL_ERRC_INVALID_NULL_HANDLE; + } + + if (NULL == PropSizeRet) { + return OL_ERRC_INVALID_NULL_POINTER; + } + } + + return olGetDeviceInfoSize_impl(Device, PropName, PropSizeRet); +} +OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfoSize( + ol_device_handle_t Device, ol_device_info_t PropName, size_t *PropSizeRet) { + if (offloadConfig().TracingEnabled) { + std::cout << "---> olGetDeviceInfoSize"; + } + + ol_result_t Result = olGetDeviceInfoSize_val(Device, PropName, PropSizeRet); + + if (offloadConfig().TracingEnabled) { + ol_get_device_info_size_params_t Params = {&Device, &PropName, + &PropSizeRet}; + std::cout << "(" << &Params << ")"; + std::cout << "-> " << Result << "\n"; + if (Result && Result->Details) { + std::cout << " *Error Details* " << Result->Details << " \n"; + } + } + return Result; +} +ol_result_t olGetDeviceInfoSizeWithCodeLoc(ol_device_handle_t Device, + ol_device_info_t PropName, + size_t *PropSizeRet, + ol_code_location_t *CodeLocation) { + currentCodeLocation() = CodeLocation; + ol_result_t Result = olGetDeviceInfoSize(Device, PropName, PropSizeRet); + + currentCodeLocation() = nullptr; + return Result; +} diff --git a/offload/liboffload/include/generated/OffloadFuncs.inc b/offload/liboffload/include/generated/OffloadFuncs.inc new file mode 100644 index 0000000000000..48115493c790f --- /dev/null +++ b/offload/liboffload/include/generated/OffloadFuncs.inc @@ -0,0 +1,34 @@ +//===- Auto-generated file, part of the LLVM/Offload project --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef OFFLOAD_FUNC +#error Please define the macro OFFLOAD_FUNC(Function) +#endif + +OFFLOAD_FUNC(olInit) +OFFLOAD_FUNC(olShutDown) +OFFLOAD_FUNC(olGetPlatform) +OFFLOAD_FUNC(olGetPlatformCount) +OFFLOAD_FUNC(olGetPlatformInfo) +OFFLOAD_FUNC(olGetPlatformInfoSize) +OFFLOAD_FUNC(olGetDeviceCount) +OFFLOAD_FUNC(olGetDevice) +OFFLOAD_FUNC(olGetDeviceInfo) +OFFLOAD_FUNC(olGetDeviceInfoSize) +OFFLOAD_FUNC(olInitWithCodeLoc) +OFFLOAD_FUNC(olShutDownWithCodeLoc) +OFFLOAD_FUNC(olGetPlatformWithCodeLoc) +OFFLOAD_FUNC(olGetPlatformCountWithCodeLoc) +OFFLOAD_FUNC(olGetPlatformInfoWithCodeLoc) +OFFLOAD_FUNC(olGetPlatformInfoSizeWithCodeLoc) +OFFLOAD_FUNC(olGetDeviceCountWithCodeLoc) +OFFLOAD_FUNC(olGetDeviceWithCodeLoc) +OFFLOAD_FUNC(olGetDeviceInfoWithCodeLoc) +OFFLOAD_FUNC(olGetDeviceInfoSizeWithCodeLoc) + +#undef OFFLOAD_FUNC diff --git a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc new file mode 100644 index 0000000000000..5b26b2653a05d --- /dev/null +++ b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc @@ -0,0 +1,38 @@ +//===- Auto-generated file, part of the LLVM/Offload project --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +ol_impl_result_t olInit_impl(); + +ol_impl_result_t olShutDown_impl(); + +ol_impl_result_t olGetPlatform_impl(uint32_t NumEntries, + ol_platform_handle_t *Platforms); + +ol_impl_result_t olGetPlatformCount_impl(uint32_t *NumPlatforms); + +ol_impl_result_t olGetPlatformInfo_impl(ol_platform_handle_t Platform, + ol_platform_info_t PropName, + size_t PropSize, void *PropValue); + +ol_impl_result_t olGetPlatformInfoSize_impl(ol_platform_handle_t Platform, + ol_platform_info_t PropName, + size_t *PropSizeRet); + +ol_impl_result_t olGetDeviceCount_impl(ol_platform_handle_t Platform, + uint32_t *NumDevices); + +ol_impl_result_t olGetDevice_impl(ol_platform_handle_t Platform, + uint32_t NumEntries, + ol_device_handle_t *Devices); + +ol_impl_result_t olGetDeviceInfo_impl(ol_device_handle_t Device, + ol_device_info_t PropName, + size_t PropSize, void *PropValue); + +ol_impl_result_t olGetDeviceInfoSize_impl(ol_device_handle_t Device, + ol_device_info_t PropName, + size_t *PropSizeRet); diff --git a/offload/liboffload/include/generated/OffloadPrint.hpp b/offload/liboffload/include/generated/OffloadPrint.hpp new file mode 100644 index 0000000000000..8981bb054a4cb --- /dev/null +++ b/offload/liboffload/include/generated/OffloadPrint.hpp @@ -0,0 +1,428 @@ +//===- Auto-generated file, part of the LLVM/Offload project --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Auto-generated file, do not manually edit. 
+ +#pragma once + +#include +#include + +template +inline ol_result_t printPtr(std::ostream &os, const T *ptr); +template +inline void printTagged(std::ostream &os, const void *ptr, T value, + size_t size); +template struct is_handle : std::false_type {}; +template <> struct is_handle : std::true_type {}; +template <> struct is_handle : std::true_type {}; +template <> struct is_handle : std::true_type {}; +template inline constexpr bool is_handle_v = is_handle::value; + +inline std::ostream &operator<<(std::ostream &os, enum ol_errc_t value); +inline std::ostream &operator<<(std::ostream &os, + enum ol_platform_info_t value); +inline std::ostream &operator<<(std::ostream &os, + enum ol_platform_backend_t value); +inline std::ostream &operator<<(std::ostream &os, enum ol_device_type_t value); +inline std::ostream &operator<<(std::ostream &os, enum ol_device_info_t value); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ol_errc_t type +/// @returns std::ostream & +inline std::ostream &operator<<(std::ostream &os, enum ol_errc_t value) { + switch (value) { + case OL_ERRC_SUCCESS: + os << "OL_ERRC_SUCCESS"; + break; + case OL_ERRC_INVALID_VALUE: + os << "OL_ERRC_INVALID_VALUE"; + break; + case OL_ERRC_INVALID_PLATFORM: + os << "OL_ERRC_INVALID_PLATFORM"; + break; + case OL_ERRC_DEVICE_NOT_FOUND: + os << "OL_ERRC_DEVICE_NOT_FOUND"; + break; + case OL_ERRC_INVALID_DEVICE: + os << "OL_ERRC_INVALID_DEVICE"; + break; + case OL_ERRC_DEVICE_LOST: + os << "OL_ERRC_DEVICE_LOST"; + break; + case OL_ERRC_UNINITIALIZED: + os << "OL_ERRC_UNINITIALIZED"; + break; + case OL_ERRC_OUT_OF_RESOURCES: + os << "OL_ERRC_OUT_OF_RESOURCES"; + break; + case OL_ERRC_UNSUPPORTED_VERSION: + os << "OL_ERRC_UNSUPPORTED_VERSION"; + break; + case OL_ERRC_UNSUPPORTED_FEATURE: + os << "OL_ERRC_UNSUPPORTED_FEATURE"; + break; + case OL_ERRC_INVALID_ARGUMENT: + os << "OL_ERRC_INVALID_ARGUMENT"; + break; + case OL_ERRC_INVALID_NULL_HANDLE: + os << "OL_ERRC_INVALID_NULL_HANDLE"; + break; + case OL_ERRC_INVALID_NULL_POINTER: + os << "OL_ERRC_INVALID_NULL_POINTER"; + break; + case OL_ERRC_INVALID_SIZE: + os << "OL_ERRC_INVALID_SIZE"; + break; + case OL_ERRC_INVALID_ENUMERATION: + os << "OL_ERRC_INVALID_ENUMERATION"; + break; + case OL_ERRC_UNSUPPORTED_ENUMERATION: + os << "OL_ERRC_UNSUPPORTED_ENUMERATION"; + break; + case OL_ERRC_UNKNOWN: + os << "OL_ERRC_UNKNOWN"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ol_platform_info_t type +/// @returns std::ostream & +inline std::ostream &operator<<(std::ostream &os, + enum ol_platform_info_t value) { + switch (value) { + case OL_PLATFORM_INFO_NAME: + os << "OL_PLATFORM_INFO_NAME"; + break; + case OL_PLATFORM_INFO_VENDOR_NAME: + os << "OL_PLATFORM_INFO_VENDOR_NAME"; + break; + case OL_PLATFORM_INFO_VERSION: + os << "OL_PLATFORM_INFO_VERSION"; + break; + case OL_PLATFORM_INFO_BACKEND: + os << "OL_PLATFORM_INFO_BACKEND"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print type-tagged ol_platform_info_t enum value +/// @returns std::ostream & +template <> +inline void printTagged(std::ostream &os, const void *ptr, + ol_platform_info_t value, size_t size) { + if (ptr == NULL) { + printPtr(os, ptr); + return; + } + + switch (value) { + case 
OL_PLATFORM_INFO_NAME: { + printPtr(os, (const char *)ptr); + break; + } + case OL_PLATFORM_INFO_VENDOR_NAME: { + printPtr(os, (const char *)ptr); + break; + } + case OL_PLATFORM_INFO_VERSION: { + printPtr(os, (const char *)ptr); + break; + } + case OL_PLATFORM_INFO_BACKEND: { + const ol_platform_backend_t *const tptr = + (const ol_platform_backend_t *const)ptr; + os << (const void *)tptr << " ("; + os << *tptr; + os << ")"; + break; + } + default: + os << "unknown enumerator"; + break; + } +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ol_platform_backend_t type +/// @returns std::ostream & +inline std::ostream &operator<<(std::ostream &os, + enum ol_platform_backend_t value) { + switch (value) { + case OL_PLATFORM_BACKEND_UNKNOWN: + os << "OL_PLATFORM_BACKEND_UNKNOWN"; + break; + case OL_PLATFORM_BACKEND_CUDA: + os << "OL_PLATFORM_BACKEND_CUDA"; + break; + case OL_PLATFORM_BACKEND_AMDGPU: + os << "OL_PLATFORM_BACKEND_AMDGPU"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ol_device_type_t type +/// @returns std::ostream & +inline std::ostream &operator<<(std::ostream &os, enum ol_device_type_t value) { + switch (value) { + case OL_DEVICE_TYPE_DEFAULT: + os << "OL_DEVICE_TYPE_DEFAULT"; + break; + case OL_DEVICE_TYPE_ALL: + os << "OL_DEVICE_TYPE_ALL"; + break; + case OL_DEVICE_TYPE_GPU: + os << "OL_DEVICE_TYPE_GPU"; + break; + case OL_DEVICE_TYPE_CPU: + os << "OL_DEVICE_TYPE_CPU"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ol_device_info_t type +/// @returns std::ostream & +inline std::ostream &operator<<(std::ostream &os, enum ol_device_info_t value) { + switch (value) { + case OL_DEVICE_INFO_TYPE: + os << "OL_DEVICE_INFO_TYPE"; + break; + case OL_DEVICE_INFO_PLATFORM: + os << "OL_DEVICE_INFO_PLATFORM"; + break; + case OL_DEVICE_INFO_NAME: + os << "OL_DEVICE_INFO_NAME"; + break; + case OL_DEVICE_INFO_VENDOR: + os << "OL_DEVICE_INFO_VENDOR"; + break; + case OL_DEVICE_INFO_DRIVER_VERSION: + os << "OL_DEVICE_INFO_DRIVER_VERSION"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print type-tagged ol_device_info_t enum value +/// @returns std::ostream & +template <> +inline void printTagged(std::ostream &os, const void *ptr, + ol_device_info_t value, size_t size) { + if (ptr == NULL) { + printPtr(os, ptr); + return; + } + + switch (value) { + case OL_DEVICE_INFO_TYPE: { + const ol_device_type_t *const tptr = (const ol_device_type_t *const)ptr; + os << (const void *)tptr << " ("; + os << *tptr; + os << ")"; + break; + } + case OL_DEVICE_INFO_PLATFORM: { + const ol_platform_handle_t *const tptr = + (const ol_platform_handle_t *const)ptr; + os << (const void *)tptr << " ("; + os << *tptr; + os << ")"; + break; + } + case OL_DEVICE_INFO_NAME: { + printPtr(os, (const char *)ptr); + break; + } + case OL_DEVICE_INFO_VENDOR: { + printPtr(os, (const char *)ptr); + break; + } + case OL_DEVICE_INFO_DRIVER_VERSION: { + printPtr(os, (const char *)ptr); + break; + } + default: + os << "unknown enumerator"; + break; + } +} + +inline std::ostream &operator<<(std::ostream &os, + const ol_error_struct_t *Err) { + if 
(Err == nullptr) { + os << "OL_SUCCESS"; + } else { + os << Err->Code; + } + return os; +} + +inline std::ostream &operator<<(std::ostream &os, + const struct ol_get_platform_params_t *params) { + os << ".NumEntries = "; + os << *params->pNumEntries; + os << ", "; + os << ".Platforms = "; + os << "{"; + for (size_t i = 0; i < *params->pNumEntries; i++) { + if (i > 0) { + os << ", "; + } + printPtr(os, (*params->pPlatforms)[i]); + } + os << "}"; + return os; +} + +inline std::ostream & +operator<<(std::ostream &os, + const struct ol_get_platform_count_params_t *params) { + os << ".NumPlatforms = "; + printPtr(os, *params->pNumPlatforms); + return os; +} + +inline std::ostream & +operator<<(std::ostream &os, + const struct ol_get_platform_info_params_t *params) { + os << ".Platform = "; + printPtr(os, *params->pPlatform); + os << ", "; + os << ".PropName = "; + os << *params->pPropName; + os << ", "; + os << ".PropSize = "; + os << *params->pPropSize; + os << ", "; + os << ".PropValue = "; + printTagged(os, *params->pPropValue, *params->pPropName, *params->pPropSize); + return os; +} + +inline std::ostream & +operator<<(std::ostream &os, + const struct ol_get_platform_info_size_params_t *params) { + os << ".Platform = "; + printPtr(os, *params->pPlatform); + os << ", "; + os << ".PropName = "; + os << *params->pPropName; + os << ", "; + os << ".PropSizeRet = "; + printPtr(os, *params->pPropSizeRet); + return os; +} + +inline std::ostream & +operator<<(std::ostream &os, + const struct ol_get_device_count_params_t *params) { + os << ".Platform = "; + printPtr(os, *params->pPlatform); + os << ", "; + os << ".NumDevices = "; + printPtr(os, *params->pNumDevices); + return os; +} + +inline std::ostream &operator<<(std::ostream &os, + const struct ol_get_device_params_t *params) { + os << ".Platform = "; + printPtr(os, *params->pPlatform); + os << ", "; + os << ".NumEntries = "; + os << *params->pNumEntries; + os << ", "; + os << ".Devices = "; + os << "{"; + for (size_t i = 0; i < *params->pNumEntries; i++) { + if (i > 0) { + os << ", "; + } + printPtr(os, (*params->pDevices)[i]); + } + os << "}"; + return os; +} + +inline std::ostream & +operator<<(std::ostream &os, const struct ol_get_device_info_params_t *params) { + os << ".Device = "; + printPtr(os, *params->pDevice); + os << ", "; + os << ".PropName = "; + os << *params->pPropName; + os << ", "; + os << ".PropSize = "; + os << *params->pPropSize; + os << ", "; + os << ".PropValue = "; + printTagged(os, *params->pPropValue, *params->pPropName, *params->pPropSize); + return os; +} + +inline std::ostream & +operator<<(std::ostream &os, + const struct ol_get_device_info_size_params_t *params) { + os << ".Device = "; + printPtr(os, *params->pDevice); + os << ", "; + os << ".PropName = "; + os << *params->pPropName; + os << ", "; + os << ".PropSizeRet = "; + printPtr(os, *params->pPropSizeRet); + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +// @brief Print pointer value +template +inline ol_result_t printPtr(std::ostream &os, const T *ptr) { + if (ptr == nullptr) { + os << "nullptr"; + } else if constexpr (std::is_pointer_v) { + os << (const void *)(ptr) << " ("; + printPtr(os, *ptr); + os << ")"; + } else if constexpr (std::is_void_v || is_handle_v) { + os << (const void *)ptr; + } else if constexpr (std::is_same_v, char>) { + os << (const void *)(ptr) << " ("; + os << ptr; + os << ")"; + } else { + os << (const void *)(ptr) << " ("; + os << *ptr; + os << ")"; + } + + return OL_SUCCESS; +} diff 
--git a/offload/liboffload/src/Helpers.hpp b/offload/liboffload/src/Helpers.hpp
new file mode 100644
index 0000000000000..d003d30252462
--- /dev/null
+++ b/offload/liboffload/src/Helpers.hpp
@@ -0,0 +1,95 @@
+//===- helpers.hpp - GetInfo return helpers for the new LLVM/Offload API --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The getInfo*/ReturnHelper facilities provide a shortcut way of writing
+// return data + size for the various getInfo APIs. Based on the equivalent
+// implementations in Unified Runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#include "OffloadAPI.h"
+
+#include <cstring>
+
+template <typename T, typename Assign>
+ol_errc_t getInfoImpl(size_t ParamValueSize, void *ParamValue,
+                      size_t *ParamValueSizeRet, T Value, size_t ValueSize,
+                      Assign &&AssignFunc) {
+  if (!ParamValue && !ParamValueSizeRet) {
+    return OL_ERRC_INVALID_NULL_POINTER;
+  }
+
+  if (ParamValue != nullptr) {
+    if (ParamValueSize < ValueSize) {
+      return OL_ERRC_INVALID_SIZE;
+    }
+    AssignFunc(ParamValue, Value, ValueSize);
+  }
+
+  if (ParamValueSizeRet != nullptr) {
+    *ParamValueSizeRet = ValueSize;
+  }
+
+  return OL_ERRC_SUCCESS;
+}
+
+template <typename T>
+ol_errc_t getInfo(size_t ParamValueSize, void *ParamValue,
+                  size_t *ParamValueSizeRet, T Value) {
+  auto Assignment = [](void *ParamValue, T Value, size_t) {
+    *static_cast<T *>(ParamValue) = Value;
+  };
+
+  return getInfoImpl(ParamValueSize, ParamValue, ParamValueSizeRet, Value,
+                     sizeof(T), Assignment);
+}
+
+template <typename T>
+ol_errc_t getInfoArray(size_t array_length, size_t ParamValueSize,
+                       void *ParamValue, size_t *ParamValueSizeRet,
+                       const T *Value) {
+  return getInfoImpl(ParamValueSize, ParamValue, ParamValueSizeRet, Value,
+                     array_length * sizeof(T), memcpy);
+}
+
+template <>
+inline ol_errc_t getInfo(size_t ParamValueSize, void *ParamValue,
+                         size_t *ParamValueSizeRet, const char *Value) {
+  return getInfoArray(strlen(Value) + 1, ParamValueSize, ParamValue,
+                      ParamValueSizeRet, Value);
+}
+
+class ReturnHelper {
+public:
+  ReturnHelper(size_t ParamValueSize, void *ParamValue,
+               size_t *ParamValueSizeRet)
+      : ParamValueSize(ParamValueSize), ParamValue(ParamValue),
+        ParamValueSizeRet(ParamValueSizeRet) {}
+
+  // A version where in/out info size is represented by a single pointer
+  // to a value which is updated on return
+  ReturnHelper(size_t *ParamValueSize, void *ParamValue)
+      : ParamValueSize(*ParamValueSize), ParamValue(ParamValue),
+        ParamValueSizeRet(ParamValueSize) {}
+
+  // Scalar return Value
+  template <typename T> ol_errc_t operator()(const T &t) {
+    return getInfo(ParamValueSize, ParamValue, ParamValueSizeRet, t);
+  }
+
+  // Array return Value
+  template <typename T> ol_errc_t operator()(const T *t, size_t s) {
+    return getInfoArray(s, ParamValueSize, ParamValue, ParamValueSizeRet, t);
+  }
+
+protected:
+  size_t ParamValueSize;
+  void *ParamValue;
+  size_t *ParamValueSizeRet;
+};
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
new file mode 100644
index 0000000000000..457f1053f1634
--- /dev/null
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -0,0 +1,247 @@
+//===- ol_impl.cpp - Implementation of the new LLVM/Offload API ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains the definitions of the new LLVM/Offload API entry points. See +// new-api/API/README.md for more information. +// +//===----------------------------------------------------------------------===// + +#include "OffloadImpl.hpp" +#include "Helpers.hpp" +#include "PluginManager.h" +#include "llvm/Support/FormatVariadic.h" +#include + +#include + +using namespace llvm; +using namespace llvm::omp::target::plugin; + +// Handle type definitions. Ideally these would be 1:1 with the plugins +struct ol_device_handle_t_ { + int DeviceNum; + GenericDeviceTy &Device; + ol_platform_handle_t Platform; +}; + +struct ol_platform_handle_t_ { + std::unique_ptr Plugin; + std::vector Devices; +}; + +using PlatformVecT = SmallVector; +PlatformVecT &Platforms() { + static PlatformVecT Platforms; + return Platforms; +} + +// TODO: Some plugins expect to be linked into libomptarget which defines these +// symbols to implement ompt callbacks. The least invasive workaround here is to +// define them in libLLVMOffload as false/null so they are never used. In future +// it would be better to allow the plugins to implement callbacks without +// pulling in details from libomptarget. +#ifdef OMPT_SUPPORT +namespace llvm::omp::target { +namespace ompt { +bool Initialized = false; +ompt_get_callback_t lookupCallbackByCode = nullptr; +ompt_function_lookup_t lookupCallbackByName = nullptr; +} // namespace ompt +} // namespace llvm::omp::target +#endif + +// Every plugin exports this method to create an instance of the plugin type. +#define PLUGIN_TARGET(Name) extern "C" GenericPluginTy *createPlugin_##Name(); +#include "Shared/Targets.def" + +void initPlugins() { + // Attempt to create an instance of each supported plugin. 
+#define PLUGIN_TARGET(Name) \ + do { \ + Platforms().emplace_back(ol_platform_handle_t_{ \ + std::unique_ptr(createPlugin_##Name()), {}}); \ + } while (false); +#include "Shared/Targets.def" + + // Preemptively initialize all devices in the plugin so we can just return + // them from deviceGet + for (auto &Platform : Platforms()) { + auto Err = Platform.Plugin->init(); + [[maybe_unused]] std::string InfoMsg = toString(std::move(Err)); + for (auto DevNum = 0; DevNum < Platform.Plugin->number_of_devices(); + DevNum++) { + if (Platform.Plugin->init_device(DevNum) == OFFLOAD_SUCCESS) { + Platform.Devices.emplace_back(ol_device_handle_t_{ + DevNum, Platform.Plugin->getDevice(DevNum), &Platform}); + } + } + } + + offloadConfig().TracingEnabled = std::getenv("OFFLOAD_TRACE"); +} + +// TODO: We can properly reference count here and manage the resources in a more +// clever way +ol_impl_result_t olInit_impl() { + static std::once_flag InitFlag; + std::call_once(InitFlag, initPlugins); + + return OL_SUCCESS; +} +ol_impl_result_t olShutDown_impl() { return OL_SUCCESS; } + +ol_impl_result_t olGetPlatformCount_impl(uint32_t *NumPlatforms) { + *NumPlatforms = Platforms().size(); + return OL_SUCCESS; +} + +ol_impl_result_t olGetPlatform_impl(uint32_t NumEntries, + ol_platform_handle_t *PlatformsOut) { + if (NumEntries > Platforms().size()) { + return {OL_ERRC_INVALID_SIZE, + std::string{formatv("{0} platform(s) available but {1} requested.", + Platforms().size(), NumEntries)}}; + } + + for (uint32_t PlatformIndex = 0; PlatformIndex < NumEntries; + PlatformIndex++) { + PlatformsOut[PlatformIndex] = &(Platforms())[PlatformIndex]; + } + + return OL_SUCCESS; +} + +ol_impl_result_t olGetPlatformInfoImplDetail(ol_platform_handle_t Platform, + ol_platform_info_t PropName, + size_t PropSize, void *PropValue, + size_t *PropSizeRet) { + ReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); + + switch (PropName) { + case OL_PLATFORM_INFO_NAME: + return ReturnValue(Platform->Plugin->getName()); + case OL_PLATFORM_INFO_VENDOR_NAME: + // TODO: Implement this + return ReturnValue("Unknown platform vendor"); + case OL_PLATFORM_INFO_VERSION: { + return ReturnValue(formatv("v{0}.{1}.{2}", OL_VERSION_MAJOR, + OL_VERSION_MINOR, OL_VERSION_PATCH) + .str() + .c_str()); + } + case OL_PLATFORM_INFO_BACKEND: { + auto PluginName = Platform->Plugin->getName(); + if (PluginName == StringRef("CUDA")) { + return ReturnValue(OL_PLATFORM_BACKEND_CUDA); + } else if (PluginName == StringRef("AMDGPU")) { + return ReturnValue(OL_PLATFORM_BACKEND_AMDGPU); + } else { + return ReturnValue(OL_PLATFORM_BACKEND_UNKNOWN); + } + } + default: + return OL_ERRC_INVALID_ENUMERATION; + } + + return OL_SUCCESS; +} + +ol_impl_result_t olGetPlatformInfo_impl(ol_platform_handle_t Platform, + ol_platform_info_t PropName, + size_t PropSize, void *PropValue) { + return olGetPlatformInfoImplDetail(Platform, PropName, PropSize, PropValue, + nullptr); +} + +ol_impl_result_t olGetPlatformInfoSize_impl(ol_platform_handle_t Platform, + ol_platform_info_t PropName, + size_t *PropSizeRet) { + return olGetPlatformInfoImplDetail(Platform, PropName, 0, nullptr, + PropSizeRet); +} + +ol_impl_result_t olGetDeviceCount_impl(ol_platform_handle_t Platform, + uint32_t *pNumDevices) { + *pNumDevices = static_cast(Platform->Devices.size()); + + return OL_SUCCESS; +} + +ol_impl_result_t olGetDevice_impl(ol_platform_handle_t Platform, + uint32_t NumEntries, + ol_device_handle_t *Devices) { + if (NumEntries > Platform->Devices.size()) { + return OL_ERRC_INVALID_SIZE; + } + 
+  for (uint32_t DeviceIndex = 0; DeviceIndex < NumEntries; DeviceIndex++) {
+    Devices[DeviceIndex] = &(Platform->Devices[DeviceIndex]);
+  }
+
+  return OL_SUCCESS;
+}
+
+ol_impl_result_t olGetDeviceInfoImplDetail(ol_device_handle_t Device,
+                                           ol_device_info_t PropName,
+                                           size_t PropSize, void *PropValue,
+                                           size_t *PropSizeRet) {
+
+  ReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet);
+
+  InfoQueueTy DevInfo;
+  if (auto Err = Device->Device.obtainInfoImpl(DevInfo))
+    return OL_ERRC_OUT_OF_RESOURCES;
+
+  // Find the info if it exists under any of the given names
+  auto GetInfo = [&DevInfo](std::vector<std::string> Names) {
+    for (auto Name : Names) {
+      auto InfoKeyMatches = [&](const InfoQueueTy::InfoQueueEntryTy &Info) {
+        return Info.Key == Name;
+      };
+      auto Item = std::find_if(DevInfo.getQueue().begin(),
+                               DevInfo.getQueue().end(), InfoKeyMatches);
+
+      if (Item != std::end(DevInfo.getQueue())) {
+        return Item->Value;
+      }
+    }
+
+    return std::string("");
+  };
+
+  switch (PropName) {
+  case OL_DEVICE_INFO_PLATFORM:
+    return ReturnValue(Device->Platform);
+  case OL_DEVICE_INFO_TYPE:
+    return ReturnValue(OL_DEVICE_TYPE_GPU);
+  case OL_DEVICE_INFO_NAME:
+    return ReturnValue(GetInfo({"Device Name"}).c_str());
+  case OL_DEVICE_INFO_VENDOR:
+    return ReturnValue(GetInfo({"Vendor Name"}).c_str());
+  case OL_DEVICE_INFO_DRIVER_VERSION:
+    return ReturnValue(
+        GetInfo({"CUDA Driver Version", "HSA Runtime Version"}).c_str());
+  default:
+    return OL_ERRC_INVALID_ENUMERATION;
+  }
+
+  return OL_SUCCESS;
+}
+
+ol_impl_result_t olGetDeviceInfo_impl(ol_device_handle_t Device,
+                                      ol_device_info_t PropName,
+                                      size_t PropSize, void *PropValue) {
+  return olGetDeviceInfoImplDetail(Device, PropName, PropSize, PropValue,
+                                   nullptr);
+}
+
+ol_impl_result_t olGetDeviceInfoSize_impl(ol_device_handle_t Device,
+                                          ol_device_info_t PropName,
+                                          size_t *PropSizeRet) {
+  return olGetDeviceInfoImplDetail(Device, PropName, 0, nullptr, PropSizeRet);
+}
diff --git a/offload/liboffload/src/OffloadLib.cpp b/offload/liboffload/src/OffloadLib.cpp
new file mode 100644
index 0000000000000..37876713212c9
--- /dev/null
+++ b/offload/liboffload/src/OffloadLib.cpp
@@ -0,0 +1,44 @@
+//===- offload_lib.cpp - Entry points for the new LLVM/Offload API --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file pulls in the tablegen'd API entry point functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "OffloadImpl.hpp"
+#include
+#include
+
+#include
+
+llvm::StringSet<> &errorStrs() {
+  static llvm::StringSet<> ErrorStrs;
+  return ErrorStrs;
+}
+
+ErrSetT &errors() {
+  static ErrSetT Errors{};
+  return Errors;
+}
+
+ol_code_location_t *&currentCodeLocation() {
+  thread_local ol_code_location_t *CodeLoc = nullptr;
+  return CodeLoc;
+}
+
+OffloadConfig &offloadConfig() {
+  static OffloadConfig Config{};
+  return Config;
+}
+
+// Pull in the declarations for the implementation functions. The actual entry
+// points in this file wrap these.
+#include "OffloadImplFuncDecls.inc"
+
+// Pull in the tablegen'd entry point definitions.
+#include "OffloadEntryPoints.inc" diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index 97540d5a3e2b3..63e2f80302c30 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -124,6 +124,7 @@ enum InfoLevelKind { InfoLevel1 = 1, InfoLevel2, InfoLevel3 }; /// we use the level to determine the indentation of the key-value property at /// printing time. See the enum InfoLevelKind for the list of accepted levels. class InfoQueueTy { +public: struct InfoQueueEntryTy { std::string Key; std::string Value; @@ -131,6 +132,7 @@ class InfoQueueTy { uint64_t Level; }; +private: std::deque Queue; public: @@ -153,6 +155,8 @@ class InfoQueueTy { Queue.push_back({Key, Value, Units, L}); } + const std::deque &getQueue() const { return Queue; } + /// Print all info entries added to the queue. void print() const { // We print four spances for each level. diff --git a/offload/test/lit.cfg b/offload/test/lit.cfg index 2f1ef3e98d817..658ae5f9653ba 100644 --- a/offload/test/lit.cfg +++ b/offload/test/lit.cfg @@ -66,7 +66,7 @@ def evaluate_bool_env(env): config.name = 'libomptarget :: ' + config.libomptarget_current_target # suffixes: A list of file extensions to treat as test files. -config.suffixes = ['.c', '.cpp', '.cc', '.f90', '.cu'] +config.suffixes = ['.c', '.cpp', '.cc', '.f90', '.cu', '.td'] # excludes: A list of directories to exclude from the testuites. config.excludes = ['Inputs'] @@ -418,3 +418,4 @@ config.substitutions.append(("%flags", config.test_flags)) config.substitutions.append(("%not", config.libomptarget_not)) config.substitutions.append(("%offload-device-info", config.offload_device_info)) +config.substitutions.append(("%offload-tblgen", config.offload_tblgen)) diff --git a/offload/test/lit.site.cfg.in b/offload/test/lit.site.cfg.in index a1cb5acc38a40..ce3f6abf50a13 100644 --- a/offload/test/lit.site.cfg.in +++ b/offload/test/lit.site.cfg.in @@ -28,5 +28,6 @@ config.libomptarget_debug = @LIBOMPTARGET_DEBUG@ config.has_libomptarget_ompt = @LIBOMPTARGET_OMPT_SUPPORT@ config.libomptarget_has_libc = @LIBOMPTARGET_GPU_LIBC_SUPPORT@ config.libomptarget_test_pgo = @LIBOMPTARGET_TEST_GPU_PGO@ +config.offload_tblgen = "@OFFLOAD_TBLGEN_EXECUTABLE@" # Let the main config do the real work. 
lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg") diff --git a/offload/test/tools/offload-tblgen/default_returns.td b/offload/test/tools/offload-tblgen/default_returns.td new file mode 100644 index 0000000000000..995e24abf707d --- /dev/null +++ b/offload/test/tools/offload-tblgen/default_returns.td @@ -0,0 +1,40 @@ +// RUN: %offload-tblgen -gen-api -I %S/../../../liboffload/API %s | %fcheck-generic --check-prefix=CHECK-API +// RUN: %offload-tblgen -gen-entry-points -I %S/../../../liboffload/API %s | %fcheck-generic --check-prefix=CHECK-VALIDATION + +// Check implicit returns are included in documentation and the validation +// wrappers where applicable + +include "APIDefs.td" + +def : Handle { + let name = "ol_foo_handle_t"; + let desc = "Example handle type"; +} + +def : Function { + let name = "FunctionA"; + let desc = "Function A description"; + let details = [ "Function A detailed information" ]; + let params = [ + Param<"uint32_t", "ParamValue", "A plain value parameter">, + Param<"ol_foo_handle_t", "ParamHandle", "A handle parameter">, + Param<"uint32_t*", "ParamPointer", "A pointer parameter">, + Param<"uint32_t*", "ParamPointerOpt", "An optional pointer parameter", PARAM_OUT_OPTIONAL> + ]; + let returns = []; +} + +// CHECK-API: /// @returns +// CHECK-API: OL_RESULT_SUCCESS +// CHECK-API: OL_ERRC_INVALID_NULL_HANDLE +// CHECK-API-NEXT: `NULL == ParamHandle` +// CHECK-API: OL_ERRC_INVALID_NULL_POINTER +// CHECK-API-NEXT: `NULL == ParamPointer` +// CHECK-API-NOT: `NULL == ParamPointerOpt` + +// CHECK-VALIDATION: FunctionA_val +// CHECK-VALIDATION: if (NULL == ParamHandle) +// CHECK-VALIDATION-NEXT: return OL_ERRC_INVALID_NULL_HANDLE; +// CHECK-VALIDATION: if (NULL == ParamPointer) +// CHECK-VALIDATION-NEXT: return OL_ERRC_INVALID_NULL_POINTER; +// CHECK-VALIDATION-NOT: if (NULL == ParamPointerOpt) diff --git a/offload/test/tools/offload-tblgen/entry_points.td b/offload/test/tools/offload-tblgen/entry_points.td new file mode 100644 index 0000000000000..a66ddb9279920 --- /dev/null +++ b/offload/test/tools/offload-tblgen/entry_points.td @@ -0,0 +1,37 @@ +// RUN: %offload-tblgen -gen-entry-points -I %S/../../../liboffload/API %s | %fcheck-generic + +// Check entry point wrapper functions are generated correctly + +include "APIDefs.td" + +def : Function { + let name = "FunctionA"; + let desc = "Function A description"; + let details = [ "Function A detailed information" ]; + let params = [ + Param<"uint32_t", "ParamA", "Parameter A description">, + Param<"uint32_t*", "ParamB", "Parameter B description">, + ]; + let returns = [ + Return<"OL_ERRC_INVALID_VALUE", ["When a value is invalid"]> + ]; +} + + +// The validation function should call the implementation function +// CHECK: FunctionA_val +// CHECK: return FunctionA_impl(ParamA, ParamB); + +// CHECK: ol_result_t{{.*}} FunctionA( + +// The entry point should print tracing output if enabled +// CHECK: if (offloadConfig().TracingEnabled) { +// CHECK-NEXT: "---> FunctionA"; + +// CHECK: Result = FunctionA_val(ParamA, ParamB); + +// Tracing should construct a param struct for printing +// CHECK: if (offloadConfig().TracingEnabled) { +// CHECK: function_a_params_t Params = {&ParamA, &ParamB}; + +// CHECK: return Result; diff --git a/offload/test/tools/offload-tblgen/functions_basic.td b/offload/test/tools/offload-tblgen/functions_basic.td new file mode 100644 index 0000000000000..dec93577b57e9 --- /dev/null +++ b/offload/test/tools/offload-tblgen/functions_basic.td @@ -0,0 +1,39 @@ +// RUN: %offload-tblgen -gen-api -I 
%S/../../../liboffload/API %s | %fcheck-generic --check-prefix=CHECK-API +// RUN: %offload-tblgen -gen-exports -I %S/../../../liboffload/API %s | %fcheck-generic --check-prefix=CHECK-EXPORTS +// RUN: %offload-tblgen -gen-func-names -I %S/../../../liboffload/API %s | %fcheck-generic --check-prefix=CHECK-FUNC-MACRO + +// Check basic support for API functions + +include "APIDefs.td" + +def : Function { + let name = "FunctionA"; + let desc = "Function A description"; + let details = [ "Function A detailed information" ]; + let params = [ + Param<"uint32_t", "ParamA", "Parameter A description">, + Param<"uint32_t*", "ParamB", "Parameter B description">, + ]; + let returns = [ + Return<"OL_ERRC_INVALID_VALUE", ["When a value is invalid"]> + ]; +} + +// CHECK-API: /// @brief Function A description +// CHECK-API: /// @details +// CHECK-API-NEXT: Function A detailed information +// CHECK-API: /// @returns +// CHECK-API: OL_ERRC_INVALID_VALUE +// CHECK-API-NEXT: When a value is invalid + +// CHECK-API: ol_result_t +// CHECK-API-SAME: FunctionA + +// CHECK-API: // Parameter A description +// CHECK-API-NEXT: uint32_t ParamA +// CHECK-API: // Parameter B description +// CHECK-API-NEXT: uint32_t* ParamB + +// CHECK-EXPORTS: FunctionA + +// CHECK-FUNC-MACRO: OFFLOAD_FUNC(FunctionA) diff --git a/offload/test/tools/offload-tblgen/functions_code_loc.td b/offload/test/tools/offload-tblgen/functions_code_loc.td new file mode 100644 index 0000000000000..aec20129343f5 --- /dev/null +++ b/offload/test/tools/offload-tblgen/functions_code_loc.td @@ -0,0 +1,26 @@ +// RUN: %offload-tblgen -gen-api -I %S/../../../liboffload/API %s | %fcheck-generic --check-prefix=CHECK-API +// RUN: %offload-tblgen -gen-exports -I %S/../../../liboffload/API %s | %fcheck-generic --check-prefix=CHECK-EXPORTS +// RUN: %offload-tblgen -gen-func-names -I %S/../../../liboffload/API %s | %fcheck-generic --check-prefix=CHECK-FUNC-MACRO + +// Check that the function variant with code location information is generated +// and is otherwise the same as the regular function + +include "APIDefs.td" + +def : Function { + let name = "FunctionA"; + let desc = "Function A description"; + let details = [ "Function A detailed information" ]; + let params = [ + Param<"uint32_t", "ParamA", "Parameter A description">, + Param<"uint32_t*", "ParamB", "Parameter B description">, + ]; + let returns = [ + Return<"OL_ERRC_INVALID_VALUE", ["When a value is invalid"]> + ]; +} + +// CHECK-API-DAG: ol_result_t{{.*}} FunctionA +// CHECK-API-DAG: ol_result_t{{.*}} FunctionAWithCodeLoc +// CHECK-EXPORTS: FunctionAWithCodeLoc +// CHECK-FUNC-MACRO: OFFLOAD_FUNC(FunctionAWithCodeLoc) diff --git a/offload/test/tools/offload-tblgen/functions_ranged_param.td b/offload/test/tools/offload-tblgen/functions_ranged_param.td new file mode 100644 index 0000000000000..21a84d8a70334 --- /dev/null +++ b/offload/test/tools/offload-tblgen/functions_ranged_param.td @@ -0,0 +1,36 @@ +// RUN: %offload-tblgen -gen-print-header -I %S/../../../liboffload/API %s | %fcheck-generic + +// Check that ranged function parameters are implemented correctly. These +// are pointers to an array of an arbitrary size. Their size is described as a +// range between two values. This is typically between 0 and a parameter such +// as NumItems. The range information helps the printing code print the entire +// range of the output rather than just the pointer or the first element. 
+ +include "APIDefs.td" + +def : Handle { + let name = "some_handle_t"; + let desc = "An example handle type"; +} + +def : Function { + let name = "FunctionA"; + let desc = "Function A description"; + let details = [ "Function A detailed information" ]; + let params = [ + Param<"size_t", "OutCount", "the number of things to write out", PARAM_IN>, + RangedParam<"some_handle_t*", "OutPtr", "pointer to the output things.", PARAM_OUT, + Range<"0", "OutCount">> + ]; + let returns = []; +} + +// CHECK: inline std::ostream &operator<<(std::ostream &os, const struct function_a_params_t *params) { +// CHECK: os << ".OutPtr = "; +// CHECK: for (size_t i = 0; i < *params->pOutCount; i++) { +// CHECK: if (i > 0) { +// CHECK: os << ", "; +// CHECK: } +// CHECK: printPtr(os, (*params->pOutPtr)[i]); +// CHECK: } +// CHECK: os << "}"; diff --git a/offload/test/tools/offload-tblgen/print_enum.td b/offload/test/tools/offload-tblgen/print_enum.td new file mode 100644 index 0000000000000..0b5506009bec5 --- /dev/null +++ b/offload/test/tools/offload-tblgen/print_enum.td @@ -0,0 +1,34 @@ +// RUN: %offload-tblgen -gen-print-header -I %S/../../../liboffload/API %s | %fcheck-generic + +// Check that print helpers are created for enums + +include "APIDefs.td" + +def : Enum { + let name = "my_enum_t"; + let desc = "An example enum"; + let etors =[ + Etor<"VALUE_ONE", "The first enum value">, + Etor<"VALUE_TWO", "The second enum value">, + Etor<"VALUE_THREE", "The third enum value">, + Etor<"VALUE_FOUR", "The fourth enum value">, + ]; +} + +// CHECK: inline std::ostream &operator<<(std::ostream &os, enum my_enum_t value) +// CHECK: switch (value) { +// CHECK: case MY_ENUM_VALUE_ONE: +// CHECK: os << "MY_ENUM_VALUE_ONE"; +// CHECK: break; +// CHECK: case MY_ENUM_VALUE_TWO: +// CHECK: os << "MY_ENUM_VALUE_TWO"; +// CHECK: break; +// CHECK: case MY_ENUM_VALUE_THREE: +// CHECK: os << "MY_ENUM_VALUE_THREE"; +// CHECK: break; +// CHECK: case MY_ENUM_VALUE_FOUR: +// CHECK: os << "MY_ENUM_VALUE_FOUR"; +// CHECK: break; +// CHECK: default: +// CHECK: os << "unknown enumerator"; +// CHECK: break; diff --git a/offload/test/tools/offload-tblgen/print_function.td b/offload/test/tools/offload-tblgen/print_function.td new file mode 100644 index 0000000000000..3f4944df65941 --- /dev/null +++ b/offload/test/tools/offload-tblgen/print_function.td @@ -0,0 +1,38 @@ +// RUN: %offload-tblgen -gen-print-header -I %S/../../../liboffload/API %s | %fcheck-generic --check-prefix=CHECK-PRINT +// RUN: %offload-tblgen -gen-api -I %S/../../../liboffload/API %s | %fcheck-generic --check-prefix=CHECK-API + +// Check that print helpers are created for functions + +include "APIDefs.td" + +def : Handle { + let name = "ol_foo_handle_t"; + let desc = "Example handle type"; +} + +def : Function { + let name = "FunctionA"; + let desc = "Function A description"; + let details = [ "Function A detailed information" ]; + let params = [ + Param<"uint32_t", "ParamValue", "A plain value parameter">, + Param<"ol_foo_handle_t", "ParamHandle", "A handle parameter">, + Param<"uint32_t*", "ParamPointer", "A pointer parameter">, + ]; + let returns = []; +} + +// CHECK-API: typedef struct function_a_params_t { +// CHECK-API-NEXT: uint32_t* pParamValue; +// CHECK-API-NEXT: ol_foo_handle_t* pParamHandle; +// CHECK-API-NEXT: uint32_t** pParamPointer; + +// CHECK-PRINT: inline std::ostream &operator<<(std::ostream &os, const struct function_a_params_t *params) +// CHECK-PRINT: os << ".ParamValue = "; +// CHECK-PRINT: os << *params->pParamValue; +// CHECK-PRINT: os << ", "; 
+// CHECK-PRINT: os << ".ParamHandle = "; +// CHECK-PRINT: printPtr(os, *params->pParamHandle); +// CHECK-PRINT: os << ", "; +// CHECK-PRINT: os << ".ParamPointer = "; +// CHECK-PRINT: printPtr(os, *params->pParamPointer); diff --git a/offload/test/tools/offload-tblgen/type_tagged_enum.td b/offload/test/tools/offload-tblgen/type_tagged_enum.td new file mode 100644 index 0000000000000..49e91e43bb6ef --- /dev/null +++ b/offload/test/tools/offload-tblgen/type_tagged_enum.td @@ -0,0 +1,76 @@ +// RUN: %offload-tblgen -gen-api -I %S/../../../liboffload/API %s | %fcheck-generic --check-prefix=CHECK-API +// RUN: %offload-tblgen -gen-print-header -I %S/../../../liboffload/API %s | %fcheck-generic --check-prefix=CHECK-PRINT + +// Check that type-tagged enumerators are implemented correctly. They enable +// functions to return data of an arbitrary type and size via a void*, using +// the value of an enum parameter to indicate which type is being returned. +// This allows, for example, for a single olGetDeviceInfo function, rather +// than requiring a separate entry point for every possible query. + +include "APIDefs.td" + +def : Handle { + let name = "some_handle_t"; + let desc = "An example handle type"; +} + +def : Enum { + let name = "my_type_tagged_enum_t"; + let desc = "Example type tagged enum"; + let is_typed = 1; + let etors = [ + TaggedEtor<"VALUE_ONE", "uint32_t", "Value one.">, + TaggedEtor<"VALUE_TWO", "char[]", "Value two.">, + TaggedEtor<"VALUE_THREE", "some_handle_t", "Value three."> + ]; +} + +// Check the tagged types appear in the comments +// CHECK-API: typedef enum my_type_tagged_enum_t { +// CHECK-API-NEXT: [uint32_t] Value one. +// CHECK-API-NEXT: MY_TYPE_TAGGED_ENUM_VALUE_ONE = 0, +// CHECK-API-NEXT: [char[]] Value two. +// CHECK-API-NEXT: MY_TYPE_TAGGED_ENUM_VALUE_TWO = 1, +// CHECK-API-NEXT: [some_handle_t] Value three. +// CHECK-API-NEXT: MY_TYPE_TAGGED_ENUM_VALUE_THREE = 2, + +def : Function { + let name = "FunctionA"; + let desc = "Function A description"; + let details = [ "Function A detailed information" ]; + let params = [ + Param<"my_type_tagged_enum_t", "PropName", "type of the info to retrieve", PARAM_IN>, + Param<"size_t", "PropSize", "the number of bytes pointed to by PropValue.", PARAM_IN>, + TypeTaggedParam<"void*", "PropValue", "array of bytes holding the info. 
" + "If PropSize is not equal to or greater to the real number of bytes needed to return the info " + "then the OL_ERRC_INVALID_SIZE error is returned and PropValue is not used.", PARAM_OUT, + TypeInfo<"PropName" , "PropSize">> + ]; + let returns = []; +} + +// Check that a tagged enum print function definition is generated +// CHECK-PRINT: void printTagged(std::ostream &os, const void *ptr, my_type_tagged_enum_t value, size_t size) { +// CHECK-PRINT: case MY_TYPE_TAGGED_ENUM_VALUE_ONE: { +// CHECK-PRINT: const uint32_t * const tptr = (const uint32_t * const)ptr; +// CHECK-PRINT: os << (const void *)tptr << " ("; +// CHECK-PRINT: os << *tptr; +// CHECK-PRINT: os << ")"; +// CHECK-PRINT: break; +// CHECK-PRINT: } +// CHECK-PRINT: case MY_TYPE_TAGGED_ENUM_VALUE_TWO: { +// CHECK-PRINT: printPtr(os, (const char*) ptr); +// CHECK-PRINT: break; +// CHECK-PRINT: } +// CHECK-PRINT: case MY_TYPE_TAGGED_ENUM_VALUE_THREE: { +// CHECK-PRINT: const some_handle_t * const tptr = (const some_handle_t * const)ptr; +// CHECK-PRINT: os << (const void *)tptr << " ("; +// CHECK-PRINT: os << *tptr; +// CHECK-PRINT: os << ")"; +// CHECK-PRINT: break; +// CHECK-PRINT: } + +// Check that the tagged type information is used when printing function parameters +// CHECK-PRINT: std::ostream &operator<<(std::ostream &os, const struct function_a_params_t *params) { +// CHECK-PRINT: os << ".PropValue = " +// CHECK-PRINT-NEXT: printTagged(os, *params->pPropValue, *params->pPropName, *params->pPropSize); diff --git a/offload/tools/offload-tblgen/APIGen.cpp b/offload/tools/offload-tblgen/APIGen.cpp new file mode 100644 index 0000000000000..97a2464f7a75c --- /dev/null +++ b/offload/tools/offload-tblgen/APIGen.cpp @@ -0,0 +1,229 @@ +//===- offload-tblgen/APIGen.cpp - Tablegen backend for Offload header ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is a Tablegen backend that produces the contents of the Offload API +// header. The generated comments are Doxygen compatible. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/TableGen/Record.h" +#include "llvm/TableGen/TableGenBackend.h" + +#include "GenCommon.hpp" +#include "RecordTypes.hpp" + +using namespace llvm; +using namespace offload::tblgen; + +// Produce a possibly multi-line comment from the input string +static std::string MakeComment(StringRef in) { + std::string out = ""; + size_t LineStart = 0; + size_t LineBreak = 0; + while (LineBreak < in.size()) { + LineBreak = in.find_first_of("\n", LineStart); + if (LineBreak - LineStart <= 1) { + break; + } + out += std::string("/// ") + + in.substr(LineStart, LineBreak - LineStart).str() + "\n"; + LineStart = LineBreak + 1; + } + + return out; +} + +static void ProcessHandle(const HandleRec &H, raw_ostream &OS) { + OS << CommentsHeader; + OS << formatv("/// @brief {0}\n", H.getDesc()); + OS << formatv("typedef struct {0}_ *{0};\n", H.getName()); +} + +static void ProcessTypedef(const TypedefRec &T, raw_ostream &OS) { + OS << CommentsHeader; + OS << formatv("/// @brief {0}\n", T.getDesc()); + OS << formatv("typedef {0} {1};\n", T.getValue(), T.getName()); +} + +static void ProcessMacro(const MacroRec &M, raw_ostream &OS) { + OS << CommentsHeader; + OS << formatv("#ifndef {0}\n", M.getName()); + if (auto Condition = M.getCondition()) { + OS << formatv("#if {0}\n", *Condition); + } + OS << "/// @brief " << M.getDesc() << "\n"; + OS << formatv("#define {0} {1}\n", M.getNameWithArgs(), M.getValue()); + if (auto AltValue = M.getAltValue()) { + OS << "#else\n"; + OS << formatv("#define {0} {1}\n", M.getNameWithArgs(), *AltValue); + } + if (auto Condition = M.getCondition()) { + OS << formatv("#endif // {0}\n", *Condition); + } + OS << formatv("#endif // {0}\n", M.getName()); +} + +static void ProcessFunction(const FunctionRec &F, raw_ostream &OS) { + OS << CommentsHeader; + OS << formatv("/// @brief {0}\n", F.getDesc()); + OS << CommentsBreak; + + OS << "/// @details\n"; + for (auto &Detail : F.getDetails()) { + OS << formatv("/// - {0}\n", Detail); + } + OS << CommentsBreak; + + // Emit analogue remarks + auto Analogues = F.getAnalogues(); + if (!Analogues.empty()) { + OS << "/// @remarks\n/// _Analogues_\n"; + for (auto &Analogue : Analogues) { + OS << formatv("/// - **{0}**\n", Analogue); + } + OS << CommentsBreak; + } + + OS << "/// @returns\n"; + auto Returns = F.getReturns(); + for (auto &Ret : Returns) { + OS << formatv("/// - ::{0}\n", Ret.getValue()); + auto RetConditions = Ret.getConditions(); + for (auto &RetCondition : RetConditions) { + OS << formatv("/// + {0}\n", RetCondition); + } + } + + OS << formatv("{0}_APIEXPORT {1}_result_t {0}_APICALL ", PrefixUpper, + PrefixLower); + OS << F.getName(); + OS << "(\n"; + auto Params = F.getParams(); + for (auto &Param : Params) { + OS << MakeParamComment(Param) << "\n"; + OS << " " << Param.getType() << " " << Param.getName(); + if (Param != Params.back()) { + OS << ",\n"; + } else { + OS << "\n"; + } + } + OS << ");\n\n"; +} + +static void ProcessEnum(const EnumRec &Enum, raw_ostream &OS) { + OS << CommentsHeader; + OS << formatv("/// @brief {0}\n", Enum.getDesc()); + OS << formatv("typedef enum {0} {{\n", Enum.getName()); + + uint32_t EtorVal = 0; + for (const auto &EnumVal : Enum.getValues()) { + if (Enum.isTyped()) { + OS << MakeComment( + formatv("[{0}] {1}", EnumVal.getTaggedType(), EnumVal.getDesc()) + .str()); + } else { + OS << MakeComment(EnumVal.getDesc()); + } + OS 
<< formatv(TAB_1 "{0}_{1} = {2},\n", Enum.getEnumValNamePrefix(), + EnumVal.getName(), EtorVal++); + } + + // Add force uint32 val + OS << formatv(TAB_1 "/// @cond\n" TAB_1 + "{0}_FORCE_UINT32 = 0x7fffffff\n" TAB_1 + "/// @endcond\n\n", + Enum.getEnumValNamePrefix()); + + OS << formatv("} {0};\n", Enum.getName()); +} + +static void ProcessStruct(const StructRec &Struct, raw_ostream &OS) { + OS << CommentsHeader; + OS << formatv("/// @brief {0}\n", Struct.getDesc()); + OS << formatv("typedef struct {0} {{\n", Struct.getName()); + + for (const auto &Member : Struct.getMembers()) { + OS << formatv(TAB_1 "{0} {1}; {2}", Member.getType(), Member.getName(), + MakeComment(Member.getDesc())); + } + + OS << formatv("} {0};\n\n", Struct.getName()); +} + +static void ProcessFuncParamStruct(const FunctionRec &Func, raw_ostream &OS) { + if (Func.getParams().size() == 0) { + return; + } + + auto FuncParamStructBegin = R"( +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for {0} +/// @details Each entry is a pointer to the parameter passed to the function; +typedef struct {1} {{ +)"; + + OS << formatv(FuncParamStructBegin, Func.getName(), + Func.getParamStructName()); + for (const auto &Param : Func.getParams()) { + OS << TAB_1 << Param.getType() << "* p" << Param.getName() << ";\n"; + } + OS << formatv("} {0};\n", Func.getParamStructName()); +} + +static void ProcessFuncWithCodeLocVariant(const FunctionRec &Func, + raw_ostream &OS) { + + auto FuncWithCodeLocBegin = R"( +/////////////////////////////////////////////////////////////////////////////// +/// @brief Variant of {0} that also sets source code location information +/// @details See also ::{0} +OL_APIEXPORT ol_result_t OL_APICALL {0}WithCodeLoc( +)"; + OS << formatv(FuncWithCodeLocBegin, Func.getName()); + auto Params = Func.getParams(); + for (auto &Param : Params) { + OS << " " << Param.getType() << " " << Param.getName(); + OS << ",\n"; + } + OS << "ol_code_location_t *CodeLocation);\n\n"; +} + +void EmitOffloadAPI(const RecordKeeper &Records, raw_ostream &OS) { + OS << GenericHeader; + OS << FileHeader; + // Generate main API definitions + for (auto *R : Records.getAllDerivedDefinitions("APIObject")) { + if (R->isSubClassOf("Macro")) { + ProcessMacro(MacroRec{R}, OS); + } else if (R->isSubClassOf("Typedef")) { + ProcessTypedef(TypedefRec{R}, OS); + } else if (R->isSubClassOf("Handle")) { + ProcessHandle(HandleRec{R}, OS); + } else if (R->isSubClassOf("Function")) { + ProcessFunction(FunctionRec{R}, OS); + } else if (R->isSubClassOf("Enum")) { + ProcessEnum(EnumRec{R}, OS); + } else if (R->isSubClassOf("Struct")) { + ProcessStruct(StructRec{R}, OS); + } + } + + // Generate auxiliary definitions (func param structs etc) + for (auto *R : Records.getAllDerivedDefinitions("Function")) { + ProcessFuncParamStruct(FunctionRec{R}, OS); + } + + for (auto *R : Records.getAllDerivedDefinitions("Function")) { + ProcessFuncWithCodeLocVariant(FunctionRec{R}, OS); + } + + OS << FileFooter; +} diff --git a/offload/tools/offload-tblgen/CMakeLists.txt b/offload/tools/offload-tblgen/CMakeLists.txt new file mode 100644 index 0000000000000..52986cbbaa918 --- /dev/null +++ b/offload/tools/offload-tblgen/CMakeLists.txt @@ -0,0 +1,24 @@ +##===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +##===----------------------------------------------------------------------===## +include(TableGen) + +add_tablegen(offload-tblgen OFFLOAD + EXPORT OFFLOAD + APIGen.cpp + EntryPointGen.cpp + FuncsGen.cpp + GenCommon.hpp + Generators.hpp + offload-tblgen.cpp + PrintGen.cpp + RecordTypes.hpp + ) + +set(OFFLOAD_TABLEGEN_EXE "${OFFLOAD_TABLEGEN_EXE}" CACHE INTERNAL "") +set(OFFLOAD_TABLEGEN_TARGET "${OFFLOAD_TABLEGEN_TARGET}" CACHE INTERNAL "") + diff --git a/offload/tools/offload-tblgen/EntryPointGen.cpp b/offload/tools/offload-tblgen/EntryPointGen.cpp new file mode 100644 index 0000000000000..990ff96a3121d --- /dev/null +++ b/offload/tools/offload-tblgen/EntryPointGen.cpp @@ -0,0 +1,138 @@ +//===- offload-tblgen/EntryPointGen.cpp - Tablegen backend for Offload ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is a Tablegen backend that produces the actual entry points for the +// Offload API. It serves as a place to integrate functionality like tracing +// and validation before dispatching to the actual implementations. +//===----------------------------------------------------------------------===// + +#include "llvm/Support/FormatVariadic.h" +#include "llvm/TableGen/Record.h" + +#include "GenCommon.hpp" +#include "RecordTypes.hpp" + +using namespace llvm; +using namespace offload::tblgen; + +static void EmitValidationFunc(const FunctionRec &F, raw_ostream &OS) { + OS << CommentsHeader; + // Emit preamble + OS << formatv("{0}_impl_result_t {1}_val(\n ", PrefixLower, F.getName()); + // Emit arguments + std::string ParamNameList = ""; + for (auto &Param : F.getParams()) { + OS << Param.getType() << " " << Param.getName(); + if (Param != F.getParams().back()) { + OS << ", "; + } + ParamNameList += Param.getName().str() + ", "; + } + OS << ") {\n"; + + OS << TAB_1 "if (true /*enableParameterValidation*/) {\n"; + // Emit validation checks + for (const auto &Return : F.getReturns()) { + for (auto &Condition : Return.getConditions()) { + if (Condition.starts_with("`") && Condition.ends_with("`")) { + auto ConditionString = Condition.substr(1, Condition.size() - 2); + OS << formatv(TAB_2 "if ({0}) {{\n", ConditionString); + OS << formatv(TAB_3 "return {0};\n", Return.getValue()); + OS << TAB_2 "}\n\n"; + } + } + } + OS << TAB_1 "}\n\n"; + + // Perform actual function call to the implementation + ParamNameList = ParamNameList.substr(0, ParamNameList.size() - 2); + OS << formatv(TAB_1 "return {0}_impl({1});\n\n", F.getName(), ParamNameList); + OS << "}\n"; +} + +static void EmitEntryPointFunc(const FunctionRec &F, raw_ostream &OS) { + // Emit preamble + OS << formatv("{1}_APIEXPORT {0}_result_t {1}_APICALL {2}(\n ", PrefixLower, + PrefixUpper, F.getName()); + // Emit arguments + std::string ParamNameList = ""; + for (auto &Param : F.getParams()) { + OS << Param.getType() << " " << Param.getName(); + if (Param != F.getParams().back()) { + OS << ", "; + } + ParamNameList += Param.getName().str() + ", "; + } + OS << ") {\n"; + + // Emit pre-call prints + OS << TAB_1 "if (offloadConfig().TracingEnabled) {\n"; + OS << formatv(TAB_2 "std::cout << \"---> {0}\";\n", F.getName()); + OS << TAB_1 "}\n\n"; + + // Perform actual function call to the validation wrapper + ParamNameList = 
ParamNameList.substr(0, ParamNameList.size() - 2); + OS << formatv(TAB_1 "{0}_result_t Result = {1}_val({2});\n\n", PrefixLower, + F.getName(), ParamNameList); + + // Emit post-call prints + OS << TAB_1 "if (offloadConfig().TracingEnabled) {\n"; + if (F.getParams().size() > 0) { + OS << formatv(TAB_2 "{0} Params = {{", F.getParamStructName()); + for (const auto &Param : F.getParams()) { + OS << "&" << Param.getName(); + if (Param != F.getParams().back()) { + OS << ", "; + } + } + OS << formatv("};\n"); + OS << TAB_2 "std::cout << \"(\" << &Params << \")\";\n"; + } else { + OS << TAB_2 "std::cout << \"()\";\n"; + } + OS << TAB_2 "std::cout << \"-> \" << Result << \"\\n\";\n"; + OS << TAB_2 "if (Result && Result->Details) {\n"; + OS << TAB_3 "std::cout << \" *Error Details* \" << Result->Details " + "<< \" \\n\";\n"; + OS << TAB_2 "}\n"; + OS << TAB_1 "}\n"; + + OS << TAB_1 "return Result;\n"; + OS << "}\n"; +} + +static void EmitCodeLocWrapper(const FunctionRec &F, raw_ostream &OS) { + // Emit preamble + OS << formatv("{0}_result_t {1}WithCodeLoc(\n ", PrefixLower, F.getName()); + // Emit arguments + std::string ParamNameList = ""; + for (auto &Param : F.getParams()) { + OS << Param.getType() << " " << Param.getName() << ", "; + ParamNameList += Param.getName().str(); + if (Param != F.getParams().back()) { + ParamNameList += ", "; + } + } + OS << "ol_code_location_t *CodeLocation"; + OS << ") {\n"; + OS << TAB_1 "currentCodeLocation() = CodeLocation;\n"; + OS << formatv(TAB_1 "{0}_result_t Result = {1}({2});\n\n", PrefixLower, + F.getName(), ParamNameList); + OS << TAB_1 "currentCodeLocation() = nullptr;\n"; + OS << TAB_1 "return Result;\n"; + OS << "}\n"; +} + +void EmitOffloadEntryPoints(const RecordKeeper &Records, raw_ostream &OS) { + OS << GenericHeader; + for (auto *R : Records.getAllDerivedDefinitions("Function")) { + EmitValidationFunc(FunctionRec{R}, OS); + EmitEntryPointFunc(FunctionRec{R}, OS); + EmitCodeLocWrapper(FunctionRec{R}, OS); + } +} diff --git a/offload/tools/offload-tblgen/FuncsGen.cpp b/offload/tools/offload-tblgen/FuncsGen.cpp new file mode 100644 index 0000000000000..3238652176198 --- /dev/null +++ b/offload/tools/offload-tblgen/FuncsGen.cpp @@ -0,0 +1,74 @@ +//===- offload-tblgen/APIGen.cpp - Tablegen backend for Offload functions -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is a Tablegen backend that handles generation of various small files +// pertaining to the API functions. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/FormatVariadic.h" +#include "llvm/TableGen/Record.h" + +#include "GenCommon.hpp" +#include "RecordTypes.hpp" + +using namespace llvm; +using namespace offload::tblgen; + +// Emit a list of just the API function names +void EmitOffloadFuncNames(const RecordKeeper &Records, raw_ostream &OS) { + OS << GenericHeader; + OS << R"( +#ifndef OFFLOAD_FUNC +#error Please define the macro OFFLOAD_FUNC(Function) +#endif + +)"; + for (auto *R : Records.getAllDerivedDefinitions("Function")) { + FunctionRec FR{R}; + OS << formatv("OFFLOAD_FUNC({0})", FR.getName()) << "\n"; + } + for (auto *R : Records.getAllDerivedDefinitions("Function")) { + FunctionRec FR{R}; + OS << formatv("OFFLOAD_FUNC({0}WithCodeLoc)", FR.getName()) << "\n"; + } + + OS << "\n#undef OFFLOAD_FUNC\n"; +} + +void EmitOffloadExports(const RecordKeeper &Records, raw_ostream &OS) { + OS << "VERS1.0 {\n"; + OS << TAB_1 "global:\n"; + + for (auto *R : Records.getAllDerivedDefinitions("Function")) { + OS << formatv(TAB_2 "{0};\n", FunctionRec(R).getName()); + } + for (auto *R : Records.getAllDerivedDefinitions("Function")) { + OS << formatv(TAB_2 "{0}WithCodeLoc;\n", FunctionRec(R).getName()); + } + OS << TAB_1 "local:\n"; + OS << TAB_2 "*;\n"; + OS << "};\n"; +} + +// Emit declarations for every implementation function +void EmitOffloadImplFuncDecls(const RecordKeeper &Records, raw_ostream &OS) { + OS << GenericHeader; + for (auto *R : Records.getAllDerivedDefinitions("Function")) { + FunctionRec F{R}; + OS << formatv("{0}_impl_result_t {1}_impl(", PrefixLower, F.getName()); + auto Params = F.getParams(); + for (auto &Param : Params) { + OS << Param.getType() << " " << Param.getName(); + if (Param != Params.back()) { + OS << ", "; + } + } + OS << ");\n\n"; + } +} diff --git a/offload/tools/offload-tblgen/GenCommon.hpp b/offload/tools/offload-tblgen/GenCommon.hpp new file mode 100644 index 0000000000000..db432e9958b5d --- /dev/null +++ b/offload/tools/offload-tblgen/GenCommon.hpp @@ -0,0 +1,67 @@ +//===- offload-tblgen/GenCommon.cpp - Common defs for Offload generators --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "RecordTypes.hpp" +#include "llvm/Support/FormatVariadic.h" + +// Having inline bits of tabbed code is hard to read, provide some definitions +// so we can keep things tidier +#define TAB_1 " " +#define TAB_2 " " +#define TAB_3 " " +#define TAB_4 " " +#define TAB_5 " " + +constexpr auto GenericHeader = + R"(//===- Auto-generated file, part of the LLVM/Offload project --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +)"; + +constexpr auto FileHeader = R"( +// Auto-generated file, do not manually edit. 
+ +#pragma once + +#include +#include + +#if defined(__cplusplus) +extern "C" { +#endif + +)"; + +constexpr auto FileFooter = R"( +#if defined(__cplusplus) +} // extern "C" +#endif + +)"; + +constexpr auto CommentsHeader = R"( +/////////////////////////////////////////////////////////////////////////////// +)"; + +constexpr auto CommentsBreak = "///\n"; + +constexpr auto PrefixLower = "ol"; +constexpr auto PrefixUpper = "OL"; + +inline std::string +MakeParamComment(const llvm::offload::tblgen::ParamRec &Param) { + return llvm::formatv("// {0}{1}{2} {3}", (Param.isIn() ? "[in]" : ""), + (Param.isOut() ? "[out]" : ""), + (Param.isOpt() ? "[optional]" : ""), Param.getDesc()); +} diff --git a/offload/tools/offload-tblgen/Generators.hpp b/offload/tools/offload-tblgen/Generators.hpp new file mode 100644 index 0000000000000..8b6104c5cd9c6 --- /dev/null +++ b/offload/tools/offload-tblgen/Generators.hpp @@ -0,0 +1,23 @@ +//===- offload-tblgen/Generators.hpp - Offload generator declarations -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "llvm/TableGen/Record.h" + +void EmitOffloadAPI(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitOffloadFuncNames(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); +void EmitOffloadImplFuncDecls(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); +void EmitOffloadEntryPoints(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); +void EmitOffloadPrintHeader(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); +void EmitOffloadExports(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); diff --git a/offload/tools/offload-tblgen/PrintGen.cpp b/offload/tools/offload-tblgen/PrintGen.cpp new file mode 100644 index 0000000000000..2a7c63c3dfd1f --- /dev/null +++ b/offload/tools/offload-tblgen/PrintGen.cpp @@ -0,0 +1,226 @@ +//===- offload-tblgen/APIGen.cpp - Tablegen backend for Offload printing --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is a Tablegen backend that produces print functions for the Offload API +// entry point functions. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/FormatVariadic.h" +#include "llvm/TableGen/Record.h" + +#include "GenCommon.hpp" +#include "RecordTypes.hpp" + +using namespace llvm; +using namespace offload::tblgen; + +constexpr auto PrintEnumHeader = + R"(/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the {0} type +/// @returns std::ostream & +)"; + +constexpr auto PrintTaggedEnumHeader = + R"(/////////////////////////////////////////////////////////////////////////////// +/// @brief Print type-tagged {0} enum value +/// @returns std::ostream & +)"; + +static void ProcessEnum(const EnumRec &Enum, raw_ostream &OS) { + OS << formatv(PrintEnumHeader, Enum.getName()); + OS << formatv( + "inline std::ostream &operator<<(std::ostream &os, enum {0} value) " + "{{\n" TAB_1 "switch (value) {{\n", + Enum.getName()); + + for (const auto &Val : Enum.getValues()) { + auto Name = Enum.getEnumValNamePrefix() + "_" + Val.getName(); + OS << formatv(TAB_1 "case {0}:\n", Name); + OS << formatv(TAB_2 "os << \"{0}\";\n", Name); + OS << formatv(TAB_2 "break;\n"); + } + + OS << TAB_1 "default:\n" TAB_2 "os << \"unknown enumerator\";\n" TAB_2 + "break;\n" TAB_1 "}\n" TAB_1 "return os;\n}\n\n"; + + if (!Enum.isTyped()) { + return; + } + + OS << formatv(PrintTaggedEnumHeader, Enum.getName()); + + OS << formatv(R"""(template <> +inline void printTagged(std::ostream &os, const void *ptr, {0} value, size_t size) {{ + if (ptr == NULL) {{ + printPtr(os, ptr); + return; + } + + switch (value) {{ +)""", + Enum.getName()); + + for (const auto &Val : Enum.getValues()) { + auto Name = Enum.getEnumValNamePrefix() + "_" + Val.getName(); + auto Type = Val.getTaggedType(); + OS << formatv(TAB_1 "case {0}: {{\n", Name); + // Special case for strings + if (Type == "char[]") { + OS << formatv(TAB_2 "printPtr(os, (const char*) ptr);\n"); + } else { + OS << formatv(TAB_2 "const {0} * const tptr = (const {0} * const)ptr;\n", + Type); + // TODO: Handle other cases here + OS << TAB_2 "os << (const void *)tptr << \" (\";\n"; + if (Type.ends_with("*")) { + OS << TAB_2 "os << printPtr(os, tptr);\n"; + } else { + OS << TAB_2 "os << *tptr;\n"; + } + OS << TAB_2 "os << \")\";\n"; + } + OS << formatv(TAB_2 "break;\n" TAB_1 "}\n"); + } + + OS << TAB_1 "default:\n" TAB_2 "os << \"unknown enumerator\";\n" TAB_2 + "break;\n" TAB_1 "}\n"; + + OS << "}\n"; +} + +static void EmitResultPrint(raw_ostream &OS) { + OS << R""( +inline std::ostream &operator<<(std::ostream &os, + const ol_error_struct_t *Err) { + if (Err == nullptr) { + os << "OL_SUCCESS"; + } else { + os << Err->Code; + } + return os; +} +)""; +} + +static void EmitFunctionParamStructPrint(const FunctionRec &Func, + raw_ostream &OS) { + if (Func.getParams().size() == 0) { + return; + } + + OS << formatv(R"( +inline std::ostream &operator<<(std::ostream &os, const struct {0} *params) {{ +)", + Func.getParamStructName()); + + for (const auto &Param : Func.getParams()) { + OS << formatv(TAB_1 "os << \".{0} = \";\n", Param.getName()); + if (auto Range = Param.getRange()) { + OS << formatv(TAB_1 "os << \"{{\";\n"); + OS << formatv(TAB_1 "for (size_t i = {0}; i < *params->p{1}; i++) {{\n", + Range->first, Range->second); + OS << TAB_2 "if (i > 0) {\n"; + OS << TAB_3 " os << \", \";\n"; + OS << TAB_2 "}\n"; + OS << formatv(TAB_2 "printPtr(os, (*params->p{0})[i]);\n", + Param.getName()); + OS << formatv(TAB_1 "}\n"); + OS << formatv(TAB_1 "os << \"}\";\n"); + } else if 
(auto TypeInfo = Param.getTypeInfo()) { + OS << formatv( + TAB_1 + "printTagged(os, *params->p{0}, *params->p{1}, *params->p{2});\n", + Param.getName(), TypeInfo->first, TypeInfo->second); + } else if (Param.isPointerType() || Param.isHandleType()) { + OS << formatv(TAB_1 "printPtr(os, *params->p{0});\n", Param.getName()); + } else { + OS << formatv(TAB_1 "os << *params->p{0};\n", Param.getName()); + } + if (Param != Func.getParams().back()) { + OS << TAB_1 "os << \", \";\n"; + } + } + + OS << TAB_1 "return os;\n}\n"; +} + +void EmitOffloadPrintHeader(const RecordKeeper &Records, raw_ostream &OS) { + OS << GenericHeader; + OS << R"""( +// Auto-generated file, do not manually edit. + +#pragma once + +#include +#include + + +template inline ol_result_t printPtr(std::ostream &os, const T *ptr); +template inline void printTagged(std::ostream &os, const void *ptr, T value, size_t size); +)"""; + + // ========== + OS << "template struct is_handle : std::false_type {};\n"; + for (auto *R : Records.getAllDerivedDefinitions("Handle")) { + HandleRec H{R}; + OS << formatv("template <> struct is_handle<{0}> : std::true_type {{};\n", + H.getName()); + } + OS << "template inline constexpr bool is_handle_v = " + "is_handle::value;\n"; + // ========= + + // Forward declare the operator<< overloads so their implementations can + // use each other. + OS << "\n"; + for (auto *R : Records.getAllDerivedDefinitions("Enum")) { + OS << formatv( + "inline std::ostream &operator<<(std::ostream &os, enum {0} value);\n", + EnumRec{R}.getName()); + } + OS << "\n"; + + // Create definitions + for (auto *R : Records.getAllDerivedDefinitions("Enum")) { + EnumRec E{R}; + ProcessEnum(E, OS); + } + EmitResultPrint(OS); + + // Emit print functions for the function param structs + for (auto *R : Records.getAllDerivedDefinitions("Function")) { + EmitFunctionParamStructPrint(FunctionRec{R}, OS); + } + + OS << R"""( +/////////////////////////////////////////////////////////////////////////////// +// @brief Print pointer value +template inline ol_result_t printPtr(std::ostream &os, const T *ptr) { + if (ptr == nullptr) { + os << "nullptr"; + } else if constexpr (std::is_pointer_v) { + os << (const void *)(ptr) << " ("; + printPtr(os, *ptr); + os << ")"; + } else if constexpr (std::is_void_v || is_handle_v) { + os << (const void *)ptr; + } else if constexpr (std::is_same_v, char>) { + os << (const void *)(ptr) << " ("; + os << ptr; + os << ")"; + } else { + os << (const void *)(ptr) << " ("; + os << *ptr; + os << ")"; + } + + return OL_SUCCESS; +} + )"""; +} diff --git a/offload/tools/offload-tblgen/RecordTypes.hpp b/offload/tools/offload-tblgen/RecordTypes.hpp new file mode 100644 index 0000000000000..0bf3256c525d9 --- /dev/null +++ b/offload/tools/offload-tblgen/RecordTypes.hpp @@ -0,0 +1,227 @@ +//===- offload-tblgen/RecordTypes.cpp - Offload record type wrappers -----===-// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include + +#include "llvm/TableGen/Record.h" + +namespace llvm { +namespace offload { +namespace tblgen { + +class HandleRec { +public: + explicit HandleRec(const Record *rec) : rec(rec) {} + StringRef getName() const { return rec->getValueAsString("name"); } + StringRef getDesc() const { return rec->getValueAsString("desc"); } + +private: + const Record *rec; +}; + +class MacroRec { +public: + explicit MacroRec(const Record *rec) : rec(rec) { + auto Name = rec->getValueAsString("name"); + auto OpenBrace = Name.find_first_of("("); + nameWithoutArgs = Name.substr(0, OpenBrace); + } + StringRef getName() const { return nameWithoutArgs; } + StringRef getNameWithArgs() const { return rec->getValueAsString("name"); } + StringRef getDesc() const { return rec->getValueAsString("desc"); } + + std::optional getCondition() const { + return rec->getValueAsOptionalString("condition"); + } + StringRef getValue() const { return rec->getValueAsString("value"); } + std::optional getAltValue() const { + return rec->getValueAsOptionalString("alt_value"); + } + +private: + const Record *rec; + std::string nameWithoutArgs; +}; + +class TypedefRec { +public: + explicit TypedefRec(const Record *rec) : rec(rec) {} + StringRef getName() const { return rec->getValueAsString("name"); } + StringRef getDesc() const { return rec->getValueAsString("desc"); } + StringRef getValue() const { return rec->getValueAsString("value"); } + +private: + const Record *rec; +}; + +class EnumValueRec { +public: + explicit EnumValueRec(const Record *rec) : rec(rec) {} + std::string getName() const { return rec->getValueAsString("name").upper(); } + StringRef getDesc() const { return rec->getValueAsString("desc"); } + StringRef getTaggedType() const { + return rec->getValueAsString("tagged_type"); + } + +private: + const Record *rec; +}; + +class EnumRec { +public: + explicit EnumRec(const Record *rec) : rec(rec) { + for (const auto *Val : rec->getValueAsListOfDefs("etors")) { + vals.emplace_back(EnumValueRec{Val}); + } + } + StringRef getName() const { return rec->getValueAsString("name"); } + StringRef getDesc() const { return rec->getValueAsString("desc"); } + const std::vector &getValues() const { return vals; } + + std::string getEnumValNamePrefix() const { + return StringRef(getName().str().substr(0, getName().str().length() - 2)) + .upper(); + } + + bool isTyped() const { return rec->getValueAsBit("is_typed"); } + +private: + const Record *rec; + std::vector vals; +}; + +class StructMemberRec { +public: + explicit StructMemberRec(const Record *rec) : rec(rec) {} + StringRef getType() const { return rec->getValueAsString("type"); } + StringRef getName() const { return rec->getValueAsString("name"); } + StringRef getDesc() const { return rec->getValueAsString("desc"); } + +private: + const Record *rec; +}; + +class StructRec { +public: + explicit StructRec(const Record *rec) : rec(rec) { + for (auto *Member : rec->getValueAsListOfDefs("all_members")) { + members.emplace_back(StructMemberRec(Member)); + } + } + StringRef getName() const { return rec->getValueAsString("name"); } + StringRef getDesc() const { return rec->getValueAsString("desc"); } + std::optional getBaseClass() const { + return rec->getValueAsOptionalString("base_class"); + } + const std::vector &getMembers() const { return members; } + +private: + const Record *rec; + std::vector members; +}; + +class 
ParamRec { +public: + explicit ParamRec(const Record *rec) : rec(rec) { + flags = rec->getValueAsBitsInit("flags"); + auto *Range = rec->getValueAsDef("range"); + auto RangeBegin = Range->getValueAsString("begin"); + auto RangeEnd = Range->getValueAsString("end"); + if (RangeBegin != "" && RangeEnd != "") { + range = {RangeBegin, RangeEnd}; + } else { + range = std::nullopt; + } + + auto *TypeInfo = rec->getValueAsDef("type_info"); + auto TypeInfoEnum = TypeInfo->getValueAsString("enum"); + auto TypeInfoSize = TypeInfo->getValueAsString("size"); + if (TypeInfoEnum != "" && TypeInfoSize != "") { + typeinfo = {TypeInfoEnum, TypeInfoSize}; + } else { + typeinfo = std::nullopt; + } + } + StringRef getName() const { return rec->getValueAsString("name"); } + StringRef getType() const { return rec->getValueAsString("type"); } + bool isPointerType() const { return getType().ends_with('*'); } + bool isHandleType() const { return getType().ends_with("_handle_t"); } + StringRef getDesc() const { return rec->getValueAsString("desc"); } + bool isIn() const { return dyn_cast(flags->getBit(0))->getValue(); } + bool isOut() const { return dyn_cast(flags->getBit(1))->getValue(); } + bool isOpt() const { return dyn_cast(flags->getBit(2))->getValue(); } + + const Record *getRec() const { return rec; } + std::optional> getRange() const { + return range; + } + + std::optional> getTypeInfo() const { + return typeinfo; + } + + // Needed to check whether we're at the back of a vector of params + bool operator!=(const ParamRec &p) const { return rec != p.getRec(); } + +private: + const Record *rec; + const BitsInit *flags; + std::optional> range; + std::optional> typeinfo; +}; + +class ReturnRec { +public: + ReturnRec(const Record *rec) : rec(rec) {} + StringRef getValue() const { return rec->getValueAsString("value"); } + std::vector getConditions() const { + return rec->getValueAsListOfStrings("conditions"); + } + +private: + const Record *rec; +}; + +class FunctionRec { +public: + FunctionRec(const Record *rec) : rec(rec) { + for (auto &Ret : rec->getValueAsListOfDefs("all_returns")) + rets.emplace_back(Ret); + for (auto &Param : rec->getValueAsListOfDefs("params")) + params.emplace_back(Param); + } + + std::string getParamStructName() const { + return llvm::formatv("{0}_params_t", + llvm::convertToSnakeFromCamelCase(getName())); + } + + StringRef getName() const { return rec->getValueAsString("name"); } + StringRef getClass() const { return rec->getValueAsString("api_class"); } + const std::vector &getReturns() const { return rets; } + const std::vector &getParams() const { return params; } + StringRef getDesc() const { return rec->getValueAsString("desc"); } + std::vector getDetails() const { + return rec->getValueAsListOfStrings("details"); + } + std::vector getAnalogues() const { + return rec->getValueAsListOfStrings("analogues"); + } + +private: + std::vector rets; + std::vector params; + + const Record *rec; +}; + +} // namespace tblgen +} // namespace offload +} // namespace llvm diff --git a/offload/tools/offload-tblgen/offload-tblgen.cpp b/offload/tools/offload-tblgen/offload-tblgen.cpp new file mode 100644 index 0000000000000..1912abf5265c7 --- /dev/null +++ b/offload/tools/offload-tblgen/offload-tblgen.cpp @@ -0,0 +1,101 @@ +//===- offload-tblgen/offload-tblgen.cpp ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is a Tablegen tool that produces source files for the Offload project. +// See offload/API/README.md for more information. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/TableGen/Main.h" +#include "llvm/TableGen/Record.h" + +#include "Generators.hpp" + +namespace llvm { +namespace offload { +namespace tblgen { + +enum ActionType { + PrintRecords, + DumpJSON, + GenAPI, + GenFuncNames, + GenImplFuncDecls, + GenEntryPoints, + GenPrintHeader, + GenExports +}; + +namespace { +cl::opt Action( + cl::desc("Action to perform:"), + cl::values( + clEnumValN(PrintRecords, "print-records", + "Print all records to stdout (default)"), + clEnumValN(DumpJSON, "dump-json", + "Dump all records as machine-readable JSON"), + clEnumValN(GenAPI, "gen-api", "Generate Offload API header contents"), + clEnumValN(GenFuncNames, "gen-func-names", + "Generate a list of all Offload API function names"), + clEnumValN( + GenImplFuncDecls, "gen-impl-func-decls", + "Generate declarations for Offload API implementation functions"), + clEnumValN(GenEntryPoints, "gen-entry-points", + "Generate Offload API wrapper function definitions"), + clEnumValN(GenPrintHeader, "gen-print-header", + "Generate Offload API print header"), + clEnumValN(GenExports, "gen-exports", + "Generate export file for the Offload library"))); +} + +static bool OffloadTableGenMain(raw_ostream &OS, const RecordKeeper &Records) { + switch (Action) { + case PrintRecords: + OS << Records; + break; + case DumpJSON: + EmitJSON(Records, OS); + break; + case GenAPI: + EmitOffloadAPI(Records, OS); + break; + case GenFuncNames: + EmitOffloadFuncNames(Records, OS); + break; + case GenImplFuncDecls: + EmitOffloadImplFuncDecls(Records, OS); + break; + case GenEntryPoints: + EmitOffloadEntryPoints(Records, OS); + break; + case GenPrintHeader: + EmitOffloadPrintHeader(Records, OS); + break; + case GenExports: + EmitOffloadExports(Records, OS); + break; + } + + return false; +} + +int OffloadTblgenMain(int argc, char **argv) { + InitLLVM y(argc, argv); + cl::ParseCommandLineOptions(argc, argv); + return TableGenMain(argv[0], &OffloadTableGenMain); +} +} // namespace tblgen +} // namespace offload +} // namespace llvm + +using namespace llvm; +using namespace offload::tblgen; + +int main(int argc, char **argv) { return OffloadTblgenMain(argc, argv); } diff --git a/offload/unittests/CMakeLists.txt b/offload/unittests/CMakeLists.txt index 73c87b708d25f..25ac4b2fa3675 100644 --- a/offload/unittests/CMakeLists.txt +++ b/offload/unittests/CMakeLists.txt @@ -5,4 +5,5 @@ function(add_libompt_unittest test_dirname) add_unittest(LibomptUnitTests ${test_dirname} ${ARGN}) endfunction() -add_subdirectory(Plugins) +# add_subdirectory(Plugins) +add_subdirectory(OffloadAPI) diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt new file mode 100644 index 0000000000000..033ee2b6ec746 --- /dev/null +++ b/offload/unittests/OffloadAPI/CMakeLists.txt @@ -0,0 +1,16 @@ +set(PLUGINS_TEST_COMMON LLVMOffload) +set(PLUGINS_TEST_INCLUDE ${LIBOMPTARGET_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/common) + +add_libompt_unittest("offload.unittests" + ${CMAKE_CURRENT_SOURCE_DIR}/common/Environment.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/platform/olGetPlatform.cpp + 
${CMAKE_CURRENT_SOURCE_DIR}/platform/olGetPlatformCount.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/platform/olGetPlatformInfo.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/platform/olGetPlatformInfoSize.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/device/olGetDevice.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/device/olGetDeviceCount.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/device/olGetDeviceInfo.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/device/olGetDeviceInfoSize.cpp) +add_dependencies("offload.unittests" ${PLUGINS_TEST_COMMON}) +target_link_libraries("offload.unittests" PRIVATE ${PLUGINS_TEST_COMMON}) +target_include_directories("offload.unittests" PRIVATE ${PLUGINS_TEST_INCLUDE}) diff --git a/offload/unittests/OffloadAPI/common/Environment.cpp b/offload/unittests/OffloadAPI/common/Environment.cpp new file mode 100644 index 0000000000000..f07a66cda2189 --- /dev/null +++ b/offload/unittests/OffloadAPI/common/Environment.cpp @@ -0,0 +1,96 @@ +//===------- Offload API tests - gtest environment ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Environment.hpp" +#include "Fixtures.hpp" +#include "llvm/Support/CommandLine.h" +#include + +using namespace llvm; + +// Wrapper so we don't have to constantly init and shutdown Offload in every +// test, while having sensible lifetime for the platform environment +struct OffloadInitWrapper { + OffloadInitWrapper() { olInit(); } + ~OffloadInitWrapper() { olShutDown(); } +}; +static OffloadInitWrapper Wrapper{}; + +static cl::opt + SelectedPlatform("platform", cl::desc("Only test the specified platform"), + cl::value_desc("platform")); + +std::ostream &operator<<(std::ostream &Out, + const ol_platform_handle_t &Platform) { + size_t Size; + olGetPlatformInfoSize(Platform, OL_PLATFORM_INFO_NAME, &Size); + std::vector Name(Size); + olGetPlatformInfo(Platform, OL_PLATFORM_INFO_NAME, Size, Name.data()); + Out << Name.data(); + return Out; +} + +std::ostream &operator<<(std::ostream &Out, + const std::vector &Platforms) { + for (auto Platform : Platforms) { + Out << "\n * \"" << Platform << "\""; + } + return Out; +} + +const std::vector &TestEnvironment::getPlatforms() { + static std::vector Platforms{}; + + if (Platforms.empty()) { + uint32_t PlatformCount = 0; + olGetPlatformCount(&PlatformCount); + if (PlatformCount > 0) { + Platforms.resize(PlatformCount); + olGetPlatform(PlatformCount, Platforms.data()); + } + } + + return Platforms; +} + +// Get a single platform, which may be selected by the user. +ol_platform_handle_t TestEnvironment::getPlatform() { + static ol_platform_handle_t Platform = nullptr; + const auto &Platforms = getPlatforms(); + + if (!Platform) { + if (SelectedPlatform != "") { + for (const auto CandidatePlatform : Platforms) { + std::stringstream PlatformName; + PlatformName << CandidatePlatform; + if (SelectedPlatform == PlatformName.str()) { + Platform = CandidatePlatform; + return Platform; + } + } + std::cout << "No platform found with the name \"" << SelectedPlatform + << "\". Choose from:" << Platforms << "\n"; + std::exit(1); + } else { + // Pick a single platform. We prefer one that has available devices, but + // just pick the first initially in case none have any devices. 
+ Platform = Platforms[0]; + for (auto CandidatePlatform : Platforms) { + uint32_t NumDevices = 0; + if (olGetDeviceCount(CandidatePlatform, &NumDevices) == OL_SUCCESS) { + if (NumDevices > 0) { + Platform = CandidatePlatform; + break; + } + } + } + } + } + + return Platform; +} diff --git a/offload/unittests/OffloadAPI/common/Environment.hpp b/offload/unittests/OffloadAPI/common/Environment.hpp new file mode 100644 index 0000000000000..6dba2381eb0b7 --- /dev/null +++ b/offload/unittests/OffloadAPI/common/Environment.hpp @@ -0,0 +1,17 @@ +//===------- Offload API tests - gtest environment ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +namespace TestEnvironment { +const std::vector &getPlatforms(); +ol_platform_handle_t getPlatform(); +} // namespace TestEnvironment diff --git a/offload/unittests/OffloadAPI/common/Fixtures.hpp b/offload/unittests/OffloadAPI/common/Fixtures.hpp new file mode 100644 index 0000000000000..410a435dee1b5 --- /dev/null +++ b/offload/unittests/OffloadAPI/common/Fixtures.hpp @@ -0,0 +1,64 @@ +//===------- Offload API tests - gtest fixtures --==-----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include + +#include "Environment.hpp" + +#pragma once + +#ifndef ASSERT_SUCCESS +#define ASSERT_SUCCESS(ACTUAL) ASSERT_EQ(OL_SUCCESS, ACTUAL) +#endif + +// TODO: rework this so the EXPECTED/ACTUAL results are readable +#ifndef ASSERT_ERROR +#define ASSERT_ERROR(EXPECTED, ACTUAL) \ + do { \ + ol_result_t Res = ACTUAL; \ + ASSERT_TRUE(Res && (Res->Code == EXPECTED)); \ + } while (0) +#endif + +#define RETURN_ON_FATAL_FAILURE(...) \ + __VA_ARGS__; \ + if (this->HasFatalFailure() || this->IsSkipped()) { \ + return; \ + } \ + (void)0 + +struct offloadTest : ::testing::Test { + // No special behavior now, but just in case we need to override it in future +}; + +struct offloadPlatformTest : offloadTest { + void SetUp() override { + RETURN_ON_FATAL_FAILURE(offloadTest::SetUp()); + + Platform = TestEnvironment::getPlatform(); + ASSERT_NE(Platform, nullptr); + } + + ol_platform_handle_t Platform; +}; + +struct offloadDeviceTest : offloadPlatformTest { + void SetUp() override { + RETURN_ON_FATAL_FAILURE(offloadPlatformTest::SetUp()); + + uint32_t NumDevices; + ASSERT_SUCCESS(olGetDeviceCount(Platform, &NumDevices)); + if (NumDevices == 0) + GTEST_SKIP() << "No available devices on this platform."; + ASSERT_SUCCESS(olGetDevice(Platform, 1, &Device)); + } + + ol_device_handle_t Device; +}; diff --git a/offload/unittests/OffloadAPI/device/olDeviceInfo.hpp b/offload/unittests/OffloadAPI/device/olDeviceInfo.hpp new file mode 100644 index 0000000000000..06915258da384 --- /dev/null +++ b/offload/unittests/OffloadAPI/device/olDeviceInfo.hpp @@ -0,0 +1,21 @@ +//===------- Offload API tests - Helpers for device info query testing ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include +#include + +// TODO: We could autogenerate these +inline std::vector DeviceQueries = { + OL_DEVICE_INFO_TYPE, OL_DEVICE_INFO_PLATFORM, OL_DEVICE_INFO_NAME, + OL_DEVICE_INFO_VENDOR, OL_DEVICE_INFO_DRIVER_VERSION}; + +inline std::unordered_map DeviceInfoSizeMap = { + {OL_DEVICE_INFO_TYPE, sizeof(ol_device_type_t)}, + {OL_DEVICE_INFO_PLATFORM, sizeof(ol_platform_handle_t)}, +}; diff --git a/offload/unittests/OffloadAPI/device/olGetDevice.cpp b/offload/unittests/OffloadAPI/device/olGetDevice.cpp new file mode 100644 index 0000000000000..68d4682dd3351 --- /dev/null +++ b/offload/unittests/OffloadAPI/device/olGetDevice.cpp @@ -0,0 +1,39 @@ +//===------- Offload API tests - olGetDevice -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../common/Fixtures.hpp" +#include +#include + +using olGetDeviceTest = offloadPlatformTest; + +TEST_F(olGetDeviceTest, Success) { + uint32_t Count = 0; + ASSERT_SUCCESS(olGetDeviceCount(Platform, &Count)); + if (Count == 0) + GTEST_SKIP() << "No available devices on this platform."; + + std::vector Devices(Count); + ASSERT_SUCCESS(olGetDevice(Platform, Count, Devices.data())); + for (auto Device : Devices) { + ASSERT_NE(nullptr, Device); + } +} + +TEST_F(olGetDeviceTest, SuccessSubsetOfDevices) { + uint32_t Count; + ASSERT_SUCCESS(olGetDeviceCount(Platform, &Count)); + if (Count < 2) + GTEST_SKIP() << "Only one device is available on this platform."; + + std::vector Devices(Count - 1); + ASSERT_SUCCESS(olGetDevice(Platform, Count - 1, Devices.data())); + for (auto Device : Devices) { + ASSERT_NE(nullptr, Device); + } +} diff --git a/offload/unittests/OffloadAPI/device/olGetDeviceCount.cpp b/offload/unittests/OffloadAPI/device/olGetDeviceCount.cpp new file mode 100644 index 0000000000000..ef377d671bf60 --- /dev/null +++ b/offload/unittests/OffloadAPI/device/olGetDeviceCount.cpp @@ -0,0 +1,28 @@ +//===------- Offload API tests - olGetDeviceCount --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../common/Fixtures.hpp" +#include +#include + +using olGetDeviceCountTest = offloadPlatformTest; + +TEST_F(olGetDeviceCountTest, Success) { + uint32_t Count = 0; + ASSERT_SUCCESS(olGetDeviceCount(Platform, &Count)); +} + +TEST_F(olGetDeviceCountTest, InvalidNullPlatform) { + uint32_t Count = 0; + ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, olGetDeviceCount(nullptr, &Count)); +} + +TEST_F(olGetDeviceCountTest, InvalidNullPointer) { + ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, + olGetDeviceCount(Platform, nullptr)); +} diff --git a/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp b/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp new file mode 100644 index 0000000000000..c936802fb1e4d --- /dev/null +++ b/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp @@ -0,0 +1,76 @@ +//===------- Offload API tests - olGetDeviceInfo ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../common/Fixtures.hpp" +#include "olDeviceInfo.hpp" +#include +#include + +struct olGetDeviceInfoTest : offloadDeviceTest, + ::testing::WithParamInterface { + + void SetUp() override { RETURN_ON_FATAL_FAILURE(offloadDeviceTest::SetUp()); } +}; + +INSTANTIATE_TEST_SUITE_P( + , olGetDeviceInfoTest, ::testing::ValuesIn(DeviceQueries), + [](const ::testing::TestParamInfo &info) { + std::stringstream ss; + ss << info.param; + return ss.str(); + }); + +TEST_P(olGetDeviceInfoTest, Success) { + ol_device_info_t InfoType = GetParam(); + size_t Size = 0; + + ASSERT_SUCCESS(olGetDeviceInfoSize(Device, InfoType, &Size)); + + std::vector InfoData(Size); + ASSERT_SUCCESS(olGetDeviceInfo(Device, InfoType, Size, InfoData.data())); + + if (InfoType == OL_DEVICE_INFO_PLATFORM) { + auto *ReturnedPlatform = + reinterpret_cast(InfoData.data()); + ASSERT_EQ(Platform, *ReturnedPlatform); + } +} + +TEST_F(olGetDeviceInfoTest, InvalidNullHandleDevice) { + ol_device_type_t DeviceType; + ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, + olGetDeviceInfo(nullptr, OL_DEVICE_INFO_TYPE, + sizeof(ol_device_type_t), &DeviceType)); +} + +TEST_F(olGetDeviceInfoTest, InvalidEnumerationInfoType) { + ol_device_type_t DeviceType; + ASSERT_ERROR(OL_ERRC_INVALID_ENUMERATION, + olGetDeviceInfo(Device, OL_DEVICE_INFO_FORCE_UINT32, + sizeof(ol_device_type_t), &DeviceType)); +} + +TEST_F(olGetDeviceInfoTest, InvalidSizePropSize) { + ol_device_type_t DeviceType; + ASSERT_ERROR(OL_ERRC_INVALID_SIZE, + olGetDeviceInfo(Device, OL_DEVICE_INFO_TYPE, 0, &DeviceType)); +} + +TEST_F(olGetDeviceInfoTest, InvalidSizePropSizeSmall) { + ol_device_type_t DeviceType; + ASSERT_ERROR(OL_ERRC_INVALID_SIZE, + olGetDeviceInfo(Device, OL_DEVICE_INFO_TYPE, + sizeof(DeviceType) - 1, &DeviceType)); +} + +TEST_F(olGetDeviceInfoTest, InvalidNullPointerPropValue) { + ol_device_type_t DeviceType; + ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, + olGetDeviceInfo(Device, OL_DEVICE_INFO_TYPE, sizeof(DeviceType), + nullptr)); +} diff --git a/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp b/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp new file mode 100644 index 0000000000000..9e792d1c3e25e --- /dev/null +++ 
b/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp @@ -0,0 +1,58 @@ +//===------- Offload API tests - olGetDeviceInfoSize -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "../common/Fixtures.hpp" +#include "olDeviceInfo.hpp" + +struct olGetDeviceInfoSizeTest + : offloadDeviceTest, + ::testing::WithParamInterface { + + void SetUp() override { RETURN_ON_FATAL_FAILURE(offloadDeviceTest::SetUp()); } +}; + +// TODO: We could autogenerate the list of enum values +INSTANTIATE_TEST_SUITE_P( + , olGetDeviceInfoSizeTest, ::testing::ValuesIn(DeviceQueries), + [](const ::testing::TestParamInfo &info) { + std::stringstream ss; + ss << info.param; + return ss.str(); + }); + +TEST_P(olGetDeviceInfoSizeTest, Success) { + ol_device_info_t InfoType = GetParam(); + size_t Size = 0; + + ASSERT_SUCCESS(olGetDeviceInfoSize(Device, InfoType, &Size)); + auto ExpectedSize = DeviceInfoSizeMap.find(InfoType); + if (ExpectedSize != DeviceInfoSizeMap.end()) { + ASSERT_EQ(Size, ExpectedSize->second); + } else { + ASSERT_NE(Size, 0lu); + } +} + +TEST_F(olGetDeviceInfoSizeTest, InvalidNullHandle) { + size_t Size = 0; + ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, + olGetDeviceInfoSize(nullptr, OL_DEVICE_INFO_TYPE, &Size)); +} + +TEST_F(olGetDeviceInfoSizeTest, InvalidDeviceInfoEnumeration) { + size_t Size = 0; + ASSERT_ERROR(OL_ERRC_INVALID_ENUMERATION, + olGetDeviceInfoSize(Device, OL_DEVICE_INFO_FORCE_UINT32, &Size)); +} + +TEST_F(olGetDeviceInfoSizeTest, InvalidNullPointer) { + ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, + olGetDeviceInfoSize(Device, OL_DEVICE_INFO_TYPE, nullptr)); +} diff --git a/offload/unittests/OffloadAPI/platform/olGetPlatform.cpp b/offload/unittests/OffloadAPI/platform/olGetPlatform.cpp new file mode 100644 index 0000000000000..4a2f9e8ac7741 --- /dev/null +++ b/offload/unittests/OffloadAPI/platform/olGetPlatform.cpp @@ -0,0 +1,28 @@ +//===------- Offload API tests - olGetPlatform -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../common/Fixtures.hpp" +#include +#include + +using olGetPlatformTest = offloadTest; + +TEST_F(olGetPlatformTest, Success) { + uint32_t PlatformCount; + ASSERT_SUCCESS(olGetPlatformCount(&PlatformCount)); + std::vector Platforms(PlatformCount); + ASSERT_SUCCESS(olGetPlatform(PlatformCount, Platforms.data())); +} + +TEST_F(olGetPlatformTest, InvalidNumEntries) { + uint32_t PlatformCount; + ASSERT_SUCCESS(olGetPlatformCount(&PlatformCount)); + std::vector Platforms(PlatformCount); + ASSERT_ERROR(OL_ERRC_INVALID_SIZE, + olGetPlatform(PlatformCount + 1, Platforms.data())); +} diff --git a/offload/unittests/OffloadAPI/platform/olGetPlatformCount.cpp b/offload/unittests/OffloadAPI/platform/olGetPlatformCount.cpp new file mode 100644 index 0000000000000..15b4b6abcd70d --- /dev/null +++ b/offload/unittests/OffloadAPI/platform/olGetPlatformCount.cpp @@ -0,0 +1,22 @@ +//===------- Offload API tests - olGetPlatformCount ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../common/Fixtures.hpp" +#include +#include + +using olGetPlatformCountTest = offloadTest; + +TEST_F(olGetPlatformCountTest, Success) { + uint32_t PlatformCount; + ASSERT_SUCCESS(olGetPlatformCount(&PlatformCount)); +} + +TEST_F(olGetPlatformCountTest, InvalidNullPointer) { + ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, olGetPlatformCount(nullptr)); +} diff --git a/offload/unittests/OffloadAPI/platform/olGetPlatformInfo.cpp b/offload/unittests/OffloadAPI/platform/olGetPlatformInfo.cpp new file mode 100644 index 0000000000000..c646bdc50b7da --- /dev/null +++ b/offload/unittests/OffloadAPI/platform/olGetPlatformInfo.cpp @@ -0,0 +1,76 @@ +//===------- Offload API tests - olGetPlatformInfo -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "../common/Fixtures.hpp" +#include "olPlatformInfo.hpp" + +struct olGetPlatformInfoTest + : offloadPlatformTest, + ::testing::WithParamInterface {}; + +INSTANTIATE_TEST_SUITE_P( + olGetPlatformInfo, olGetPlatformInfoTest, + ::testing::ValuesIn(PlatformQueries), + [](const ::testing::TestParamInfo &info) { + std::stringstream ss; + ss << info.param; + return ss.str(); + }); + +TEST_P(olGetPlatformInfoTest, Success) { + size_t Size = 0; + ol_platform_info_t InfoType = GetParam(); + + ASSERT_SUCCESS(olGetPlatformInfoSize(Platform, InfoType, &Size)); + std::vector InfoData(Size); + ASSERT_SUCCESS(olGetPlatformInfo(Platform, InfoType, Size, InfoData.data())); + + // Info types with a dynamic size are all char[] so we can verify the returned + // string is the expected size. 
+ auto ExpectedSize = PlatformInfoSizeMap.find(InfoType); + if (ExpectedSize == PlatformInfoSizeMap.end()) { + ASSERT_EQ(Size, strlen(InfoData.data()) + 1); + } +} + +TEST_F(olGetPlatformInfoTest, InvalidNullHandle) { + ol_platform_backend_t Backend; + ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, + olGetPlatformInfo(nullptr, OL_PLATFORM_INFO_BACKEND, + sizeof(Backend), &Backend)); +} + +TEST_F(olGetPlatformInfoTest, InvalidPlatformInfoEnumeration) { + ol_platform_backend_t Backend; + ASSERT_ERROR(OL_ERRC_INVALID_ENUMERATION, + olGetPlatformInfo(Platform, OL_PLATFORM_INFO_FORCE_UINT32, + sizeof(Backend), &Backend)); +} + +TEST_F(olGetPlatformInfoTest, InvalidSizeZero) { + ol_platform_backend_t Backend; + ASSERT_ERROR( + OL_ERRC_INVALID_SIZE, + olGetPlatformInfo(Platform, OL_PLATFORM_INFO_BACKEND, 0, &Backend)); +} + +TEST_F(olGetPlatformInfoTest, InvalidSizeSmall) { + ol_platform_backend_t Backend; + ASSERT_ERROR(OL_ERRC_INVALID_SIZE, + olGetPlatformInfo(Platform, OL_PLATFORM_INFO_BACKEND, + sizeof(Backend) - 1, &Backend)); +} + +TEST_F(olGetPlatformInfoTest, InvalidNullPointerPropValue) { + ol_platform_backend_t Backend; + ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, + olGetPlatformInfo(Platform, OL_PLATFORM_INFO_BACKEND, + sizeof(Backend), nullptr)); +} diff --git a/offload/unittests/OffloadAPI/platform/olGetPlatformInfoSize.cpp b/offload/unittests/OffloadAPI/platform/olGetPlatformInfoSize.cpp new file mode 100644 index 0000000000000..7c9274082e8e4 --- /dev/null +++ b/offload/unittests/OffloadAPI/platform/olGetPlatformInfoSize.cpp @@ -0,0 +1,57 @@ +//===------- Offload API tests - olGetPlatformInfoSize ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "../common/Fixtures.hpp" +#include "olPlatformInfo.hpp" + +struct olGetPlatformInfoSizeTest + : offloadPlatformTest, + ::testing::WithParamInterface {}; + +INSTANTIATE_TEST_SUITE_P( + olGetPlatformInfoSize, olGetPlatformInfoSizeTest, + ::testing::ValuesIn(PlatformQueries), + [](const ::testing::TestParamInfo &info) { + std::stringstream ss; + ss << info.param; + return ss.str(); + }); + +TEST_P(olGetPlatformInfoSizeTest, Success) { + size_t Size = 0; + ol_platform_info_t InfoType = GetParam(); + + ASSERT_SUCCESS(olGetPlatformInfoSize(Platform, InfoType, &Size)); + auto ExpectedSize = PlatformInfoSizeMap.find(InfoType); + if (ExpectedSize != PlatformInfoSizeMap.end()) { + ASSERT_EQ(Size, ExpectedSize->second); + } else { + ASSERT_NE(Size, 0lu); + } +} + +TEST_F(olGetPlatformInfoSizeTest, InvalidNullHandle) { + size_t Size = 0; + ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, + olGetPlatformInfoSize(nullptr, OL_PLATFORM_INFO_BACKEND, &Size)); +} + +TEST_F(olGetPlatformInfoSizeTest, InvalidPlatformInfoEnumeration) { + size_t Size = 0; + ASSERT_ERROR( + OL_ERRC_INVALID_ENUMERATION, + olGetPlatformInfoSize(Platform, OL_PLATFORM_INFO_FORCE_UINT32, &Size)); +} + +TEST_F(olGetPlatformInfoSizeTest, InvalidNullPointer) { + ASSERT_ERROR( + OL_ERRC_INVALID_NULL_POINTER, + olGetPlatformInfoSize(Platform, OL_PLATFORM_INFO_BACKEND, nullptr)); +} diff --git a/offload/unittests/OffloadAPI/platform/olPlatformInfo.hpp b/offload/unittests/OffloadAPI/platform/olPlatformInfo.hpp new file mode 100644 index 0000000000000..d49cdb90d321a --- /dev/null +++ b/offload/unittests/OffloadAPI/platform/olPlatformInfo.hpp @@ -0,0 +1,20 @@ +//===------- Offload API tests - Helpers for platform info query testing --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include + +// TODO: We could autogenerate these + +inline std::vector PlatformQueries = { + OL_PLATFORM_INFO_NAME, OL_PLATFORM_INFO_VENDOR_NAME, + OL_PLATFORM_INFO_VERSION, OL_PLATFORM_INFO_BACKEND}; + +inline std::unordered_map PlatformInfoSizeMap = { + {OL_PLATFORM_INFO_BACKEND, sizeof(ol_platform_backend_t)}, +}; From 50249263a173d43475a60ca658048cf3b6206aba Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 3 Dec 2024 11:40:30 -0500 Subject: [PATCH 088/191] MLRegAlloc: Do not use float to count number of used registers [NFC] (#118489) --- llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp index d099544c2a491..03f015f8c9e32 100644 --- a/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp +++ b/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp @@ -559,14 +559,13 @@ class DevelopmentModeEvictionAdvisorAnalysis final float MLEvictAdvisor::getInitialQueueSize(const MachineFunction &MF) { auto &MRI = MF.getRegInfo(); - float Ret = 0.0; + unsigned NumUsedRegs = 0; for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { Register Reg = Register::index2VirtReg(I); - if (MRI.reg_nodbg_empty(Reg)) - continue; - ++Ret; + if (!MRI.reg_nodbg_empty(Reg)) + ++NumUsedRegs; } - return Ret; + return static_cast(NumUsedRegs); } MLEvictAdvisor::MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA, From af31aa44551f1fd78aa0f177947cb523373ff2d9 Mon Sep 17 00:00:00 2001 From: Igor Kirillov Date: Tue, 3 Dec 2024 16:43:54 +0000 Subject: [PATCH 089/191] [LV] Pre-commit tests for fixed width VF fully unrolled loop cost model change --- .../AArch64/fully-unrolled-cost.ll | 121 ++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll new file mode 100644 index 0000000000000..ab29bf8d2d52a --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll @@ -0,0 +1,121 @@ +; REQUIRES: asserts +; RUN: opt < %s -mcpu=neoverse-v2 -passes=loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s + +target triple="aarch64--linux-gnu" + +; This test shows that comparison and next iteration IV have zero cost if the +; vector loop gets executed exactly once with the given VF. 
+define i64 @test(ptr %a, ptr %b) #0 { +; CHECK-LABEL: LV: Checking a loop in 'test' +; CHECK: Cost of 1 for VF 8: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1 +; CHECK-NEXT: Cost of 0 for VF 8: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ] +; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16 +; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK: Cost for VF 8: 26 +; CHECK-NEXT: Cost of 1 for VF 16: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1 +; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ] +; CHECK-NEXT: Cost of 1 for VF 16: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16 +; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK: Cost for VF 16: 50 +; CHECK: LV: Selecting VF: vscale x 2 +entry: + br label %for.body + +exit: ; preds = %for.body + ret i64 %add + +for.body: ; preds = %entry, %for.body + %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ] + %sum = phi i64 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i8, ptr %a, i64 %i.iv + %0 = load i8, ptr %arrayidx, align 1 + %conv = zext i8 %0 to i64 + %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %i.iv + %1 = load i8, ptr %arrayidx2, align 1 + %conv3 = zext i8 %1 to i64 + %mul = mul nuw nsw i64 %conv3, %conv + %add = add i64 %mul, %sum + %i.iv.next = add nuw nsw i64 %i.iv, 1 + %exitcond.not = icmp eq i64 %i.iv.next, 16 + br i1 %exitcond.not, label %exit, label %for.body +} + +; Same as above, but in the next iteration IV has extra users, and thus, the cost is not zero. +define i64 @test_external_iv_user(ptr %a, ptr %b) #0 { +; CHECK-LABEL: LV: Checking a loop in 'test_external_iv_user' +; CHECK: Cost of 1 for VF 8: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1 +; CHECK-NEXT: Cost of 0 for VF 8: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ] +; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16 +; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK: Cost for VF 8: 26 +; CHECK-NEXT: Cost of 1 for VF 16: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1 +; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ] +; CHECK-NEXT: Cost of 1 for VF 16: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16 +; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK: Cost for VF 16: 50 +; CHECK: LV: Selecting VF: vscale x 2 +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ] + %sum = phi i64 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds nuw i8, ptr %a, i64 %i.iv + %0 = load i8, ptr %arrayidx, align 1 + %conv = zext i8 %0 to i64 + %i.iv.next = add nuw nsw i64 %i.iv, 1 + %arrayidx2 = getelementptr inbounds nuw i8, ptr %b, i64 %i.iv.next + %1 = load i8, ptr %arrayidx2, align 1 + %conv3 = zext i8 %1 to i64 + %mul = mul nuw nsw i64 %conv3, %conv + %add = add i64 %sum, %mul + %exitcond.not = icmp eq i64 %i.iv.next, 16 + br i1 %exitcond.not, label %exit, label %for.body + +exit: ; preds = %for.body + ret i64 %add +} + +; Same as above but with two IVs without extra 
users. They all have zero cost when VF equals the number of iterations. +define i64 @test_two_ivs(ptr %a, ptr %b, i64 %start) #0 { +; CHECK-LABEL: LV: Checking a loop in 'test_two_ivs' +; CHECK: Cost of 1 for VF 8: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1 +; CHECK-NEXT: Cost of 0 for VF 8: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ] +; CHECK-NEXT: Cost of 1 for VF 8: induction instruction %j.iv.next = add nuw nsw i64 %j.iv, 1 +; CHECK-NEXT: Cost of 0 for VF 8: induction instruction %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ] +; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16 +; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK: Cost for VF 8: 27 +; CHECK-NEXT: Cost of 1 for VF 16: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1 +; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ] +; CHECK-NEXT: Cost of 1 for VF 16: induction instruction %j.iv.next = add nuw nsw i64 %j.iv, 1 +; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ] +; CHECK-NEXT: Cost of 1 for VF 16: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16 +; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK: Cost for VF 16: 51 +; CHECK: LV: Selecting VF: 16 +entry: + br label %for.body + +exit: ; preds = %for.body + ret i64 %add + +for.body: ; preds = %entry, %for.body + %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ] + %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ] + %sum = phi i64 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i8, ptr %a, i64 %i.iv + %0 = load i8, ptr %arrayidx, align 1 + %conv = zext i8 %0 to i64 + %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %j.iv + %1 = load i8, ptr %arrayidx2, align 1 + %conv3 = zext i8 %1 to i64 + %mul = mul nuw nsw i64 %conv3, %conv + %add = add i64 %mul, %sum + %i.iv.next = add nuw nsw i64 %i.iv, 1 + %j.iv.next = add nuw nsw i64 %j.iv, 1 + %exitcond.not = icmp eq i64 %i.iv.next, 16 + br i1 %exitcond.not, label %exit, label %for.body +} + +attributes #0 = { vscale_range(1, 16) "target-features"="+sve" } From 1f20eee6dc367bd202895e3eedb03974a628ef16 Mon Sep 17 00:00:00 2001 From: Vyacheslav Levytskyy Date: Tue, 3 Dec 2024 17:55:26 +0100 Subject: [PATCH 090/191] [SPIR-V] Emit OpConstant instead of OpConstantNull to conform to NonSemantic.Shader.DebugInfo.100 DebugTypeBasic's flags definition (#118333) This PR is to fix https://github.com/llvm/llvm-project/issues/118011 by emitting OpConstant instead of OpConstantNull to conform to NonSemantic.Shader.DebugInfo.100 DebugTypeBasic's flags definition. 
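In essence (an illustrative sketch, not part of the patch itself — the exact code is in the diff below): `buildConstantInt` gains a `ZeroAsNull` parameter, and the non-semantic debug-info emitter passes `false` for it so the Flags operand of DebugTypeBasic is materialized as a real integer constant rather than a null constant:

    // Overload shape per the SPIRVGlobalRegistry.h change below (defaults shown).
    Register buildConstantInt(uint64_t Val, MachineIRBuilder &MIRBuilder,
                              SPIRVType *SpvType, bool EmitIR = true,
                              bool ZeroAsNull = true);

    // Call site in SPIRVEmitNonSemanticDI.cpp: request a literal zero for the
    // DebugTypeBasic flags operand instead of a null constant.
    const Register I32ZeroReg =
        GR->buildConstantInt(0, MIRBuilder, I32Ty, /*EmitIR=*/false,
                             /*ZeroAsNull=*/false);

With `ZeroAsNull == false`, a zero value is emitted as `OpConstant <i32-type> 0` rather than `OpConstantNull <i32-type>`, which is what the updated debug-type-basic.ll CHECK lines expect.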
--- llvm/lib/Target/SPIRV/SPIRVEmitNonSemanticDI.cpp | 2 +- llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp | 6 +++--- llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h | 3 ++- llvm/test/CodeGen/SPIRV/debug-info/debug-type-basic.ll | 4 ++-- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitNonSemanticDI.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitNonSemanticDI.cpp index d3e323efaee91..b98cef0a4f07f 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitNonSemanticDI.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitNonSemanticDI.cpp @@ -268,7 +268,7 @@ bool SPIRVEmitNonSemanticDI::emitGlobalDI(MachineFunction &MF) { // We aren't extracting any DebugInfoFlags now so we // emitting zero to use as Flags argument for DebugBasicType const Register I32ZeroReg = - GR->buildConstantInt(0, MIRBuilder, I32Ty, false); + GR->buildConstantInt(0, MIRBuilder, I32Ty, false, false); // We need to store pairs because further instructions reference // the DIBasicTypes and size will be always small so there isn't diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp index 9ac659f6b4f11..91b9cbcf15128 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -325,8 +325,8 @@ Register SPIRVGlobalRegistry::getOrCreateConstInt(uint64_t Val, MachineInstr &I, Register SPIRVGlobalRegistry::buildConstantInt(uint64_t Val, MachineIRBuilder &MIRBuilder, - SPIRVType *SpvType, - bool EmitIR) { + SPIRVType *SpvType, bool EmitIR, + bool ZeroAsNull) { assert(SpvType); auto &MF = MIRBuilder.getMF(); const IntegerType *LLVMIntTy = @@ -348,7 +348,7 @@ Register SPIRVGlobalRegistry::buildConstantInt(uint64_t Val, } else { Register SpvTypeReg = getSPIRVTypeID(SpvType); MachineInstrBuilder MIB; - if (Val) { + if (Val || !ZeroAsNull) { MIB = MIRBuilder.buildInstr(SPIRV::OpConstantI) .addDef(Res) .addUse(SpvTypeReg); diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h index ff4b0ea8757fa..df92325ed1980 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h @@ -509,7 +509,8 @@ class SPIRVGlobalRegistry { public: Register buildConstantInt(uint64_t Val, MachineIRBuilder &MIRBuilder, - SPIRVType *SpvType, bool EmitIR = true); + SPIRVType *SpvType, bool EmitIR = true, + bool ZeroAsNull = true); Register getOrCreateConstInt(uint64_t Val, MachineInstr &I, SPIRVType *SpvType, const SPIRVInstrInfo &TII, bool ZeroAsNull = true); diff --git a/llvm/test/CodeGen/SPIRV/debug-info/debug-type-basic.ll b/llvm/test/CodeGen/SPIRV/debug-info/debug-type-basic.ll index 03aedeb97dbb4..090fe1578a36b 100644 --- a/llvm/test/CodeGen/SPIRV/debug-info/debug-type-basic.ll +++ b/llvm/test/CodeGen/SPIRV/debug-info/debug-type-basic.ll @@ -10,7 +10,7 @@ ; CHECK-MIR-DAG: [[type_i32:%[0-9]+\:type]] = OpTypeInt 32, 0 ; CHECK-MIR-DAG: [[encoding_signedchar:%[0-9]+\:iid\(s32\)]] = OpConstantI [[type_i32]], 5 ; CHECK-MIR-DAG: [[encoding_float:%[0-9]+\:iid\(s32\)]] = OpConstantI [[type_i32]], 3 -; CHECK-MIR-DAG: [[flag_zero:%[0-9]+\:iid\(s32\)]] = OpConstantNull [[type_i32]] +; CHECK-MIR-DAG: [[flag_zero:%[0-9]+\:iid\(s32\)]] = OpConstantI [[type_i32]], 0 ; CHECK-MIR-DAG: [[str_bool:%[0-9]+\:id\(s32\)]] = OpString 1819242338, 0 ; CHECK-MIR-DAG: [[size_8bits:%[0-9]+\:iid\(s32\)]] = OpConstantI [[type_i32]], 8 ; CHECK-MIR-DAG: [[encoding_boolean:%[0-9]+\:iid\(s32\)]] = OpConstantI [[type_i32]], 2 @@ -63,7 +63,7 @@ ; CHECK-SPIRV-DAG: [[type_int16:%[0-9]+]] = 
OpTypeInt 16 0 ; CHECK-SPIRV-DAG: [[type_int32:%[0-9]+]] = OpTypeInt 32 0 ; CHECK-SPIRV-DAG: [[encoding_signedchar:%[0-9]+]] = OpConstant [[type_int32]] 5 -; CHECK-SPIRV-DAG: [[flag_zero:%[0-9]+]] = OpConstantNull [[type_int32]] +; CHECK-SPIRV-DAG: [[flag_zero:%[0-9]+]] = OpConstant [[type_int32]] 0 ; CHECK-SPIRV-DAG: [[encoding_float:%[0-9]+]] = OpConstant [[type_int32]] 3 ; CHECK-SPIRV-DAG: [[size_8bit:%[0-9]+]] = OpConstant [[type_int32]] 8 ; CHECK-SPIRV-DAG: [[encoding_boolean:%[0-9]+]] = OpConstant [[type_int32]] 2 From 68112f0f5b2209e14aae9a891481cba5709873a2 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 3 Dec 2024 08:59:58 -0800 Subject: [PATCH 091/191] libc: fixup include path and bazel stale comments (#118510) Downstream builders are having issues with this local include. Use a fuller path that's more standard throughout the codebase. Also some of the comments in the bazel overlay are stale. Remove them. Reported-by: Brooks Moses --- libc/src/__support/OSUtil/linux/exit.cpp | 2 +- utils/bazel/llvm-project-overlay/libc/libc_build_rules.bzl | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/libc/src/__support/OSUtil/linux/exit.cpp b/libc/src/__support/OSUtil/linux/exit.cpp index 9c64ce42be185..e26b90f6b18eb 100644 --- a/libc/src/__support/OSUtil/linux/exit.cpp +++ b/libc/src/__support/OSUtil/linux/exit.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/OSUtil/linux/syscall.h" // syscall_impl #include "src/__support/common.h" #include "src/__support/macros/config.h" -#include "syscall.h" // For internal syscall function. #include // For syscall numbers. namespace LIBC_NAMESPACE_DECL { diff --git a/utils/bazel/llvm-project-overlay/libc/libc_build_rules.bzl b/utils/bazel/llvm-project-overlay/libc/libc_build_rules.bzl index 7d135b465bce1..82e65a728bc61 100644 --- a/utils/bazel/llvm-project-overlay/libc/libc_build_rules.bzl +++ b/utils/bazel/llvm-project-overlay/libc/libc_build_rules.bzl @@ -74,7 +74,6 @@ def libc_function( for. srcs: The .cpp files which contain the function implementation. weak: Make the symbol corresponding to the libc function "weak". - deps: The list of target dependencies if any. copts: The list of options to add to the C++ compilation command. local_defines: The preprocessor defines which will be prepended with -D and passed to the compile command of this target but not @@ -138,9 +137,6 @@ def libc_math_function( Args: name: The name of the function. - specializations: List of machine specializations available for this - function. Possible specializations are "generic", - "aarch64" and "x86_64". additional_deps: Other deps like helper cc_library targes used by the math function. """ From 56ab56c85729976f29d5de2fd73912449cb6da7c Mon Sep 17 00:00:00 2001 From: Jim B Date: Tue, 3 Dec 2024 12:03:51 -0500 Subject: [PATCH 092/191] [clang-format] Add support for `.cjs` as JavaScript file extension (#118188) Node uses `.cjs` as an extension for 'common javascript' modules. --- clang/docs/ClangFormat.rst | 2 +- clang/lib/Format/Format.cpp | 1 + clang/tools/clang-format/ClangFormat.cpp | 2 +- clang/tools/clang-format/git-clang-format | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/clang/docs/ClangFormat.rst b/clang/docs/ClangFormat.rst index e17d741b0a00e..c8f1d7f5a7758 100644 --- a/clang/docs/ClangFormat.rst +++ b/clang/docs/ClangFormat.rst @@ -49,7 +49,7 @@ to format C/C++/Java/JavaScript/JSON/Objective-C/Protobuf/C# code. 
supported: CSharp: .cs Java: .java - JavaScript: .mjs .js .ts + JavaScript: .js .mjs .cjs .ts Json: .json Objective-C: .m .mm Proto: .proto .protodevel diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index ee52972ce66f4..dcaac4b0d42cc 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -3950,6 +3950,7 @@ static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { return FormatStyle::LK_Java; if (FileName.ends_with_insensitive(".js") || FileName.ends_with_insensitive(".mjs") || + FileName.ends_with_insensitive(".cjs") || FileName.ends_with_insensitive(".ts")) { return FormatStyle::LK_JavaScript; // (module) JavaScript or TypeScript. } diff --git a/clang/tools/clang-format/ClangFormat.cpp b/clang/tools/clang-format/ClangFormat.cpp index 5481bb6b87503..28610052b9b74 100644 --- a/clang/tools/clang-format/ClangFormat.cpp +++ b/clang/tools/clang-format/ClangFormat.cpp @@ -87,7 +87,7 @@ static cl::opt AssumeFileName( "supported:\n" " CSharp: .cs\n" " Java: .java\n" - " JavaScript: .mjs .js .ts\n" + " JavaScript: .js .mjs .cjs .ts\n" " Json: .json\n" " Objective-C: .m .mm\n" " Proto: .proto .protodevel\n" diff --git a/clang/tools/clang-format/git-clang-format b/clang/tools/clang-format/git-clang-format index 6a2a2a22ec5c2..a322d4abf0ec2 100755 --- a/clang/tools/clang-format/git-clang-format +++ b/clang/tools/clang-format/git-clang-format @@ -94,7 +94,7 @@ def main(): # Other languages that clang-format supports 'proto', 'protodevel', # Protocol Buffers 'java', # Java - 'mjs', 'js', # JavaScript + 'js', 'mjs', 'cjs', # JavaScript 'ts', # TypeScript 'cs', # C Sharp 'json', # Json From 5ae613c9b9dc37ec1b0a6d76714099375288d772 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Tue, 3 Dec 2024 12:12:58 -0500 Subject: [PATCH 093/191] [NFC] Remove trailing white spaces in `clang/include/clang/Driver/Options.td` --- clang/include/clang/Driver/Options.td | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index cb96b5daed9d3..4bc0b97ea68f2 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1786,12 +1786,12 @@ defm debug_info_for_profiling : BoolFOption<"debug-info-for-profiling", PosFlag, NegFlag>; -def fprofile_generate_cold_function_coverage : Flag<["-"], "fprofile-generate-cold-function-coverage">, +def fprofile_generate_cold_function_coverage : Flag<["-"], "fprofile-generate-cold-function-coverage">, Group, Visibility<[ClangOption, CLOption]>, HelpText<"Generate instrumented code to collect coverage info for cold functions into default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env var)">; -def fprofile_generate_cold_function_coverage_EQ : Joined<["-"], "fprofile-generate-cold-function-coverage=">, +def fprofile_generate_cold_function_coverage_EQ : Joined<["-"], "fprofile-generate-cold-function-coverage=">, Group, Visibility<[ClangOption, CLOption]>, MetaVarName<"">, - HelpText<"Generate instrumented code to collect coverage info for cold functions into /default.profraw (overridden by LLVM_PROFILE_FILE env var)">; + HelpText<"Generate instrumented code to collect coverage info for cold functions into /default.profraw (overridden by LLVM_PROFILE_FILE env var)">; def fprofile_instr_generate : Flag<["-"], "fprofile-instr-generate">, Group, Visibility<[ClangOption, CLOption]>, HelpText<"Generate instrumented code to collect execution counts into default.profraw file 
(overridden by '=' form of option or LLVM_PROFILE_FILE env var)">; From afe75b4d5fcebd6fdd292ca1797de1b35cb687b0 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Tue, 3 Dec 2024 12:13:34 -0500 Subject: [PATCH 094/191] [mlir python] Add nanobind support for standalone dialects. (#117922) This PR allows out-of-tree dialects to write Python dialect modules using nanobind instead of pybind11. It may make sense to migrate in-tree dialects and some of the ODS Python infrastructure to nanobind, but that is a topic for a future change. This PR makes the following changes: * adds nanobind to the CMake and Bazel build systems. We also add robin_map to the Bazel build, which is a dependency of nanobind. * adds a PYTHON_BINDING_LIBRARY option to various CMake functions, such as declare_mlir_python_extension, allowing users to select a Python binding library. * creates a fork of mlir/include/mlir/Bindings/Python/PybindAdaptors.h named NanobindAdaptors.h. This plays the same role, using nanobind instead of pybind11. * splits CollectDiagnosticsToStringScope out of PybindAdaptors.h and into a new header mlir/include/mlir/Bindings/Python/Diagnostics.h, since it is code that is no way related to pybind11 or for that matter, Python. * changed the standalone Python extension example to have both pybind11 and nanobind variants. * changed mlir/python/mlir/dialects/python_test.py to have both pybind11 and nanobind variants. Notes: * A slightly unfortunate thing that I needed to do in the CMake integration was to use FindPython in addition to FindPython3, since nanobind's CMake integration expects the Python_ names for variables. Perhaps there's a better way to do this. --- mlir/cmake/modules/AddMLIRPython.cmake | 27 +- mlir/cmake/modules/MLIRDetectPythonEnv.cmake | 39 + mlir/docs/Bindings/Python.md | 20 +- .../examples/standalone/python/CMakeLists.txt | 22 +- .../python/StandaloneExtensionNanobind.cpp | 35 + ...on.cpp => StandaloneExtensionPybind11.cpp} | 7 +- .../{standalone.py => standalone_nanobind.py} | 2 +- .../dialects/standalone_pybind11.py | 6 + .../standalone/test/python/smoketest.py | 14 +- .../mlir/Bindings/Python/Diagnostics.h | 59 ++ .../mlir/Bindings/Python/NanobindAdaptors.h | 671 ++++++++++++++++++ .../mlir/Bindings/Python/PybindAdaptors.h | 43 +- mlir/lib/Bindings/Python/DialectLLVM.cpp | 4 +- .../Bindings/Python/TransformInterpreter.cpp | 7 +- mlir/python/CMakeLists.txt | 23 +- mlir/python/mlir/dialects/python_test.py | 17 +- mlir/python/requirements.txt | 1 + mlir/test/python/dialects/python_test.py | 59 +- mlir/test/python/lib/CMakeLists.txt | 3 +- .../python/lib/PythonTestModuleNanobind.cpp | 121 ++++ ...odule.cpp => PythonTestModulePybind11.cpp} | 4 +- utils/bazel/WORKSPACE | 18 + .../llvm-project-overlay/mlir/BUILD.bazel | 50 +- utils/bazel/third_party_build/nanobind.BUILD | 25 + utils/bazel/third_party_build/robin_map.BUILD | 12 + 25 files changed, 1184 insertions(+), 105 deletions(-) create mode 100644 mlir/examples/standalone/python/StandaloneExtensionNanobind.cpp rename mlir/examples/standalone/python/{StandaloneExtension.cpp => StandaloneExtensionPybind11.cpp} (81%) rename mlir/examples/standalone/python/mlir_standalone/dialects/{standalone.py => standalone_nanobind.py} (78%) create mode 100644 mlir/examples/standalone/python/mlir_standalone/dialects/standalone_pybind11.py create mode 100644 mlir/include/mlir/Bindings/Python/Diagnostics.h create mode 100644 mlir/include/mlir/Bindings/Python/NanobindAdaptors.h create mode 100644 mlir/test/python/lib/PythonTestModuleNanobind.cpp 
rename mlir/test/python/lib/{PythonTestModule.cpp => PythonTestModulePybind11.cpp} (96%) create mode 100644 utils/bazel/third_party_build/nanobind.BUILD create mode 100644 utils/bazel/third_party_build/robin_map.BUILD diff --git a/mlir/cmake/modules/AddMLIRPython.cmake b/mlir/cmake/modules/AddMLIRPython.cmake index 7b91f43e2d57f..67619a90c90be 100644 --- a/mlir/cmake/modules/AddMLIRPython.cmake +++ b/mlir/cmake/modules/AddMLIRPython.cmake @@ -114,10 +114,11 @@ endfunction() # EMBED_CAPI_LINK_LIBS: Dependent CAPI libraries that this extension depends # on. These will be collected for all extensions and put into an # aggregate dylib that is linked against. +# PYTHON_BINDINGS_LIBRARY: Either pybind11 or nanobind. function(declare_mlir_python_extension name) cmake_parse_arguments(ARG "" - "ROOT_DIR;MODULE_NAME;ADD_TO_PARENT" + "ROOT_DIR;MODULE_NAME;ADD_TO_PARENT;PYTHON_BINDINGS_LIBRARY" "SOURCES;PRIVATE_LINK_LIBS;EMBED_CAPI_LINK_LIBS" ${ARGN}) @@ -126,15 +127,20 @@ function(declare_mlir_python_extension name) endif() set(_install_destination "src/python/${name}") + if(NOT ARG_PYTHON_BINDINGS_LIBRARY) + set(ARG_PYTHON_BINDINGS_LIBRARY "pybind11") + endif() + add_library(${name} INTERFACE) set_target_properties(${name} PROPERTIES # Yes: Leading-lowercase property names are load bearing and the recommended # way to do this: https://gitlab.kitware.com/cmake/cmake/-/issues/19261 - EXPORT_PROPERTIES "mlir_python_SOURCES_TYPE;mlir_python_EXTENSION_MODULE_NAME;mlir_python_EMBED_CAPI_LINK_LIBS;mlir_python_DEPENDS" + EXPORT_PROPERTIES "mlir_python_SOURCES_TYPE;mlir_python_EXTENSION_MODULE_NAME;mlir_python_EMBED_CAPI_LINK_LIBS;mlir_python_DEPENDS;mlir_python_BINDINGS_LIBRARY" mlir_python_SOURCES_TYPE extension mlir_python_EXTENSION_MODULE_NAME "${ARG_MODULE_NAME}" mlir_python_EMBED_CAPI_LINK_LIBS "${ARG_EMBED_CAPI_LINK_LIBS}" mlir_python_DEPENDS "" + mlir_python_BINDINGS_LIBRARY "${ARG_PYTHON_BINDINGS_LIBRARY}" ) # Set the interface source and link_libs properties of the target @@ -223,12 +229,14 @@ function(add_mlir_python_modules name) elseif(_source_type STREQUAL "extension") # Native CPP extension. get_target_property(_module_name ${sources_target} mlir_python_EXTENSION_MODULE_NAME) + get_target_property(_bindings_library ${sources_target} mlir_python_BINDINGS_LIBRARY) # Transform relative source to based on root dir. set(_extension_target "${modules_target}.extension.${_module_name}.dso") add_mlir_python_extension(${_extension_target} "${_module_name}" INSTALL_COMPONENT ${modules_target} INSTALL_DIR "${ARG_INSTALL_PREFIX}/_mlir_libs" OUTPUT_DIRECTORY "${ARG_ROOT_PREFIX}/_mlir_libs" + PYTHON_BINDINGS_LIBRARY ${_bindings_library} LINK_LIBS PRIVATE ${sources_target} ${ARG_COMMON_CAPI_LINK_LIBS} @@ -634,7 +642,7 @@ endfunction() function(add_mlir_python_extension libname extname) cmake_parse_arguments(ARG "" - "INSTALL_COMPONENT;INSTALL_DIR;OUTPUT_DIRECTORY" + "INSTALL_COMPONENT;INSTALL_DIR;OUTPUT_DIRECTORY;PYTHON_BINDINGS_LIBRARY" "SOURCES;LINK_LIBS" ${ARGN}) if(ARG_UNPARSED_ARGUMENTS) @@ -644,9 +652,16 @@ function(add_mlir_python_extension libname extname) # The actual extension library produces a shared-object or DLL and has # sources that must be compiled in accordance with pybind11 needs (RTTI and # exceptions). 
- pybind11_add_module(${libname} - ${ARG_SOURCES} - ) + if(NOT DEFINED ARG_PYTHON_BINDINGS_LIBRARY OR ARG_PYTHON_BINDINGS_LIBRARY STREQUAL "pybind11") + pybind11_add_module(${libname} + ${ARG_SOURCES} + ) + elseif(ARG_PYTHON_BINDINGS_LIBRARY STREQUAL "nanobind") + nanobind_add_module(${libname} + NB_DOMAIN mlir + ${ARG_SOURCES} + ) + endif() # The extension itself must be compiled with RTTI and exceptions enabled. # Also, some warning classes triggered by pybind11 are disabled. diff --git a/mlir/cmake/modules/MLIRDetectPythonEnv.cmake b/mlir/cmake/modules/MLIRDetectPythonEnv.cmake index 05397b7a1e1c7..c62ac7fa615ea 100644 --- a/mlir/cmake/modules/MLIRDetectPythonEnv.cmake +++ b/mlir/cmake/modules/MLIRDetectPythonEnv.cmake @@ -21,6 +21,12 @@ macro(mlir_configure_python_dev_packages) find_package(Python3 ${LLVM_MINIMUM_PYTHON_VERSION} COMPONENTS Interpreter ${_python_development_component} REQUIRED) + + # It's a little silly to detect Python a second time, but nanobind's cmake + # code looks for Python_ not Python3_. + find_package(Python ${LLVM_MINIMUM_PYTHON_VERSION} + COMPONENTS Interpreter ${_python_development_component} REQUIRED) + unset(_python_development_component) message(STATUS "Found python include dirs: ${Python3_INCLUDE_DIRS}") message(STATUS "Found python libraries: ${Python3_LIBRARIES}") @@ -31,6 +37,13 @@ macro(mlir_configure_python_dev_packages) message(STATUS "Python prefix = '${PYTHON_MODULE_PREFIX}', " "suffix = '${PYTHON_MODULE_SUFFIX}', " "extension = '${PYTHON_MODULE_EXTENSION}") + + mlir_detect_nanobind_install() + find_package(nanobind 2.2 CONFIG REQUIRED) + message(STATUS "Found nanobind v${nanobind_VERSION}: ${nanobind_INCLUDE_DIR}") + message(STATUS "Python prefix = '${PYTHON_MODULE_PREFIX}', " + "suffix = '${PYTHON_MODULE_SUFFIX}', " + "extension = '${PYTHON_MODULE_EXTENSION}") endif() endmacro() @@ -58,3 +71,29 @@ function(mlir_detect_pybind11_install) set(pybind11_DIR "${PACKAGE_DIR}" PARENT_SCOPE) endif() endfunction() + + +# Detects a nanobind package installed in the current python environment +# and sets variables to allow it to be found. This allows nanobind to be +# installed via pip, which typically yields a much more recent version than +# the OS install, which will be available otherwise. +function(mlir_detect_nanobind_install) + if(nanobind_DIR) + message(STATUS "Using explicit nanobind cmake directory: ${nanobind_DIR} (-Dnanobind_DIR to change)") + else() + message(STATUS "Checking for nanobind in python path...") + execute_process( + COMMAND "${Python3_EXECUTABLE}" + -c "import nanobind;print(nanobind.cmake_dir(), end='')" + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + RESULT_VARIABLE STATUS + OUTPUT_VARIABLE PACKAGE_DIR + ERROR_QUIET) + if(NOT STATUS EQUAL "0") + message(STATUS "not found (install via 'pip install nanobind' or set nanobind_DIR)") + return() + endif() + message(STATUS "found (${PACKAGE_DIR})") + set(nanobind_DIR "${PACKAGE_DIR}" PARENT_SCOPE) + endif() +endfunction() diff --git a/mlir/docs/Bindings/Python.md b/mlir/docs/Bindings/Python.md index 6e52c4deaad9a..a0bd1cac118ba 100644 --- a/mlir/docs/Bindings/Python.md +++ b/mlir/docs/Bindings/Python.md @@ -1138,12 +1138,14 @@ attributes and types must connect to the relevant C APIs for building and inspection, which must be provided first. 
Bindings for `Attribute` and `Type` subclasses can be defined using [`include/mlir/Bindings/Python/PybindAdaptors.h`](https://github.com/llvm/llvm-project/blob/main/mlir/include/mlir/Bindings/Python/PybindAdaptors.h) -utilities that mimic pybind11 API for defining functions and properties. These -bindings are to be included in a separate pybind11 module. The utilities also -provide automatic casting between C API handles `MlirAttribute` and `MlirType` -and their Python counterparts so that the C API handles can be used directly in -binding implementations. The methods and properties provided by the bindings -should follow the principles discussed above. +or +[`include/mlir/Bindings/Python/NanobindAdaptors.h`](https://github.com/llvm/llvm-project/blob/main/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h) +utilities that mimic pybind11/nanobind API for defining functions and +properties. These bindings are to be included in a separate module. The +utilities also provide automatic casting between C API handles `MlirAttribute` +and `MlirType` and their Python counterparts so that the C API handles can be +used directly in binding implementations. The methods and properties provided by +the bindings should follow the principles discussed above. The attribute and type bindings for a dialect can be located in `lib/Bindings/Python/Dialect.cpp` and should be compiled into a separate @@ -1179,7 +1181,9 @@ make the passes available along with the dialect. Dialect functionality other than IR objects or passes, such as helper functions, can be exposed to Python similarly to attributes and types. C API is expected to exist for this functionality, which can then be wrapped using pybind11 and -`[include/mlir/Bindings/Python/PybindAdaptors.h](https://github.com/llvm/llvm-project/blob/main/mlir/include/mlir/Bindings/Python/PybindAdaptors.h)` +`[include/mlir/Bindings/Python/PybindAdaptors.h](https://github.com/llvm/llvm-project/blob/main/mlir/include/mlir/Bindings/Python/PybindAdaptors.h)`, +or nanobind and +`[include/mlir/Bindings/Python/NanobindAdaptors.h](https://github.com/llvm/llvm-project/blob/main/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h)` utilities to connect to the rest of Python API. The bindings can be located in a -separate pybind11 module or in the same module as attributes and types, and +separate module or in the same module as attributes and types, and loaded along with the dialect. 
diff --git a/mlir/examples/standalone/python/CMakeLists.txt b/mlir/examples/standalone/python/CMakeLists.txt index a8c43827a5a37..69c82fd913579 100644 --- a/mlir/examples/standalone/python/CMakeLists.txt +++ b/mlir/examples/standalone/python/CMakeLists.txt @@ -17,18 +17,32 @@ declare_mlir_dialect_python_bindings( ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir_standalone" TD_FILE dialects/StandaloneOps.td SOURCES - dialects/standalone.py + dialects/standalone_pybind11.py + dialects/standalone_nanobind.py DIALECT_NAME standalone) -declare_mlir_python_extension(StandalonePythonSources.Extension - MODULE_NAME _standaloneDialects + +declare_mlir_python_extension(StandalonePythonSources.Pybind11Extension + MODULE_NAME _standaloneDialectsPybind11 + ADD_TO_PARENT StandalonePythonSources + SOURCES + StandaloneExtensionPybind11.cpp + EMBED_CAPI_LINK_LIBS + StandaloneCAPI + PYTHON_BINDINGS_LIBRARY pybind11 +) + +declare_mlir_python_extension(StandalonePythonSources.NanobindExtension + MODULE_NAME _standaloneDialectsNanobind ADD_TO_PARENT StandalonePythonSources SOURCES - StandaloneExtension.cpp + StandaloneExtensionNanobind.cpp EMBED_CAPI_LINK_LIBS StandaloneCAPI + PYTHON_BINDINGS_LIBRARY nanobind ) + ################################################################################ # Common CAPI ################################################################################ diff --git a/mlir/examples/standalone/python/StandaloneExtensionNanobind.cpp b/mlir/examples/standalone/python/StandaloneExtensionNanobind.cpp new file mode 100644 index 0000000000000..6d83dc585dcd1 --- /dev/null +++ b/mlir/examples/standalone/python/StandaloneExtensionNanobind.cpp @@ -0,0 +1,35 @@ +//===- StandaloneExtension.cpp - Extension module -------------------------===// +// +// This is the nanobind version of the example module. There is also a pybind11 +// example in StandaloneExtensionPybind11.cpp. +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "Standalone-c/Dialects.h" +#include "mlir/Bindings/Python/NanobindAdaptors.h" + +namespace nb = nanobind; + +NB_MODULE(_standaloneDialectsNanobind, m) { + //===--------------------------------------------------------------------===// + // standalone dialect + //===--------------------------------------------------------------------===// + auto standaloneM = m.def_submodule("standalone"); + + standaloneM.def( + "register_dialect", + [](MlirContext context, bool load) { + MlirDialectHandle handle = mlirGetDialectHandle__standalone__(); + mlirDialectHandleRegisterDialect(handle, context); + if (load) { + mlirDialectHandleLoadDialect(handle, context); + } + }, + nb::arg("context").none() = nb::none(), nb::arg("load") = true); +} diff --git a/mlir/examples/standalone/python/StandaloneExtension.cpp b/mlir/examples/standalone/python/StandaloneExtensionPybind11.cpp similarity index 81% rename from mlir/examples/standalone/python/StandaloneExtension.cpp rename to mlir/examples/standalone/python/StandaloneExtensionPybind11.cpp index 5e83060cd48d8..397db4c20e743 100644 --- a/mlir/examples/standalone/python/StandaloneExtension.cpp +++ b/mlir/examples/standalone/python/StandaloneExtensionPybind11.cpp @@ -1,4 +1,7 @@ -//===- StandaloneExtension.cpp - Extension module -------------------------===// +//===- StandaloneExtensionPybind11.cpp - Extension module -----------------===// +// +// This is the pybind11 version of the example module. There is also a nanobind +// example in StandaloneExtensionNanobind.cpp. // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -11,7 +14,7 @@ using namespace mlir::python::adaptors; -PYBIND11_MODULE(_standaloneDialects, m) { +PYBIND11_MODULE(_standaloneDialectsPybind11, m) { //===--------------------------------------------------------------------===// // standalone dialect //===--------------------------------------------------------------------===// diff --git a/mlir/examples/standalone/python/mlir_standalone/dialects/standalone.py b/mlir/examples/standalone/python/mlir_standalone/dialects/standalone_nanobind.py similarity index 78% rename from mlir/examples/standalone/python/mlir_standalone/dialects/standalone.py rename to mlir/examples/standalone/python/mlir_standalone/dialects/standalone_nanobind.py index c958b2ac19368..6218720951c82 100644 --- a/mlir/examples/standalone/python/mlir_standalone/dialects/standalone.py +++ b/mlir/examples/standalone/python/mlir_standalone/dialects/standalone_nanobind.py @@ -3,4 +3,4 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception from ._standalone_ops_gen import * -from .._mlir_libs._standaloneDialects.standalone import * +from .._mlir_libs._standaloneDialectsNanobind.standalone import * diff --git a/mlir/examples/standalone/python/mlir_standalone/dialects/standalone_pybind11.py b/mlir/examples/standalone/python/mlir_standalone/dialects/standalone_pybind11.py new file mode 100644 index 0000000000000..bfb98e404e13f --- /dev/null +++ b/mlir/examples/standalone/python/mlir_standalone/dialects/standalone_pybind11.py @@ -0,0 +1,6 @@ +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +from ._standalone_ops_gen import * +from .._mlir_libs._standaloneDialectsPybind11.standalone import * diff --git a/mlir/examples/standalone/test/python/smoketest.py b/mlir/examples/standalone/test/python/smoketest.py index 08e08cbd2fe24..bd40c65d16164 100644 --- a/mlir/examples/standalone/test/python/smoketest.py +++ b/mlir/examples/standalone/test/python/smoketest.py @@ -1,7 +1,17 @@ -# RUN: %python %s | FileCheck %s +# RUN: %python %s pybind11 | FileCheck %s +# RUN: %python %s nanobind | FileCheck %s +import sys from mlir_standalone.ir import * -from mlir_standalone.dialects import builtin as builtin_d, standalone as standalone_d +from mlir_standalone.dialects import builtin as builtin_d + +if sys.argv[1] == "pybind11": + from mlir_standalone.dialects import standalone_pybind11 as standalone_d +elif sys.argv[1] == "nanobind": + from mlir_standalone.dialects import standalone_nanobind as standalone_d +else: + raise ValueError("Expected either pybind11 or nanobind as arguments") + with Context(): standalone_d.register_dialect() diff --git a/mlir/include/mlir/Bindings/Python/Diagnostics.h b/mlir/include/mlir/Bindings/Python/Diagnostics.h new file mode 100644 index 0000000000000..ea80e14dde0f3 --- /dev/null +++ b/mlir/include/mlir/Bindings/Python/Diagnostics.h @@ -0,0 +1,59 @@ +//===- Diagnostics.h - Helpers for diagnostics in Python bindings ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_BINDINGS_PYTHON_DIAGNOSTICS_H +#define MLIR_BINDINGS_PYTHON_DIAGNOSTICS_H + +#include +#include + +#include "mlir-c/Diagnostics.h" +#include "mlir-c/IR.h" +#include "llvm/ADT/StringRef.h" + +namespace mlir { +namespace python { + +/// RAII scope intercepting all diagnostics into a string. The message must be +/// checked before this goes out of scope. 
+class CollectDiagnosticsToStringScope { +public: + explicit CollectDiagnosticsToStringScope(MlirContext ctx) : context(ctx) { + handlerID = mlirContextAttachDiagnosticHandler(ctx, &handler, &errorMessage, + /*deleteUserData=*/nullptr); + } + ~CollectDiagnosticsToStringScope() { + assert(errorMessage.empty() && "unchecked error message"); + mlirContextDetachDiagnosticHandler(context, handlerID); + } + + [[nodiscard]] std::string takeMessage() { return std::move(errorMessage); } + +private: + static MlirLogicalResult handler(MlirDiagnostic diag, void *data) { + auto printer = +[](MlirStringRef message, void *data) { + *static_cast(data) += + llvm::StringRef(message.data, message.length); + }; + MlirLocation loc = mlirDiagnosticGetLocation(diag); + *static_cast(data) += "at "; + mlirLocationPrint(loc, printer, data); + *static_cast(data) += ": "; + mlirDiagnosticPrint(diag, printer, data); + return mlirLogicalResultSuccess(); + } + + MlirContext context; + MlirDiagnosticHandlerID handlerID; + std::string errorMessage = ""; +}; + +} // namespace python +} // namespace mlir + +#endif // MLIR_BINDINGS_PYTHON_DIAGNOSTICS_H diff --git a/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h b/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h new file mode 100644 index 0000000000000..5e01cebcb09c9 --- /dev/null +++ b/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h @@ -0,0 +1,671 @@ +//===- NanobindAdaptors.h - Interop with MLIR APIs via nanobind -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This file contains adaptors for clients of the core MLIR Python APIs to +// interop via MLIR CAPI types, using nanobind. The facilities here do not +// depend on implementation details of the MLIR Python API and do not introduce +// C++-level dependencies with it (requiring only Python and CAPI-level +// dependencies). +// +// It is encouraged to be used both in-tree and out-of-tree. For in-tree use +// cases, it should be used for dialect implementations (versus relying on +// Pybind-based internals of the core libraries). +//===----------------------------------------------------------------------===// + +#ifndef MLIR_BINDINGS_PYTHON_NANOBINDADAPTORS_H +#define MLIR_BINDINGS_PYTHON_NANOBINDADAPTORS_H + +#include +#include + +#include + +#include "mlir-c/Bindings/Python/Interop.h" +#include "mlir-c/Diagnostics.h" +#include "mlir-c/IR.h" +#include "llvm/ADT/Twine.h" + +// Raw CAPI type casters need to be declared before use, so always include them +// first. +namespace nanobind { +namespace detail { + +/// Helper to convert a presumed MLIR API object to a capsule, accepting either +/// an explicit Capsule (which can happen when two C APIs are communicating +/// directly via Python) or indirectly by querying the MLIR_PYTHON_CAPI_PTR_ATTR +/// attribute (through which supported MLIR Python API objects export their +/// contained API pointer as a capsule). Throws a type error if the object is +/// neither. This is intended to be used from type casters, which are invoked +/// with a raw handle (unowned). The returned object's lifetime may not extend +/// beyond the apiObject handle without explicitly having its refcount increased +/// (i.e. on return). 
+static nanobind::object mlirApiObjectToCapsule(nanobind::handle apiObject) { + if (PyCapsule_CheckExact(apiObject.ptr())) + return nanobind::borrow(apiObject); + if (!nanobind::hasattr(apiObject, MLIR_PYTHON_CAPI_PTR_ATTR)) { + std::string repr = nanobind::cast(nanobind::repr(apiObject)); + throw nanobind::type_error( + (llvm::Twine("Expected an MLIR object (got ") + repr + ").") + .str() + .c_str()); + } + return apiObject.attr(MLIR_PYTHON_CAPI_PTR_ATTR); +} + +// Note: Currently all of the following support cast from nanobind::object to +// the Mlir* C-API type, but only a few light-weight, context-bound ones +// implicitly cast the other way because the use case has not yet emerged and +// ownership is unclear. + +/// Casts object <-> MlirAffineMap. +template <> +struct type_caster { + NB_TYPE_CASTER(MlirAffineMap, const_name("MlirAffineMap")); + bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) { + nanobind::object capsule = mlirApiObjectToCapsule(src); + value = mlirPythonCapsuleToAffineMap(capsule.ptr()); + if (mlirAffineMapIsNull(value)) { + return false; + } + return !mlirAffineMapIsNull(value); + } + static handle from_cpp(MlirAffineMap v, rv_policy, + cleanup_list *cleanup) noexcept { + nanobind::object capsule = + nanobind::steal(mlirPythonAffineMapToCapsule(v)); + return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) + .attr("AffineMap") + .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) + .release(); + } +}; + +/// Casts object <-> MlirAttribute. +template <> +struct type_caster { + NB_TYPE_CASTER(MlirAttribute, const_name("MlirAttribute")); + bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) { + nanobind::object capsule = mlirApiObjectToCapsule(src); + value = mlirPythonCapsuleToAttribute(capsule.ptr()); + return !mlirAttributeIsNull(value); + } + static handle from_cpp(MlirAttribute v, rv_policy, + cleanup_list *cleanup) noexcept { + nanobind::object capsule = + nanobind::steal(mlirPythonAttributeToCapsule(v)); + return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) + .attr("Attribute") + .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) + .attr(MLIR_PYTHON_MAYBE_DOWNCAST_ATTR)() + .release(); + } +}; + +/// Casts object -> MlirBlock. +template <> +struct type_caster { + NB_TYPE_CASTER(MlirBlock, const_name("MlirBlock")); + bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) { + nanobind::object capsule = mlirApiObjectToCapsule(src); + value = mlirPythonCapsuleToBlock(capsule.ptr()); + return !mlirBlockIsNull(value); + } +}; + +/// Casts object -> MlirContext. +template <> +struct type_caster { + NB_TYPE_CASTER(MlirContext, const_name("MlirContext")); + bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) { + if (src.is_none()) { + // Gets the current thread-bound context. + // TODO: This raises an error of "No current context" currently. + // Update the implementation to pretty-print the helpful error that the + // core implementations print in this case. + src = nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) + .attr("Context") + .attr("current"); + } + nanobind::object capsule = mlirApiObjectToCapsule(src); + value = mlirPythonCapsuleToContext(capsule.ptr()); + return !mlirContextIsNull(value); + } +}; + +/// Casts object <-> MlirDialectRegistry. 
+template <> +struct type_caster { + NB_TYPE_CASTER(MlirDialectRegistry, const_name("MlirDialectRegistry")); + bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) { + nanobind::object capsule = mlirApiObjectToCapsule(src); + value = mlirPythonCapsuleToDialectRegistry(capsule.ptr()); + return !mlirDialectRegistryIsNull(value); + } + static handle from_cpp(MlirDialectRegistry v, rv_policy, + cleanup_list *cleanup) noexcept { + nanobind::object capsule = nanobind::steal( + mlirPythonDialectRegistryToCapsule(v)); + return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) + .attr("DialectRegistry") + .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) + .release(); + } +}; + +/// Casts object <-> MlirLocation. +template <> +struct type_caster { + NB_TYPE_CASTER(MlirLocation, const_name("MlirLocation")); + bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) { + if (src.is_none()) { + // Gets the current thread-bound context. + src = nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) + .attr("Location") + .attr("current"); + } + nanobind::object capsule = mlirApiObjectToCapsule(src); + value = mlirPythonCapsuleToLocation(capsule.ptr()); + return !mlirLocationIsNull(value); + } + static handle from_cpp(MlirLocation v, rv_policy, + cleanup_list *cleanup) noexcept { + nanobind::object capsule = + nanobind::steal(mlirPythonLocationToCapsule(v)); + return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) + .attr("Location") + .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) + .release(); + } +}; + +/// Casts object <-> MlirModule. +template <> +struct type_caster { + NB_TYPE_CASTER(MlirModule, const_name("MlirModule")); + bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) { + nanobind::object capsule = mlirApiObjectToCapsule(src); + value = mlirPythonCapsuleToModule(capsule.ptr()); + return !mlirModuleIsNull(value); + } + static handle from_cpp(MlirModule v, rv_policy, + cleanup_list *cleanup) noexcept { + nanobind::object capsule = + nanobind::steal(mlirPythonModuleToCapsule(v)); + return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) + .attr("Module") + .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) + .release(); + }; +}; + +/// Casts object <-> MlirFrozenRewritePatternSet. +template <> +struct type_caster { + NB_TYPE_CASTER(MlirFrozenRewritePatternSet, + const_name("MlirFrozenRewritePatternSet")); + bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) { + nanobind::object capsule = mlirApiObjectToCapsule(src); + value = mlirPythonCapsuleToFrozenRewritePatternSet(capsule.ptr()); + return value.ptr != nullptr; + } + static handle from_cpp(MlirFrozenRewritePatternSet v, rv_policy, handle) { + nanobind::object capsule = nanobind::steal( + mlirPythonFrozenRewritePatternSetToCapsule(v)); + return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("rewrite")) + .attr("FrozenRewritePatternSet") + .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) + .release(); + }; +}; + +/// Casts object <-> MlirOperation. 
+template <> +struct type_caster { + NB_TYPE_CASTER(MlirOperation, const_name("MlirOperation")); + bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) { + nanobind::object capsule = mlirApiObjectToCapsule(src); + value = mlirPythonCapsuleToOperation(capsule.ptr()); + return !mlirOperationIsNull(value); + } + static handle from_cpp(MlirOperation v, rv_policy, + cleanup_list *cleanup) noexcept { + if (v.ptr == nullptr) + return nanobind::none(); + nanobind::object capsule = + nanobind::steal(mlirPythonOperationToCapsule(v)); + return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) + .attr("Operation") + .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) + .release(); + }; +}; + +/// Casts object <-> MlirValue. +template <> +struct type_caster { + NB_TYPE_CASTER(MlirValue, const_name("MlirValue")); + bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) { + nanobind::object capsule = mlirApiObjectToCapsule(src); + value = mlirPythonCapsuleToValue(capsule.ptr()); + return !mlirValueIsNull(value); + } + static handle from_cpp(MlirValue v, rv_policy, + cleanup_list *cleanup) noexcept { + if (v.ptr == nullptr) + return nanobind::none(); + nanobind::object capsule = + nanobind::steal(mlirPythonValueToCapsule(v)); + return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) + .attr("Value") + .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) + .attr(MLIR_PYTHON_MAYBE_DOWNCAST_ATTR)() + .release(); + }; +}; + +/// Casts object -> MlirPassManager. +template <> +struct type_caster { + NB_TYPE_CASTER(MlirPassManager, const_name("MlirPassManager")); + bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) { + nanobind::object capsule = mlirApiObjectToCapsule(src); + value = mlirPythonCapsuleToPassManager(capsule.ptr()); + return !mlirPassManagerIsNull(value); + } +}; + +/// Casts object <-> MlirTypeID. +template <> +struct type_caster { + NB_TYPE_CASTER(MlirTypeID, const_name("MlirTypeID")); + bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) { + nanobind::object capsule = mlirApiObjectToCapsule(src); + value = mlirPythonCapsuleToTypeID(capsule.ptr()); + return !mlirTypeIDIsNull(value); + } + static handle from_cpp(MlirTypeID v, rv_policy, + cleanup_list *cleanup) noexcept { + if (v.ptr == nullptr) + return nanobind::none(); + nanobind::object capsule = + nanobind::steal(mlirPythonTypeIDToCapsule(v)); + return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) + .attr("TypeID") + .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) + .release(); + }; +}; + +/// Casts object <-> MlirType. 
+template <> +struct type_caster { + NB_TYPE_CASTER(MlirType, const_name("MlirType")); + bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) { + nanobind::object capsule = mlirApiObjectToCapsule(src); + value = mlirPythonCapsuleToType(capsule.ptr()); + return !mlirTypeIsNull(value); + } + static handle from_cpp(MlirType t, rv_policy, + cleanup_list *cleanup) noexcept { + nanobind::object capsule = + nanobind::steal(mlirPythonTypeToCapsule(t)); + return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) + .attr("Type") + .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) + .attr(MLIR_PYTHON_MAYBE_DOWNCAST_ATTR)() + .release(); + } +}; + +} // namespace detail +} // namespace nanobind + +namespace mlir { +namespace python { +namespace nanobind_adaptors { + +/// Provides a facility like nanobind::class_ for defining a new class in a +/// scope, but this allows extension of an arbitrary Python class, defining +/// methods on it is a similar way. Classes defined in this way are very similar +/// to if defined in Python in the usual way but use nanobind machinery to +/// do it. These are not "real" nanobind classes but pure Python classes +/// with no relation to a concrete C++ class. +/// +/// Derived from a discussion upstream: +/// https://github.com/pybind/pybind11/issues/1193 +/// (plus a fair amount of extra curricular poking) +/// TODO: If this proves useful, see about including it in nanobind. +class pure_subclass { +public: + pure_subclass(nanobind::handle scope, const char *derivedClassName, + const nanobind::object &superClass) { + nanobind::object pyType = + nanobind::borrow((PyObject *)&PyType_Type); + nanobind::object metaclass = pyType(superClass); + nanobind::dict attributes; + + thisClass = metaclass(derivedClassName, nanobind::make_tuple(superClass), + attributes); + scope.attr(derivedClassName) = thisClass; + } + + template + pure_subclass &def(const char *name, Func &&f, const Extra &...extra) { + nanobind::object cf = nanobind::cpp_function( + std::forward(f), nanobind::name(name), nanobind::is_method(), + nanobind::scope(thisClass), extra...); + thisClass.attr(name) = cf; + return *this; + } + + template + pure_subclass &def_property_readonly(const char *name, Func &&f, + const Extra &...extra) { + nanobind::object cf = nanobind::cpp_function( + std::forward(f), nanobind::name(name), nanobind::is_method(), + nanobind::scope(thisClass), extra...); + auto builtinProperty = + nanobind::borrow((PyObject *)&PyProperty_Type); + thisClass.attr(name) = builtinProperty(cf); + return *this; + } + + template + pure_subclass &def_staticmethod(const char *name, Func &&f, + const Extra &...extra) { + static_assert(!std::is_member_function_pointer::value, + "def_staticmethod(...) called with a non-static member " + "function pointer"); + nanobind::object cf = nanobind::cpp_function( + std::forward(f), + nanobind::name(name), // nanobind::scope(thisClass), + extra...); + thisClass.attr(name) = cf; + return *this; + } + + template + pure_subclass &def_classmethod(const char *name, Func &&f, + const Extra &...extra) { + static_assert(!std::is_member_function_pointer::value, + "def_classmethod(...) 
called with a non-static member " + "function pointer"); + nanobind::object cf = nanobind::cpp_function( + std::forward(f), + nanobind::name(name), // nanobind::scope(thisClass), + extra...); + thisClass.attr(name) = + nanobind::borrow(PyClassMethod_New(cf.ptr())); + return *this; + } + + nanobind::object get_class() const { return thisClass; } + +protected: + nanobind::object superClass; + nanobind::object thisClass; +}; + +/// Creates a custom subclass of mlir.ir.Attribute, implementing a casting +/// constructor and type checking methods. +class mlir_attribute_subclass : public pure_subclass { +public: + using IsAFunctionTy = bool (*)(MlirAttribute); + using GetTypeIDFunctionTy = MlirTypeID (*)(); + + /// Subclasses by looking up the super-class dynamically. + mlir_attribute_subclass(nanobind::handle scope, const char *attrClassName, + IsAFunctionTy isaFunction, + GetTypeIDFunctionTy getTypeIDFunction = nullptr) + : mlir_attribute_subclass( + scope, attrClassName, isaFunction, + nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) + .attr("Attribute"), + getTypeIDFunction) {} + + /// Subclasses with a provided mlir.ir.Attribute super-class. This must + /// be used if the subclass is being defined in the same extension module + /// as the mlir.ir class (otherwise, it will trigger a recursive + /// initialization). + mlir_attribute_subclass(nanobind::handle scope, const char *typeClassName, + IsAFunctionTy isaFunction, + const nanobind::object &superCls, + GetTypeIDFunctionTy getTypeIDFunction = nullptr) + : pure_subclass(scope, typeClassName, superCls) { + // Casting constructor. Note that it hard, if not impossible, to properly + // call chain to parent `__init__` in nanobind due to its special handling + // for init functions that don't have a fully constructed self-reference, + // which makes it impossible to forward it to `__init__` of a superclass. + // Instead, provide a custom `__new__` and call that of a superclass, which + // eventually calls `__init__` of the superclass. Since attribute subclasses + // have no additional members, we can just return the instance thus created + // without amending it. + std::string captureTypeName( + typeClassName); // As string in case if typeClassName is not static. + nanobind::object newCf = nanobind::cpp_function( + [superCls, isaFunction, captureTypeName]( + nanobind::object cls, nanobind::object otherAttribute) { + MlirAttribute rawAttribute = + nanobind::cast(otherAttribute); + if (!isaFunction(rawAttribute)) { + auto origRepr = + nanobind::cast(nanobind::repr(otherAttribute)); + throw std::invalid_argument( + (llvm::Twine("Cannot cast attribute to ") + captureTypeName + + " (from " + origRepr + ")") + .str()); + } + nanobind::object self = superCls.attr("__new__")(cls, otherAttribute); + return self; + }, + nanobind::name("__new__"), nanobind::arg("cls"), + nanobind::arg("cast_from_attr")); + thisClass.attr("__new__") = newCf; + + // 'isinstance' method. 
+ def_staticmethod( + "isinstance", + [isaFunction](MlirAttribute other) { return isaFunction(other); }, + nanobind::arg("other_attribute")); + def("__repr__", [superCls, captureTypeName](nanobind::object self) { + return nanobind::repr(superCls(self)) + .attr("replace")(superCls.attr("__name__"), captureTypeName); + }); + if (getTypeIDFunction) { + def_staticmethod("get_static_typeid", + [getTypeIDFunction]() { return getTypeIDFunction(); }); + nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) + .attr(MLIR_PYTHON_CAPI_TYPE_CASTER_REGISTER_ATTR)( + getTypeIDFunction())(nanobind::cpp_function( + [thisClass = thisClass](const nanobind::object &mlirAttribute) { + return thisClass(mlirAttribute); + })); + } + } +}; + +/// Creates a custom subclass of mlir.ir.Type, implementing a casting +/// constructor and type checking methods. +class mlir_type_subclass : public pure_subclass { +public: + using IsAFunctionTy = bool (*)(MlirType); + using GetTypeIDFunctionTy = MlirTypeID (*)(); + + /// Subclasses by looking up the super-class dynamically. + mlir_type_subclass(nanobind::handle scope, const char *typeClassName, + IsAFunctionTy isaFunction, + GetTypeIDFunctionTy getTypeIDFunction = nullptr) + : mlir_type_subclass( + scope, typeClassName, isaFunction, + nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) + .attr("Type"), + getTypeIDFunction) {} + + /// Subclasses with a provided mlir.ir.Type super-class. This must + /// be used if the subclass is being defined in the same extension module + /// as the mlir.ir class (otherwise, it will trigger a recursive + /// initialization). + mlir_type_subclass(nanobind::handle scope, const char *typeClassName, + IsAFunctionTy isaFunction, + const nanobind::object &superCls, + GetTypeIDFunctionTy getTypeIDFunction = nullptr) + : pure_subclass(scope, typeClassName, superCls) { + // Casting constructor. Note that it hard, if not impossible, to properly + // call chain to parent `__init__` in nanobind due to its special handling + // for init functions that don't have a fully constructed self-reference, + // which makes it impossible to forward it to `__init__` of a superclass. + // Instead, provide a custom `__new__` and call that of a superclass, which + // eventually calls `__init__` of the superclass. Since attribute subclasses + // have no additional members, we can just return the instance thus created + // without amending it. + std::string captureTypeName( + typeClassName); // As string in case if typeClassName is not static. + nanobind::object newCf = nanobind::cpp_function( + [superCls, isaFunction, captureTypeName](nanobind::object cls, + nanobind::object otherType) { + MlirType rawType = nanobind::cast(otherType); + if (!isaFunction(rawType)) { + auto origRepr = + nanobind::cast(nanobind::repr(otherType)); + throw std::invalid_argument((llvm::Twine("Cannot cast type to ") + + captureTypeName + " (from " + + origRepr + ")") + .str()); + } + nanobind::object self = superCls.attr("__new__")(cls, otherType); + return self; + }, + nanobind::name("__new__"), nanobind::arg("cls"), + nanobind::arg("cast_from_type")); + thisClass.attr("__new__") = newCf; + + // 'isinstance' method. 
+ def_staticmethod( + "isinstance", + [isaFunction](MlirType other) { return isaFunction(other); }, + nanobind::arg("other_type")); + def("__repr__", [superCls, captureTypeName](nanobind::object self) { + return nanobind::repr(superCls(self)) + .attr("replace")(superCls.attr("__name__"), captureTypeName); + }); + if (getTypeIDFunction) { + // 'get_static_typeid' method. + // This is modeled as a static method instead of a static property because + // `def_property_readonly_static` is not available in `pure_subclass` and + // we do not want to introduce the complexity that pybind uses to + // implement it. + def_staticmethod("get_static_typeid", + [getTypeIDFunction]() { return getTypeIDFunction(); }); + nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) + .attr(MLIR_PYTHON_CAPI_TYPE_CASTER_REGISTER_ATTR)( + getTypeIDFunction())(nanobind::cpp_function( + [thisClass = thisClass](const nanobind::object &mlirType) { + return thisClass(mlirType); + })); + } + } +}; + +/// Creates a custom subclass of mlir.ir.Value, implementing a casting +/// constructor and type checking methods. +class mlir_value_subclass : public pure_subclass { +public: + using IsAFunctionTy = bool (*)(MlirValue); + + /// Subclasses by looking up the super-class dynamically. + mlir_value_subclass(nanobind::handle scope, const char *valueClassName, + IsAFunctionTy isaFunction) + : mlir_value_subclass( + scope, valueClassName, isaFunction, + nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) + .attr("Value")) {} + + /// Subclasses with a provided mlir.ir.Value super-class. This must + /// be used if the subclass is being defined in the same extension module + /// as the mlir.ir class (otherwise, it will trigger a recursive + /// initialization). + mlir_value_subclass(nanobind::handle scope, const char *valueClassName, + IsAFunctionTy isaFunction, + const nanobind::object &superCls) + : pure_subclass(scope, valueClassName, superCls) { + // Casting constructor. Note that it hard, if not impossible, to properly + // call chain to parent `__init__` in nanobind due to its special handling + // for init functions that don't have a fully constructed self-reference, + // which makes it impossible to forward it to `__init__` of a superclass. + // Instead, provide a custom `__new__` and call that of a superclass, which + // eventually calls `__init__` of the superclass. Since attribute subclasses + // have no additional members, we can just return the instance thus created + // without amending it. + std::string captureValueName( + valueClassName); // As string in case if valueClassName is not static. + nanobind::object newCf = nanobind::cpp_function( + [superCls, isaFunction, captureValueName](nanobind::object cls, + nanobind::object otherValue) { + MlirValue rawValue = nanobind::cast(otherValue); + if (!isaFunction(rawValue)) { + auto origRepr = + nanobind::cast(nanobind::repr(otherValue)); + throw std::invalid_argument((llvm::Twine("Cannot cast value to ") + + captureValueName + " (from " + + origRepr + ")") + .str()); + } + nanobind::object self = superCls.attr("__new__")(cls, otherValue); + return self; + }, + nanobind::name("__new__"), nanobind::arg("cls"), + nanobind::arg("cast_from_value")); + thisClass.attr("__new__") = newCf; + + // 'isinstance' method. + def_staticmethod( + "isinstance", + [isaFunction](MlirValue other) { return isaFunction(other); }, + nanobind::arg("other_value")); + } +}; + +} // namespace nanobind_adaptors + +/// RAII scope intercepting all diagnostics into a string. 
The message must be +/// checked before this goes out of scope. +class CollectDiagnosticsToStringScope { +public: + explicit CollectDiagnosticsToStringScope(MlirContext ctx) : context(ctx) { + handlerID = mlirContextAttachDiagnosticHandler(ctx, &handler, &errorMessage, + /*deleteUserData=*/nullptr); + } + ~CollectDiagnosticsToStringScope() { + assert(errorMessage.empty() && "unchecked error message"); + mlirContextDetachDiagnosticHandler(context, handlerID); + } + + [[nodiscard]] std::string takeMessage() { return std::move(errorMessage); } + +private: + static MlirLogicalResult handler(MlirDiagnostic diag, void *data) { + auto printer = +[](MlirStringRef message, void *data) { + *static_cast(data) += + llvm::StringRef(message.data, message.length); + }; + MlirLocation loc = mlirDiagnosticGetLocation(diag); + *static_cast(data) += "at "; + mlirLocationPrint(loc, printer, data); + *static_cast(data) += ": "; + mlirDiagnosticPrint(diag, printer, data); + return mlirLogicalResultSuccess(); + } + + MlirContext context; + MlirDiagnosticHandlerID handlerID; + std::string errorMessage = ""; +}; + +} // namespace python +} // namespace mlir + +#endif // MLIR_BINDINGS_PYTHON_NANOBINDADAPTORS_H diff --git a/mlir/include/mlir/Bindings/Python/PybindAdaptors.h b/mlir/include/mlir/Bindings/Python/PybindAdaptors.h index df4b9bf713592..c8233355d1d67 100644 --- a/mlir/include/mlir/Bindings/Python/PybindAdaptors.h +++ b/mlir/include/mlir/Bindings/Python/PybindAdaptors.h @@ -1,4 +1,4 @@ -//===- PybindAdaptors.h - Adaptors for interop with MLIR APIs -------------===// +//===- PybindAdaptors.h - Interop with MLIR APIs via pybind11 -------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,9 +6,10 @@ // //===----------------------------------------------------------------------===// // This file contains adaptors for clients of the core MLIR Python APIs to -// interop via MLIR CAPI types. The facilities here do not depend on -// implementation details of the MLIR Python API and do not introduce C++-level -// dependencies with it (requiring only Python and CAPI-level dependencies). +// interop via MLIR CAPI types, using pybind11. The facilities here do not +// depend on implementation details of the MLIR Python API and do not introduce +// C++-level dependencies with it (requiring only Python and CAPI-level +// dependencies). // // It is encouraged to be used both in-tree and out-of-tree. For in-tree use // cases, it should be used for dialect implementations (versus relying on @@ -611,40 +612,6 @@ class mlir_value_subclass : public pure_subclass { } // namespace adaptors -/// RAII scope intercepting all diagnostics into a string. The message must be -/// checked before this goes out of scope. 
-class CollectDiagnosticsToStringScope { -public: - explicit CollectDiagnosticsToStringScope(MlirContext ctx) : context(ctx) { - handlerID = mlirContextAttachDiagnosticHandler(ctx, &handler, &errorMessage, - /*deleteUserData=*/nullptr); - } - ~CollectDiagnosticsToStringScope() { - assert(errorMessage.empty() && "unchecked error message"); - mlirContextDetachDiagnosticHandler(context, handlerID); - } - - [[nodiscard]] std::string takeMessage() { return std::move(errorMessage); } - -private: - static MlirLogicalResult handler(MlirDiagnostic diag, void *data) { - auto printer = +[](MlirStringRef message, void *data) { - *static_cast(data) += - llvm::StringRef(message.data, message.length); - }; - MlirLocation loc = mlirDiagnosticGetLocation(diag); - *static_cast(data) += "at "; - mlirLocationPrint(loc, printer, data); - *static_cast(data) += ": "; - mlirDiagnosticPrint(diag, printer, data); - return mlirLogicalResultSuccess(); - } - - MlirContext context; - MlirDiagnosticHandlerID handlerID; - std::string errorMessage = ""; -}; - } // namespace python } // namespace mlir diff --git a/mlir/lib/Bindings/Python/DialectLLVM.cpp b/mlir/lib/Bindings/Python/DialectLLVM.cpp index 42a4c8c0793ba..cccf1370b8cc8 100644 --- a/mlir/lib/Bindings/Python/DialectLLVM.cpp +++ b/mlir/lib/Bindings/Python/DialectLLVM.cpp @@ -6,11 +6,13 @@ // //===----------------------------------------------------------------------===// +#include + #include "mlir-c/Dialect/LLVM.h" #include "mlir-c/IR.h" #include "mlir-c/Support.h" +#include "mlir/Bindings/Python/Diagnostics.h" #include "mlir/Bindings/Python/PybindAdaptors.h" -#include namespace py = pybind11; using namespace llvm; diff --git a/mlir/lib/Bindings/Python/TransformInterpreter.cpp b/mlir/lib/Bindings/Python/TransformInterpreter.cpp index f6b4532b1b6be..0c8c0e0a965aa 100644 --- a/mlir/lib/Bindings/Python/TransformInterpreter.cpp +++ b/mlir/lib/Bindings/Python/TransformInterpreter.cpp @@ -10,14 +10,15 @@ // //===----------------------------------------------------------------------===// +#include +#include + #include "mlir-c/Dialect/Transform/Interpreter.h" #include "mlir-c/IR.h" #include "mlir-c/Support.h" +#include "mlir/Bindings/Python/Diagnostics.h" #include "mlir/Bindings/Python/PybindAdaptors.h" -#include -#include - namespace py = pybind11; namespace { diff --git a/mlir/python/CMakeLists.txt b/mlir/python/CMakeLists.txt index 23187f256455b..e1b870b53ad25 100644 --- a/mlir/python/CMakeLists.txt +++ b/mlir/python/CMakeLists.txt @@ -683,7 +683,9 @@ if(MLIR_INCLUDE_TESTS) MLIRPythonTestSources.Dialects.PythonTest ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir" ADD_TO_PARENT MLIRPythonTestSources.Dialects - SOURCES dialects/python_test.py) + SOURCES + dialects/python_test.py + ) set(LLVM_TARGET_DEFINITIONS "${MLIR_MAIN_SRC_DIR}/test/python/python_test_ops.td") mlir_tablegen( @@ -697,12 +699,25 @@ if(MLIR_INCLUDE_TESTS) ADD_TO_PARENT MLIRPythonTestSources.Dialects.PythonTest SOURCES "dialects/_python_test_ops_gen.py") - declare_mlir_python_extension(MLIRPythonTestSources.PythonTestExtension - MODULE_NAME _mlirPythonTest + declare_mlir_python_extension(MLIRPythonTestSources.PythonTestExtensionPybind11 + MODULE_NAME _mlirPythonTestPybind11 + ADD_TO_PARENT MLIRPythonTestSources.Dialects + ROOT_DIR "${MLIR_SOURCE_DIR}/test/python/lib" + PYTHON_BINDINGS_LIBRARY pybind11 + SOURCES + PythonTestModulePybind11.cpp + PRIVATE_LINK_LIBS + LLVMSupport + EMBED_CAPI_LINK_LIBS + MLIRCAPIPythonTestDialect + ) + declare_mlir_python_extension(MLIRPythonTestSources.PythonTestExtensionNanobind 
+ MODULE_NAME _mlirPythonTestNanobind ADD_TO_PARENT MLIRPythonTestSources.Dialects ROOT_DIR "${MLIR_SOURCE_DIR}/test/python/lib" + PYTHON_BINDINGS_LIBRARY nanobind SOURCES - PythonTestModule.cpp + PythonTestModuleNanobind.cpp PRIVATE_LINK_LIBS LLVMSupport EMBED_CAPI_LINK_LIBS diff --git a/mlir/python/mlir/dialects/python_test.py b/mlir/python/mlir/dialects/python_test.py index b5baa80bc767f..9380896c8c06e 100644 --- a/mlir/python/mlir/dialects/python_test.py +++ b/mlir/python/mlir/dialects/python_test.py @@ -3,15 +3,14 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception from ._python_test_ops_gen import * -from .._mlir_libs._mlirPythonTest import ( - TestAttr, - TestType, - TestTensorValue, - TestIntegerRankedTensorType, -) -def register_python_test_dialect(registry): - from .._mlir_libs import _mlirPythonTest +def register_python_test_dialect(registry, use_nanobind): + if use_nanobind: + from .._mlir_libs import _mlirPythonTestNanobind - _mlirPythonTest.register_dialect(registry) + _mlirPythonTestNanobind.register_dialect(registry) + else: + from .._mlir_libs import _mlirPythonTestPybind11 + + _mlirPythonTestPybind11.register_dialect(registry) diff --git a/mlir/python/requirements.txt b/mlir/python/requirements.txt index 272d066831f92..ab8a9122919e1 100644 --- a/mlir/python/requirements.txt +++ b/mlir/python/requirements.txt @@ -1,3 +1,4 @@ +nanobind>=2.0, <3.0 numpy>=1.19.5, <=2.1.2 pybind11>=2.10.0, <=2.13.6 PyYAML>=5.4.0, <=6.0.1 diff --git a/mlir/test/python/dialects/python_test.py b/mlir/test/python/dialects/python_test.py index 948d1225ea489..fd678f8321fd9 100644 --- a/mlir/test/python/dialects/python_test.py +++ b/mlir/test/python/dialects/python_test.py @@ -1,12 +1,33 @@ -# RUN: %PYTHON %s | FileCheck %s +# RUN: %PYTHON %s pybind11 | FileCheck %s +# RUN: %PYTHON %s nanobind | FileCheck %s +import sys from mlir.ir import * import mlir.dialects.func as func import mlir.dialects.python_test as test import mlir.dialects.tensor as tensor import mlir.dialects.arith as arith -test.register_python_test_dialect(get_dialect_registry()) +if sys.argv[1] == "pybind11": + from mlir._mlir_libs._mlirPythonTestPybind11 import ( + TestAttr, + TestType, + TestTensorValue, + TestIntegerRankedTensorType, + ) + + test.register_python_test_dialect(get_dialect_registry(), use_nanobind=False) +elif sys.argv[1] == "nanobind": + from mlir._mlir_libs._mlirPythonTestNanobind import ( + TestAttr, + TestType, + TestTensorValue, + TestIntegerRankedTensorType, + ) + + test.register_python_test_dialect(get_dialect_registry(), use_nanobind=True) +else: + raise ValueError("Expected pybind11 or nanobind as argument") def run(f): @@ -308,7 +329,7 @@ def testOptionalOperandOp(): @run def testCustomAttribute(): with Context() as ctx, Location.unknown(): - a = test.TestAttr.get() + a = TestAttr.get() # CHECK: #python_test.test_attr print(a) @@ -325,11 +346,11 @@ def testCustomAttribute(): print(repr(op2.test_attr)) # The following cast must not assert. - b = test.TestAttr(a) + b = TestAttr(a) unit = UnitAttr.get() try: - test.TestAttr(unit) + TestAttr(unit) except ValueError as e: assert "Cannot cast attribute to TestAttr" in str(e) else: @@ -338,7 +359,7 @@ def testCustomAttribute(): # The following must trigger a TypeError from our adaptors and must not # crash. 
try: - test.TestAttr(42) + TestAttr(42) except TypeError as e: assert "Expected an MLIR object" in str(e) else: @@ -347,7 +368,7 @@ def testCustomAttribute(): # The following must trigger a TypeError from pybind (therefore, not # checking its message) and must not crash. try: - test.TestAttr(42, 56) + TestAttr(42, 56) except TypeError: pass else: @@ -357,12 +378,12 @@ def testCustomAttribute(): @run def testCustomType(): with Context() as ctx: - a = test.TestType.get() + a = TestType.get() # CHECK: !python_test.test_type print(a) # The following cast must not assert. - b = test.TestType(a) + b = TestType(a) # Instance custom types should have typeids assert isinstance(b.typeid, TypeID) # Subclasses of ir.Type should not have a static_typeid @@ -374,7 +395,7 @@ def testCustomType(): i8 = IntegerType.get_signless(8) try: - test.TestType(i8) + TestType(i8) except ValueError as e: assert "Cannot cast type to TestType" in str(e) else: @@ -383,7 +404,7 @@ def testCustomType(): # The following must trigger a TypeError from our adaptors and must not # crash. try: - test.TestType(42) + TestType(42) except TypeError as e: assert "Expected an MLIR object" in str(e) else: @@ -392,7 +413,7 @@ def testCustomType(): # The following must trigger a TypeError from pybind (therefore, not # checking its message) and must not crash. try: - test.TestType(42, 56) + TestType(42, 56) except TypeError: pass else: @@ -405,7 +426,7 @@ def testTensorValue(): with Context() as ctx, Location.unknown(): i8 = IntegerType.get_signless(8) - class Tensor(test.TestTensorValue): + class Tensor(TestTensorValue): def __str__(self): return super().__str__().replace("Value", "Tensor") @@ -425,9 +446,9 @@ def __str__(self): # Classes of custom types that inherit from concrete types should have # static_typeid - assert isinstance(test.TestIntegerRankedTensorType.static_typeid, TypeID) + assert isinstance(TestIntegerRankedTensorType.static_typeid, TypeID) # And it should be equal to the in-tree concrete type - assert test.TestIntegerRankedTensorType.static_typeid == t.type.typeid + assert TestIntegerRankedTensorType.static_typeid == t.type.typeid d = tensor.EmptyOp([1, 2, 3], IntegerType.get_signless(5)).result # CHECK: Value(%{{.*}} = tensor.empty() : tensor<1x2x3xi5>) @@ -491,7 +512,7 @@ def inferReturnTypeComponents(): @run def testCustomTypeTypeCaster(): with Context() as ctx, Location.unknown(): - a = test.TestType.get() + a = TestType.get() assert a.typeid is not None b = Type.parse("!python_test.test_type") @@ -500,7 +521,7 @@ def testCustomTypeTypeCaster(): # CHECK: TestType(!python_test.test_type) print(repr(b)) - c = test.TestIntegerRankedTensorType.get([10, 10], 5) + c = TestIntegerRankedTensorType.get([10, 10], 5) # CHECK: tensor<10x10xi5> print(c) # CHECK: TestIntegerRankedTensorType(tensor<10x10xi5>) @@ -511,7 +532,7 @@ def testCustomTypeTypeCaster(): @register_type_caster(c.typeid) def type_caster(pytype): - return test.TestIntegerRankedTensorType(pytype) + return TestIntegerRankedTensorType(pytype) except RuntimeError as e: print(e) @@ -530,7 +551,7 @@ def type_caster(pytype): @register_type_caster(c.typeid, replace=True) def type_caster(pytype): - return test.TestIntegerRankedTensorType(pytype) + return TestIntegerRankedTensorType(pytype) d = tensor.EmptyOp([10, 10], IntegerType.get_signless(5)).result # CHECK: tensor<10x10xi5> diff --git a/mlir/test/python/lib/CMakeLists.txt b/mlir/test/python/lib/CMakeLists.txt index d7cbbfbc21477..198ed8211e773 100644 --- a/mlir/test/python/lib/CMakeLists.txt +++ 
b/mlir/test/python/lib/CMakeLists.txt @@ -1,7 +1,8 @@ set(LLVM_OPTIONAL_SOURCES PythonTestCAPI.cpp PythonTestDialect.cpp - PythonTestModule.cpp + PythonTestModulePybind11.cpp + PythonTestModuleNanobind.cpp ) add_mlir_library(MLIRPythonTestDialect diff --git a/mlir/test/python/lib/PythonTestModuleNanobind.cpp b/mlir/test/python/lib/PythonTestModuleNanobind.cpp new file mode 100644 index 0000000000000..7c504d04be0d1 --- /dev/null +++ b/mlir/test/python/lib/PythonTestModuleNanobind.cpp @@ -0,0 +1,121 @@ +//===- PythonTestModuleNanobind.cpp - PythonTest dialect extension --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This is the nanobind edition of the PythonTest dialect module. +//===----------------------------------------------------------------------===// + +#include +#include + +#include "PythonTestCAPI.h" +#include "mlir-c/BuiltinAttributes.h" +#include "mlir-c/BuiltinTypes.h" +#include "mlir-c/IR.h" +#include "mlir/Bindings/Python/NanobindAdaptors.h" + +namespace nb = nanobind; +using namespace mlir::python::nanobind_adaptors; + +static bool mlirTypeIsARankedIntegerTensor(MlirType t) { + return mlirTypeIsARankedTensor(t) && + mlirTypeIsAInteger(mlirShapedTypeGetElementType(t)); +} + +NB_MODULE(_mlirPythonTestNanobind, m) { + m.def( + "register_python_test_dialect", + [](MlirContext context, bool load) { + MlirDialectHandle pythonTestDialect = + mlirGetDialectHandle__python_test__(); + mlirDialectHandleRegisterDialect(pythonTestDialect, context); + if (load) { + mlirDialectHandleLoadDialect(pythonTestDialect, context); + } + }, + nb::arg("context"), nb::arg("load") = true); + + m.def( + "register_dialect", + [](MlirDialectRegistry registry) { + MlirDialectHandle pythonTestDialect = + mlirGetDialectHandle__python_test__(); + mlirDialectHandleInsertDialect(pythonTestDialect, registry); + }, + nb::arg("registry")); + + mlir_attribute_subclass(m, "TestAttr", + mlirAttributeIsAPythonTestTestAttribute, + mlirPythonTestTestAttributeGetTypeID) + .def_classmethod( + "get", + [](const nb::object &cls, MlirContext ctx) { + return cls(mlirPythonTestTestAttributeGet(ctx)); + }, + nb::arg("cls"), nb::arg("context").none() = nb::none()); + + mlir_type_subclass(m, "TestType", mlirTypeIsAPythonTestTestType, + mlirPythonTestTestTypeGetTypeID) + .def_classmethod( + "get", + [](const nb::object &cls, MlirContext ctx) { + return cls(mlirPythonTestTestTypeGet(ctx)); + }, + nb::arg("cls"), nb::arg("context").none() = nb::none()); + + auto typeCls = + mlir_type_subclass(m, "TestIntegerRankedTensorType", + mlirTypeIsARankedIntegerTensor, + nb::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) + .attr("RankedTensorType")) + .def_classmethod( + "get", + [](const nb::object &cls, std::vector shape, + unsigned width, MlirContext ctx) { + MlirAttribute encoding = mlirAttributeGetNull(); + return cls(mlirRankedTensorTypeGet( + shape.size(), shape.data(), mlirIntegerTypeGet(ctx, width), + encoding)); + }, + nb::arg("cls"), nb::arg("shape"), nb::arg("width"), + nb::arg("context").none() = nb::none()); + + assert(nb::hasattr(typeCls.get_class(), "static_typeid") && + "TestIntegerRankedTensorType has no static_typeid"); + + MlirTypeID mlirRankedTensorTypeID = mlirRankedTensorTypeGetTypeID(); + + nb::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) + 
.attr(MLIR_PYTHON_CAPI_TYPE_CASTER_REGISTER_ATTR)( + mlirRankedTensorTypeID, nb::arg("replace") = true)( + nanobind::cpp_function([typeCls](const nb::object &mlirType) { + return typeCls.get_class()(mlirType); + })); + + auto valueCls = mlir_value_subclass(m, "TestTensorValue", + mlirTypeIsAPythonTestTestTensorValue) + .def("is_null", [](MlirValue &self) { + return mlirValueIsNull(self); + }); + + nb::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) + .attr(MLIR_PYTHON_CAPI_VALUE_CASTER_REGISTER_ATTR)( + mlirRankedTensorTypeID)( + nanobind::cpp_function([valueCls](const nb::object &valueObj) { + nb::object capsule = mlirApiObjectToCapsule(valueObj); + MlirValue v = mlirPythonCapsuleToValue(capsule.ptr()); + MlirType t = mlirValueGetType(v); + // This is hyper-specific in order to exercise/test registering a + // value caster from cpp (but only for a single test case; see + // testTensorValue python_test.py). + if (mlirShapedTypeHasStaticShape(t) && + mlirShapedTypeGetDimSize(t, 0) == 1 && + mlirShapedTypeGetDimSize(t, 1) == 2 && + mlirShapedTypeGetDimSize(t, 2) == 3) + return valueCls.get_class()(valueObj); + return valueObj; + })); +} diff --git a/mlir/test/python/lib/PythonTestModule.cpp b/mlir/test/python/lib/PythonTestModulePybind11.cpp similarity index 96% rename from mlir/test/python/lib/PythonTestModule.cpp rename to mlir/test/python/lib/PythonTestModulePybind11.cpp index a4f538dcb5594..94a5f5178d16e 100644 --- a/mlir/test/python/lib/PythonTestModule.cpp +++ b/mlir/test/python/lib/PythonTestModulePybind11.cpp @@ -5,6 +5,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +// This is the pybind11 edition of the PythonTest dialect module. +//===----------------------------------------------------------------------===// #include "PythonTestCAPI.h" #include "mlir-c/BuiltinAttributes.h" @@ -21,7 +23,7 @@ static bool mlirTypeIsARankedIntegerTensor(MlirType t) { mlirTypeIsAInteger(mlirShapedTypeGetElementType(t)); } -PYBIND11_MODULE(_mlirPythonTest, m) { +PYBIND11_MODULE(_mlirPythonTestPybind11, m) { m.def( "register_python_test_dialect", [](MlirContext context, bool load) { diff --git a/utils/bazel/WORKSPACE b/utils/bazel/WORKSPACE index 7baca11eed3d3..66ba1ac1b17e1 100644 --- a/utils/bazel/WORKSPACE +++ b/utils/bazel/WORKSPACE @@ -148,6 +148,24 @@ maybe( url = "https://github.com/pybind/pybind11/archive/v2.10.3.zip", ) +maybe( + http_archive, + name = "robin_map", + strip_prefix = "robin-map-1.3.0", + sha256 = "a8424ad3b0affd4c57ed26f0f3d8a29604f0e1f2ef2089f497f614b1c94c7236", + build_file = "@llvm-raw//utils/bazel/third_party_build:robin_map.BUILD", + url = "https://github.com/Tessil/robin-map/archive/refs/tags/v1.3.0.tar.gz", +) + +maybe( + http_archive, + name = "nanobind", + build_file = "@llvm-raw//utils/bazel/third_party_build:nanobind.BUILD", + sha256 = "bfbfc7e5759f1669e4ddb48752b1ddc5647d1430e94614d6f8626df1d508e65a", + strip_prefix = "nanobind-2.2.0", + url = "https://github.com/wjakob/nanobind/archive/refs/tags/v2.2.0.tar.gz", +) + load("@rules_python//python:repositories.bzl", "py_repositories", "python_register_toolchains") py_repositories() diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 179fed2f5e9a0..544becfa30b40 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -932,7 +932,6 @@ exports_files( filegroup( name = 
"MLIRBindingsPythonHeaderFiles", srcs = glob([ - "lib/Bindings/Python/*.h", "include/mlir-c/Bindings/Python/*.h", "include/mlir/Bindings/Python/*.h", ]), @@ -942,12 +941,10 @@ cc_library( name = "MLIRBindingsPythonHeaders", includes = [ "include", - "lib/Bindings/Python", ], textual_hdrs = [":MLIRBindingsPythonHeaderFiles"], deps = [ ":CAPIIRHeaders", - ":CAPITransformsHeaders", "@pybind11", "@rules_python//python/cc:current_py_cc_headers", ], @@ -957,17 +954,41 @@ cc_library( name = "MLIRBindingsPythonHeadersAndDeps", includes = [ "include", - "lib/Bindings/Python", ], textual_hdrs = [":MLIRBindingsPythonHeaderFiles"], deps = [ ":CAPIIR", - ":CAPITransforms", "@pybind11", "@rules_python//python/cc:current_py_cc_headers", ], ) +cc_library( + name = "MLIRBindingsPythonNanobindHeaders", + includes = [ + "include", + ], + textual_hdrs = [":MLIRBindingsPythonHeaderFiles"], + deps = [ + ":CAPIIRHeaders", + "@nanobind", + "@rules_python//python/cc:current_py_cc_headers", + ], +) + +cc_library( + name = "MLIRBindingsPythonNanobindHeadersAndDeps", + includes = [ + "include", + ], + textual_hdrs = [":MLIRBindingsPythonHeaderFiles"], + deps = [ + ":CAPIIR", + "@nanobind", + "@rules_python//python/cc:current_py_cc_headers", + ], +) + # These flags are needed for pybind11 to work. PYBIND11_COPTS = [ "-fexceptions", @@ -993,16 +1014,25 @@ filegroup( ], ) +filegroup( + name = "MLIRBindingsPythonCoreHeaders", + srcs = glob([ + "lib/Bindings/Python/*.h", + ]), +) + cc_library( name = "MLIRBindingsPythonCore", srcs = [":MLIRBindingsPythonSourceFiles"], copts = PYBIND11_COPTS, features = PYBIND11_FEATURES, + textual_hdrs = [":MLIRBindingsPythonCoreHeaders"], deps = [ ":CAPIAsync", ":CAPIDebug", ":CAPIIR", ":CAPIInterfaces", + ":CAPITransforms", ":MLIRBindingsPythonHeadersAndDeps", ":Support", ":config", @@ -1017,10 +1047,12 @@ cc_library( srcs = [":MLIRBindingsPythonSourceFiles"], copts = PYBIND11_COPTS, features = PYBIND11_FEATURES, + textual_hdrs = [":MLIRBindingsPythonCoreHeaders"], deps = [ ":CAPIAsyncHeaders", ":CAPIDebugHeaders", ":CAPIIRHeaders", + ":CAPITransformsHeaders", ":MLIRBindingsPythonHeaders", ":Support", ":config", @@ -1050,6 +1082,9 @@ cc_binary( # These flags are needed for pybind11 to work. 
copts = PYBIND11_COPTS, features = PYBIND11_FEATURES, + includes = [ + "lib/Bindings/Python", + ], linkshared = 1, linkstatic = 0, deps = [ @@ -1063,6 +1098,9 @@ cc_binary( srcs = ["lib/Bindings/Python/DialectLinalg.cpp"], copts = PYBIND11_COPTS, features = PYBIND11_FEATURES, + includes = [ + "lib/Bindings/Python", + ], linkshared = 1, linkstatic = 0, deps = [ @@ -8448,9 +8486,9 @@ cc_library( hdrs = ["include/mlir/Conversion/ConvertToLLVM/ToLLVMPass.h"], includes = ["include"], deps = [ + ":Analysis", ":ConversionPassIncGen", ":ConvertToLLVMInterface", - ":Analysis", ":IR", ":LLVMCommonConversion", ":LLVMDialect", diff --git a/utils/bazel/third_party_build/nanobind.BUILD b/utils/bazel/third_party_build/nanobind.BUILD new file mode 100644 index 0000000000000..262d14a040b87 --- /dev/null +++ b/utils/bazel/third_party_build/nanobind.BUILD @@ -0,0 +1,25 @@ +cc_library( + name = "nanobind", + srcs = glob( + [ + "src/*.cpp", + ], + exclude = ["src/nb_combined.cpp"], + ), + defines = [ + "NB_BUILD=1", + "NB_SHARED=1", + ], + includes = ["include"], + textual_hdrs = glob( + [ + "include/**/*.h", + "src/*.h", + ], + ), + visibility = ["//visibility:public"], + deps = [ + "@robin_map", + "@rules_python//python/cc:current_py_cc_headers", + ], +) diff --git a/utils/bazel/third_party_build/robin_map.BUILD b/utils/bazel/third_party_build/robin_map.BUILD new file mode 100644 index 0000000000000..b8d04beaed81f --- /dev/null +++ b/utils/bazel/third_party_build/robin_map.BUILD @@ -0,0 +1,12 @@ +cc_library( + name = "robin_map", + hdrs = [ + "include/tsl/robin_growth_policy.h", + "include/tsl/robin_hash.h", + "include/tsl/robin_map.h", + "include/tsl/robin_set.h", + ], + includes = ["."], + strip_include_prefix = "include", + visibility = ["//visibility:public"], +) From 04996a28b7639a333c5d04bf4d10d70bda3e0173 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 3 Dec 2024 09:17:17 -0800 Subject: [PATCH 095/191] [ELF] Rename target-specific RelExpr enumerators RelExpr enumerators are named `R_*`, which can be confused with ELF relocation type names. Rename the target-specific ones to `RE_*` to avoid confusion. For consistency, the target-independent ones can be renamed as well, but that's not urgent. The relocation processing mechanism with RelExpr has non-trivial overhead compared with mold's approach, and we might make more code into Arch/*.cpp files and decrease the enumerators. 
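As a minimal sketch of the collision this rename removes (simplified, not lld's
actual definitions): ELF relocation type names and lld's internal RelExpr
enumerators previously shared the `R_` prefix, so unrelated constants read alike
in `getRelExpr` and related hooks.

```cpp
// Simplified, illustrative declarations only; the real types live in
// llvm/BinaryFormat and lld/ELF/Relocations.h and are defined differently.
enum RelType { R_AARCH64_ADR_GOT_PAGE };  // ELF relocation type from the psABI (unchanged)
enum RelExpr { RE_AARCH64_GOT_PAGE_PC };  // lld RelExpr, formerly R_AARCH64_GOT_PAGE_PC

// A target hook now maps one namespace onto the other without the two
// prefixes looking identical, e.g.:
//   case R_AARCH64_ADR_GOT_PAGE: return RE_AARCH64_GOT_PAGE_PC;
```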
Pull Request: https://github.com/llvm/llvm-project/pull/118424 --- lld/ELF/Arch/AArch64.cpp | 20 ++++----- lld/ELF/Arch/ARM.cpp | 6 +-- lld/ELF/Arch/LoongArch.cpp | 22 +++++----- lld/ELF/Arch/Mips.cpp | 18 ++++---- lld/ELF/Arch/PPC.cpp | 2 +- lld/ELF/Arch/PPC64.cpp | 16 +++---- lld/ELF/Arch/RISCV.cpp | 8 ++-- lld/ELF/InputSection.cpp | 74 +++++++++++++++---------------- lld/ELF/Relocations.cpp | 83 ++++++++++++++++++----------------- lld/ELF/Relocations.h | 64 +++++++++++++-------------- lld/ELF/SyntheticSections.cpp | 4 +- 11 files changed, 159 insertions(+), 158 deletions(-) diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 5b5ad482ea127..99fc750486e4b 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -154,9 +154,9 @@ RelExpr AArch64::getRelExpr(RelType type, const Symbol &s, case R_AARCH64_MOVW_UABS_G3: return R_ABS; case R_AARCH64_AUTH_ABS64: - return R_AARCH64_AUTH; + return RE_AARCH64_AUTH; case R_AARCH64_TLSDESC_ADR_PAGE21: - return R_AARCH64_TLSDESC_PAGE; + return RE_AARCH64_TLSDESC_PAGE; case R_AARCH64_TLSDESC_LD64_LO12: case R_AARCH64_TLSDESC_ADD_LO12: return R_TLSDESC; @@ -198,15 +198,15 @@ RelExpr AArch64::getRelExpr(RelType type, const Symbol &s, return R_PC; case R_AARCH64_ADR_PREL_PG_HI21: case R_AARCH64_ADR_PREL_PG_HI21_NC: - return R_AARCH64_PAGE_PC; + return RE_AARCH64_PAGE_PC; case R_AARCH64_LD64_GOT_LO12_NC: case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: return R_GOT; case R_AARCH64_LD64_GOTPAGE_LO15: - return R_AARCH64_GOT_PAGE; + return RE_AARCH64_GOT_PAGE; case R_AARCH64_ADR_GOT_PAGE: case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: - return R_AARCH64_GOT_PAGE_PC; + return RE_AARCH64_GOT_PAGE_PC; case R_AARCH64_GOTPCREL32: case R_AARCH64_GOT_LD_PREL19: return R_GOT_PC; @@ -222,7 +222,7 @@ RelExpr AArch64::getRelExpr(RelType type, const Symbol &s, RelExpr AArch64::adjustTlsExpr(RelType type, RelExpr expr) const { if (expr == R_RELAX_TLS_GD_TO_IE) { if (type == R_AARCH64_TLSDESC_ADR_PAGE21) - return R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC; + return RE_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC; return R_RELAX_TLS_GD_TO_IE_ABS; } return expr; @@ -877,7 +877,7 @@ bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel, if (val != llvm::SignExtend64(val, 33)) return false; - Relocation adrpSymRel = {R_AARCH64_PAGE_PC, R_AARCH64_ADR_PREL_PG_HI21, + Relocation adrpSymRel = {RE_AARCH64_PAGE_PC, R_AARCH64_ADR_PREL_PG_HI21, adrpRel.offset, /*addend=*/0, &sym}; Relocation addRel = {R_ABS, R_AARCH64_ADD_ABS_LO12_NC, ldrRel.offset, /*addend=*/0, &sym}; @@ -922,21 +922,21 @@ void AArch64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { } switch (rel.expr) { - case R_AARCH64_GOT_PAGE_PC: + case RE_AARCH64_GOT_PAGE_PC: if (i + 1 < size && relaxer.tryRelaxAdrpLdr(rel, sec.relocs()[i + 1], secAddr, buf)) { ++i; continue; } break; - case R_AARCH64_PAGE_PC: + case RE_AARCH64_PAGE_PC: if (i + 1 < size && relaxer.tryRelaxAdrpAdd(rel, sec.relocs()[i + 1], secAddr, buf)) { ++i; continue; } break; - case R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC: + case RE_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC: case R_RELAX_TLS_GD_TO_IE_ABS: relaxTlsGdToIe(loc, rel, val); continue; diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp index 62685b1e7dede..29a72d35af666 100644 --- a/lld/ELF/Arch/ARM.cpp +++ b/lld/ELF/Arch/ARM.cpp @@ -136,7 +136,7 @@ RelExpr ARM::getRelExpr(RelType type, const Symbol &s, // GOT(S) + A - P return R_GOT_PC; case R_ARM_SBREL32: - return R_ARM_SBREL; + return RE_ARM_SBREL; case R_ARM_TARGET1: return ctx.arg.target1Rel ? 
R_PC : R_ABS; case R_ARM_TARGET2: @@ -176,14 +176,14 @@ RelExpr ARM::getRelExpr(RelType type, const Symbol &s, case R_ARM_THM_ALU_PREL_11_0: case R_ARM_THM_PC8: case R_ARM_THM_PC12: - return R_ARM_PCA; + return RE_ARM_PCA; case R_ARM_MOVW_BREL_NC: case R_ARM_MOVW_BREL: case R_ARM_MOVT_BREL: case R_ARM_THM_MOVW_BREL_NC: case R_ARM_THM_MOVW_BREL: case R_ARM_THM_MOVT_BREL: - return R_ARM_SBREL; + return RE_ARM_SBREL; case R_ARM_NONE: return R_NONE; case R_ARM_TLS_LE32: diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index ebfdbafc9983e..3280c34cb6ed0 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -428,7 +428,7 @@ RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s, case R_LARCH_SUB_ULEB128: // The LoongArch add/sub relocs behave like the RISCV counterparts; reuse // the RelExpr to avoid code duplication. - return R_RISCV_ADD; + return RE_RISCV_ADD; case R_LARCH_32_PCREL: case R_LARCH_64_PCREL: case R_LARCH_PCREL20_S2: @@ -444,17 +444,17 @@ RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s, case R_LARCH_TLS_IE_PC_HI20: case R_LARCH_TLS_IE64_PC_LO20: case R_LARCH_TLS_IE64_PC_HI12: - return R_LOONGARCH_GOT_PAGE_PC; + return RE_LOONGARCH_GOT_PAGE_PC; case R_LARCH_GOT_PC_LO12: case R_LARCH_TLS_IE_PC_LO12: - return R_LOONGARCH_GOT; + return RE_LOONGARCH_GOT; case R_LARCH_TLS_LD_PC_HI20: case R_LARCH_TLS_GD_PC_HI20: - return R_LOONGARCH_TLSGD_PAGE_PC; + return RE_LOONGARCH_TLSGD_PAGE_PC; case R_LARCH_PCALA_HI20: - // Why not R_LOONGARCH_PAGE_PC, majority of references don't go through PLT - // anyway so why waste time checking only to get everything relaxed back to - // it? + // Why not RE_LOONGARCH_PAGE_PC, majority of references don't go through + // PLT anyway so why waste time checking only to get everything relaxed back + // to it? // // This is again due to the R_LARCH_PCALA_LO12 on JIRL case, where we want // both the HI20 and LO12 to potentially refer to the PLT. But in reality @@ -474,12 +474,12 @@ RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s, // // So, unfortunately we have to again workaround this quirk the same way as // BFD: assuming every R_LARCH_PCALA_HI20 is potentially PLT-needing, only - // relaxing back to R_LOONGARCH_PAGE_PC if it's known not so at a later + // relaxing back to RE_LOONGARCH_PAGE_PC if it's known not so at a later // stage. - return R_LOONGARCH_PLT_PAGE_PC; + return RE_LOONGARCH_PLT_PAGE_PC; case R_LARCH_PCALA64_LO20: case R_LARCH_PCALA64_HI12: - return R_LOONGARCH_PAGE_PC; + return RE_LOONGARCH_PAGE_PC; case R_LARCH_GOT_HI20: case R_LARCH_GOT_LO12: case R_LARCH_GOT64_LO20: @@ -501,7 +501,7 @@ RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s, case R_LARCH_TLS_DESC_PC_HI20: case R_LARCH_TLS_DESC64_PC_LO20: case R_LARCH_TLS_DESC64_PC_HI12: - return R_LOONGARCH_TLSDESC_PAGE_PC; + return RE_LOONGARCH_TLSDESC_PAGE_PC; case R_LARCH_TLS_DESC_PC_LO12: case R_LARCH_TLS_DESC_LD: case R_LARCH_TLS_DESC_HI20: diff --git a/lld/ELF/Arch/Mips.cpp b/lld/ELF/Arch/Mips.cpp index da76820de240d..121127ae6b21d 100644 --- a/lld/ELF/Arch/Mips.cpp +++ b/lld/ELF/Arch/Mips.cpp @@ -105,7 +105,7 @@ RelExpr MIPS::getRelExpr(RelType type, const Symbol &s, case R_MIPS_GPREL32: case R_MICROMIPS_GPREL16: case R_MICROMIPS_GPREL7_S2: - return R_MIPS_GOTREL; + return RE_MIPS_GOTREL; case R_MIPS_26: case R_MICROMIPS_26_S1: return R_PLT; @@ -122,9 +122,9 @@ RelExpr MIPS::getRelExpr(RelType type, const Symbol &s, // equal to the start of .got section. 
In that case we consider these // relocations as relative. if (&s == ctx.sym.mipsGpDisp) - return R_MIPS_GOT_GP_PC; + return RE_MIPS_GOT_GP_PC; if (&s == ctx.sym.mipsLocalGp) - return R_MIPS_GOT_GP; + return RE_MIPS_GOT_GP; [[fallthrough]]; case R_MIPS_32: case R_MIPS_64: @@ -163,14 +163,14 @@ RelExpr MIPS::getRelExpr(RelType type, const Symbol &s, case R_MIPS_GOT16: case R_MICROMIPS_GOT16: if (s.isLocal()) - return R_MIPS_GOT_LOCAL_PAGE; + return RE_MIPS_GOT_LOCAL_PAGE; [[fallthrough]]; case R_MIPS_CALL16: case R_MIPS_GOT_DISP: case R_MIPS_TLS_GOTTPREL: case R_MICROMIPS_CALL16: case R_MICROMIPS_TLS_GOTTPREL: - return R_MIPS_GOT_OFF; + return RE_MIPS_GOT_OFF; case R_MIPS_CALL_HI16: case R_MIPS_CALL_LO16: case R_MIPS_GOT_HI16: @@ -179,15 +179,15 @@ RelExpr MIPS::getRelExpr(RelType type, const Symbol &s, case R_MICROMIPS_CALL_LO16: case R_MICROMIPS_GOT_HI16: case R_MICROMIPS_GOT_LO16: - return R_MIPS_GOT_OFF32; + return RE_MIPS_GOT_OFF32; case R_MIPS_GOT_PAGE: - return R_MIPS_GOT_LOCAL_PAGE; + return RE_MIPS_GOT_LOCAL_PAGE; case R_MIPS_TLS_GD: case R_MICROMIPS_TLS_GD: - return R_MIPS_TLSGD; + return RE_MIPS_TLSGD; case R_MIPS_TLS_LDM: case R_MICROMIPS_TLS_LDM: - return R_MIPS_TLSLD; + return RE_MIPS_TLSLD; case R_MIPS_NONE: return R_NONE; default: diff --git a/lld/ELF/Arch/PPC.cpp b/lld/ELF/Arch/PPC.cpp index 417401374436a..3203e27d82fa2 100644 --- a/lld/ELF/Arch/PPC.cpp +++ b/lld/ELF/Arch/PPC.cpp @@ -250,7 +250,7 @@ RelExpr PPC::getRelExpr(RelType type, const Symbol &s, case R_PPC_REL24: return R_PLT_PC; case R_PPC_PLTREL24: - return R_PPC32_PLTREL; + return RE_PPC32_PLTREL; case R_PPC_GOT_TLSGD16: return R_TLSGD_GOT; case R_PPC_GOT_TLSLD16: diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp index b55385625a1cf..4edb6af4f09ad 100644 --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -1029,12 +1029,12 @@ RelExpr PPC64::getRelExpr(RelType type, const Symbol &s, return R_GOT_PC; case R_PPC64_TOC16_HA: case R_PPC64_TOC16_LO_DS: - return ctx.arg.tocOptimize ? R_PPC64_RELAX_TOC : R_GOTREL; + return ctx.arg.tocOptimize ? RE_PPC64_RELAX_TOC : R_GOTREL; case R_PPC64_TOC: - return R_PPC64_TOCBASE; + return RE_PPC64_TOCBASE; case R_PPC64_REL14: case R_PPC64_REL24: - return R_PPC64_CALL_PLT; + return RE_PPC64_CALL_PLT; case R_PPC64_REL24_NOTOC: return R_PLT_PC; case R_PPC64_REL16_LO: @@ -1452,7 +1452,7 @@ bool PPC64::needsThunk(RelExpr expr, RelType type, const InputFile *file, // If the offset exceeds the range of the branch type then it will need // a range-extending thunk. - // See the comment in getRelocTargetVA() about R_PPC64_CALL. + // See the comment in getRelocTargetVA() about RE_PPC64_CALL. return !inBranchRange( type, branchAddr, s.getVA(ctx, a) + getPPC64GlobalEntryToLocalEntryOffset(ctx, s.stOther)); @@ -1490,7 +1490,7 @@ RelExpr PPC64::adjustGotPcExpr(RelType type, int64_t addend, // It only makes sense to optimize pld since paddi means that the address // of the object in the GOT is required rather than the object itself. 
if ((readPrefixedInst(ctx, loc) & 0xfc000000) == 0xe4000000) - return R_PPC64_RELAX_GOT_PC; + return RE_PPC64_RELAX_GOT_PC; } return R_GOT_PC; } @@ -1574,7 +1574,7 @@ void PPC64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { uint8_t *loc = buf + rel.offset; const uint64_t val = sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset); switch (rel.expr) { - case R_PPC64_RELAX_GOT_PC: { + case RE_PPC64_RELAX_GOT_PC: { // The R_PPC64_PCREL_OPT relocation must appear immediately after // R_PPC64_GOT_PCREL34 in the relocations table at the same offset. // We can only relax R_PPC64_PCREL_OPT if we have also relaxed @@ -1588,7 +1588,7 @@ void PPC64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { relaxGot(loc, rel, val); break; } - case R_PPC64_RELAX_TOC: + case RE_PPC64_RELAX_TOC: // rel.sym refers to the STT_SECTION symbol associated to the .toc input // section. If an R_PPC64_TOC16_LO (.toc + addend) references the TOC // entry, there may be R_PPC64_TOC16_HA not paired with @@ -1598,7 +1598,7 @@ void PPC64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { !tryRelaxPPC64TocIndirection(ctx, rel, loc)) relocate(loc, rel, val); break; - case R_PPC64_CALL: + case RE_PPC64_CALL: // If this is a call to __tls_get_addr, it may be part of a TLS // sequence that has been relaxed and turned into a nop. In this // case, we don't want to handle it as a call. diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index 58a71fd9545c5..e150ff26fc3b5 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -282,7 +282,7 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s, case R_RISCV_SUB16: case R_RISCV_SUB32: case R_RISCV_SUB64: - return R_RISCV_ADD; + return RE_RISCV_ADD; case R_RISCV_JAL: case R_RISCV_BRANCH: case R_RISCV_PCREL_HI20: @@ -299,7 +299,7 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s, return R_GOT_PC; case R_RISCV_PCREL_LO12_I: case R_RISCV_PCREL_LO12_S: - return R_RISCV_PC_INDIRECT; + return RE_RISCV_PC_INDIRECT; case R_RISCV_TLSDESC_HI20: case R_RISCV_TLSDESC_LOAD_LO12: case R_RISCV_TLSDESC_ADD_LO12: @@ -321,7 +321,7 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s, return ctx.arg.relax ? R_RELAX_HINT : R_NONE; case R_RISCV_SET_ULEB128: case R_RISCV_SUB_ULEB128: - return R_RISCV_LEB128; + return RE_RISCV_LEB128; default: Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v << ") against symbol " << &s; @@ -650,7 +650,7 @@ void RISCV::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { else tlsdescToIe(ctx, loc, rel, val); continue; - case R_RISCV_LEB128: + case RE_RISCV_LEB128: if (i + 1 < size) { const Relocation &rel1 = relocs[i + 1]; if (rel.type == R_RISCV_SET_ULEB128 && diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 30c2ff4d79ba5..7e76bae19fc6a 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -523,7 +523,7 @@ void InputSection::copyRelocations(Ctx &ctx, uint8_t *buf, addend = target.getImplicitAddend(bufLoc, type); if (ctx.arg.emachine == EM_MIPS && - target.getRelExpr(type, sym, bufLoc) == R_MIPS_GOTREL) { + target.getRelExpr(type, sym, bufLoc) == RE_MIPS_GOTREL) { // Some MIPS relocations depend on "gp" value. By default, // this value has 0x7ff0 offset from a .got section. 
But // relocatable files produced by a compiler or a linker @@ -655,7 +655,7 @@ static uint64_t getARMStaticBase(const Symbol &sym) { return os->ptLoad->firstSec->addr; } -// For R_RISCV_PC_INDIRECT (R_RISCV_PCREL_LO12_{I,S}), the symbol actually +// For RE_RISCV_PC_INDIRECT (R_RISCV_PCREL_LO12_{I,S}), the symbol actually // points the corresponding R_RISCV_PCREL_HI20 relocation, and the target VA // is calculated using PCREL_HI20's symbol. // @@ -772,25 +772,25 @@ uint64_t InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r, case R_DTPREL: case R_RELAX_TLS_LD_TO_LE_ABS: case R_RELAX_GOT_PC_NOPIC: - case R_AARCH64_AUTH: - case R_RISCV_ADD: - case R_RISCV_LEB128: + case RE_AARCH64_AUTH: + case RE_RISCV_ADD: + case RE_RISCV_LEB128: return r.sym->getVA(ctx, a); case R_ADDEND: return a; case R_RELAX_HINT: return 0; - case R_ARM_SBREL: + case RE_ARM_SBREL: return r.sym->getVA(ctx, a) - getARMStaticBase(*r.sym); case R_GOT: case R_RELAX_TLS_GD_TO_IE_ABS: return r.sym->getGotVA(ctx) + a; - case R_LOONGARCH_GOT: + case RE_LOONGARCH_GOT: // The LoongArch TLS GD relocs reuse the R_LARCH_GOT_PC_LO12 reloc r.type // for their page offsets. The arithmetics are different in the TLS case // so we have to duplicate some logic here. if (r.sym->hasFlag(NEEDS_TLSGD) && r.type != R_LARCH_TLS_IE_PC_LO12) - // Like R_LOONGARCH_TLSGD_PAGE_PC but taking the absolute value. + // Like RE_LOONGARCH_TLSGD_PAGE_PC but taking the absolute value. return ctx.in.got->getGlobalDynAddr(*r.sym) + a; return r.sym->getGotVA(ctx) + a; case R_GOTONLY_PC: @@ -798,7 +798,7 @@ uint64_t InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r, case R_GOTPLTONLY_PC: return ctx.in.gotPlt->getVA() + a - p; case R_GOTREL: - case R_PPC64_RELAX_TOC: + case RE_PPC64_RELAX_TOC: return r.sym->getVA(ctx, a) - ctx.in.got->getVA(); case R_GOTPLTREL: return r.sym->getVA(ctx, a) - ctx.in.gotPlt->getVA(); @@ -809,10 +809,10 @@ uint64_t InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r, case R_GOT_OFF: case R_RELAX_TLS_GD_TO_IE_GOT_OFF: return r.sym->getGotOffset(ctx) + a; - case R_AARCH64_GOT_PAGE_PC: - case R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC: + case RE_AARCH64_GOT_PAGE_PC: + case RE_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC: return getAArch64Page(r.sym->getGotVA(ctx) + a) - getAArch64Page(p); - case R_AARCH64_GOT_PAGE: + case RE_AARCH64_GOT_PAGE: return r.sym->getGotVA(ctx) + a - getAArch64Page(ctx.in.got->getVA()); case R_GOT_PC: case R_RELAX_TLS_GD_TO_IE: @@ -821,17 +821,17 @@ uint64_t InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r, return r.sym->getGotPltVA(ctx) + a - ctx.in.got->getVA(); case R_GOTPLT_PC: return r.sym->getGotPltVA(ctx) + a - p; - case R_LOONGARCH_GOT_PAGE_PC: + case RE_LOONGARCH_GOT_PAGE_PC: if (r.sym->hasFlag(NEEDS_TLSGD)) return getLoongArchPageDelta(ctx.in.got->getGlobalDynAddr(*r.sym) + a, p, r.type); return getLoongArchPageDelta(r.sym->getGotVA(ctx) + a, p, r.type); - case R_MIPS_GOTREL: + case RE_MIPS_GOTREL: return r.sym->getVA(ctx, a) - ctx.in.mipsGot->getGp(file); - case R_MIPS_GOT_GP: + case RE_MIPS_GOT_GP: return ctx.in.mipsGot->getGp(file) + a; - case R_MIPS_GOT_GP_PC: { - // R_MIPS_LO16 expression has R_MIPS_GOT_GP_PC r.type iif the target + case RE_MIPS_GOT_GP_PC: { + // R_MIPS_LO16 expression has RE_MIPS_GOT_GP_PC r.type iif the target // is _gp_disp symbol. In that case we should use the following // formula for calculation "AHL + GP - P + 4". For details see p. 
4-19 at // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf @@ -845,43 +845,43 @@ uint64_t InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r, v -= 1; return v; } - case R_MIPS_GOT_LOCAL_PAGE: + case RE_MIPS_GOT_LOCAL_PAGE: // If relocation against MIPS local symbol requires GOT entry, this entry // should be initialized by 'page address'. This address is high 16-bits // of sum the symbol's value and the addend. return ctx.in.mipsGot->getVA() + ctx.in.mipsGot->getPageEntryOffset(file, *r.sym, a) - ctx.in.mipsGot->getGp(file); - case R_MIPS_GOT_OFF: - case R_MIPS_GOT_OFF32: + case RE_MIPS_GOT_OFF: + case RE_MIPS_GOT_OFF32: // In case of MIPS if a GOT relocation has non-zero addend this addend // should be applied to the GOT entry content not to the GOT entry offset. // That is why we use separate expression r.type. return ctx.in.mipsGot->getVA() + ctx.in.mipsGot->getSymEntryOffset(file, *r.sym, a) - ctx.in.mipsGot->getGp(file); - case R_MIPS_TLSGD: + case RE_MIPS_TLSGD: return ctx.in.mipsGot->getVA() + ctx.in.mipsGot->getGlobalDynOffset(file, *r.sym) - ctx.in.mipsGot->getGp(file); - case R_MIPS_TLSLD: + case RE_MIPS_TLSLD: return ctx.in.mipsGot->getVA() + ctx.in.mipsGot->getTlsIndexOffset(file) - ctx.in.mipsGot->getGp(file); - case R_AARCH64_PAGE_PC: { + case RE_AARCH64_PAGE_PC: { uint64_t val = r.sym->isUndefWeak() ? p + a : r.sym->getVA(ctx, a); return getAArch64Page(val) - getAArch64Page(p); } - case R_RISCV_PC_INDIRECT: { + case RE_RISCV_PC_INDIRECT: { if (const Relocation *hiRel = getRISCVPCRelHi20(ctx, this, r)) return getRelocTargetVA(ctx, *hiRel, r.sym->getVA(ctx)); return 0; } - case R_LOONGARCH_PAGE_PC: + case RE_LOONGARCH_PAGE_PC: return getLoongArchPageDelta(r.sym->getVA(ctx, a), p, r.type); case R_PC: - case R_ARM_PCA: { + case RE_ARM_PCA: { uint64_t dest; - if (r.expr == R_ARM_PCA) + if (r.expr == RE_ARM_PCA) // Some PC relative ARM (Thumb) relocations align down the place. p = p & 0xfffffffc; if (r.sym->isUndefined()) { @@ -909,20 +909,20 @@ uint64_t InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r, case R_PLT: return r.sym->getPltVA(ctx) + a; case R_PLT_PC: - case R_PPC64_CALL_PLT: + case RE_PPC64_CALL_PLT: return r.sym->getPltVA(ctx) + a - p; - case R_LOONGARCH_PLT_PAGE_PC: + case RE_LOONGARCH_PLT_PAGE_PC: return getLoongArchPageDelta(r.sym->getPltVA(ctx) + a, p, r.type); case R_PLT_GOTPLT: return r.sym->getPltVA(ctx) + a - ctx.in.gotPlt->getVA(); case R_PLT_GOTREL: return r.sym->getPltVA(ctx) + a - ctx.in.got->getVA(); - case R_PPC32_PLTREL: + case RE_PPC32_PLTREL: // R_PPC_PLTREL24 uses the addend (usually 0 or 0x8000) to indicate r30 // stores _GLOBAL_OFFSET_TABLE_ or .got2+0x8000. The addend is ignored for // target VA computation. return r.sym->getPltVA(ctx) - p; - case R_PPC64_CALL: { + case RE_PPC64_CALL: { uint64_t symVA = r.sym->getVA(ctx, a); // If we have an undefined weak symbol, we might get here with a symbol // address of zero. 
That could overflow, but the code must be unreachable, @@ -939,10 +939,10 @@ uint64_t InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r, return symVA - p + getPPC64GlobalEntryToLocalEntryOffset(ctx, r.sym->stOther); } - case R_PPC64_TOCBASE: + case RE_PPC64_TOCBASE: return getPPC64TocBase(ctx) + a; case R_RELAX_GOT_PC: - case R_PPC64_RELAX_GOT_PC: + case RE_PPC64_RELAX_GOT_PC: return r.sym->getVA(ctx, a) - p; case R_RELAX_TLS_GD_TO_LE: case R_RELAX_TLS_IE_TO_LE: @@ -968,10 +968,10 @@ uint64_t InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r, return ctx.in.got->getTlsDescAddr(*r.sym) + a - p; case R_TLSDESC_GOTPLT: return ctx.in.got->getTlsDescAddr(*r.sym) + a - ctx.in.gotPlt->getVA(); - case R_AARCH64_TLSDESC_PAGE: + case RE_AARCH64_TLSDESC_PAGE: return getAArch64Page(ctx.in.got->getTlsDescAddr(*r.sym) + a) - getAArch64Page(p); - case R_LOONGARCH_TLSDESC_PAGE_PC: + case RE_LOONGARCH_TLSDESC_PAGE_PC: return getLoongArchPageDelta(ctx.in.got->getTlsDescAddr(*r.sym) + a, p, r.type); case R_TLSGD_GOT: @@ -980,7 +980,7 @@ uint64_t InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r, return ctx.in.got->getGlobalDynAddr(*r.sym) + a - ctx.in.gotPlt->getVA(); case R_TLSGD_PC: return ctx.in.got->getGlobalDynAddr(*r.sym) + a - p; - case R_LOONGARCH_TLSGD_PAGE_PC: + case RE_LOONGARCH_TLSGD_PAGE_PC: return getLoongArchPageDelta(ctx.in.got->getGlobalDynAddr(*r.sym) + a, p, r.type); case R_TLSLD_GOTPLT: @@ -1114,7 +1114,7 @@ void InputSection::relocateNonAlloc(Ctx &ctx, uint8_t *buf, // R_ABS/R_DTPREL and some other relocations can be used from non-SHF_ALLOC // sections. if (LLVM_LIKELY(expr == R_ABS) || expr == R_DTPREL || expr == R_GOTPLTREL || - expr == R_RISCV_ADD || expr == R_ARM_SBREL) { + expr == RE_RISCV_ADD || expr == RE_ARM_SBREL) { target.relocateNoSym(bufLoc, type, SignExtend64(sym.getVA(ctx, addend))); continue; diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 605321b3cc9e3..4aa27b0a71bc1 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -205,32 +205,32 @@ static bool isAbsoluteValue(const Symbol &sym) { // Returns true if Expr refers a PLT entry. static bool needsPlt(RelExpr expr) { return oneof(expr); + R_GOTPLT_PC, RE_LOONGARCH_PLT_PAGE_PC, RE_PPC32_PLTREL, + RE_PPC64_CALL_PLT>(expr); } bool lld::elf::needsGot(RelExpr expr) { - return oneof( + return oneof( expr); } // True if this expression is of the form Sym - X, where X is a position in the // file (PC, or GOT for example). static bool isRelExpr(RelExpr expr) { - return oneof(expr); + return oneof(expr); } static RelExpr toPlt(RelExpr expr) { switch (expr) { - case R_LOONGARCH_PAGE_PC: - return R_LOONGARCH_PLT_PAGE_PC; - case R_PPC64_CALL: - return R_PPC64_CALL_PLT; + case RE_LOONGARCH_PAGE_PC: + return RE_LOONGARCH_PLT_PAGE_PC; + case RE_PPC64_CALL: + return RE_PPC64_CALL_PLT; case R_PC: return R_PLT_PC; case R_ABS: @@ -247,12 +247,12 @@ static RelExpr fromPlt(RelExpr expr) { // reference to the symbol itself. 
switch (expr) { case R_PLT_PC: - case R_PPC32_PLTREL: + case RE_PPC32_PLTREL: return R_PC; - case R_LOONGARCH_PLT_PAGE_PC: - return R_LOONGARCH_PAGE_PC; - case R_PPC64_CALL_PLT: - return R_PPC64_CALL; + case RE_LOONGARCH_PLT_PAGE_PC: + return RE_LOONGARCH_PAGE_PC; + case RE_PPC64_CALL_PLT: + return RE_PPC64_CALL; case R_PLT: return R_ABS; case R_PLT_GOTPLT: @@ -495,7 +495,7 @@ class RelocationScanner { template int64_t RelocationScanner::computeMipsAddend(const RelTy &rel, RelExpr expr, bool isLocal) const { - if (expr == R_MIPS_GOTREL && isLocal) + if (expr == RE_MIPS_GOTREL && isLocal) return sec->getFile()->mipsGp0; // The ABI says that the paired relocation is used only for REL. @@ -969,13 +969,14 @@ bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type, const Symbol &sym, uint64_t relOff) const { // These expressions always compute a constant - if (oneof(e)) + if (oneof(e)) return true; // These never do, except if the entire file is position dependent or if @@ -984,13 +985,13 @@ bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type, return ctx.target->usesOnlyLowPageBits(type) || !ctx.arg.isPic; // R_AARCH64_AUTH_ABS64 requires a dynamic relocation. - if (sym.isPreemptible || e == R_AARCH64_AUTH) + if (sym.isPreemptible || e == RE_AARCH64_AUTH) return false; if (!ctx.arg.isPic) return true; // Constant when referencing a non-preemptible symbol. - if (e == R_SIZE || e == R_RISCV_LEB128) + if (e == R_SIZE || e == RE_RISCV_LEB128) return true; // For the target and the relocation, we want to know if they are @@ -1047,7 +1048,7 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset, if (expr != R_GOT_PC) { // The 0x8000 bit of r_addend of R_PPC_PLTREL24 is used to choose call // stub type. It should be ignored if optimized to R_PC. - if (ctx.arg.emachine == EM_PPC && expr == R_PPC32_PLTREL) + if (ctx.arg.emachine == EM_PPC && expr == RE_PPC32_PLTREL) addend &= ~0x8000; // R_HEX_GD_PLT_B22_PCREL (call a@GDPLT) is transformed into // call __tls_get_addr even if the symbol is non-preemptible. @@ -1087,7 +1088,7 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset, // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf ctx.in.mipsGot->addEntry(*sec->file, sym, addend, expr); } else if (!sym.isTls() || ctx.arg.emachine != EM_LOONGARCH) { - // Many LoongArch TLS relocs reuse the R_LOONGARCH_GOT type, in which + // Many LoongArch TLS relocs reuse the RE_LOONGARCH_GOT type, in which // case the NEEDS_GOT flag shouldn't get set. 
sym.setFlags(NEEDS_GOT); } @@ -1128,7 +1129,7 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset, (isa(sec) && ctx.arg.emachine != EM_MIPS)); if (canWrite) { RelType rel = ctx.target->getDynRel(type); - if (oneof(expr) || + if (oneof(expr) || (rel == ctx.target->symbolicRel && !sym.isPreemptible)) { addRelativeReloc(ctx, *sec, offset, sym, addend, expr, type); return; @@ -1267,12 +1268,12 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset, static unsigned handleMipsTlsRelocation(Ctx &ctx, RelType type, Symbol &sym, InputSectionBase &c, uint64_t offset, int64_t addend, RelExpr expr) { - if (expr == R_MIPS_TLSLD) { + if (expr == RE_MIPS_TLSLD) { ctx.in.mipsGot->addTlsIndex(*c.file); c.addReloc({expr, type, offset, addend, &sym}); return 1; } - if (expr == R_MIPS_TLSGD) { + if (expr == RE_MIPS_TLSGD) { ctx.in.mipsGot->addDynTlsEntry(*c.file, sym); c.addReloc({expr, type, offset, addend, &sym}); return 1; @@ -1307,7 +1308,7 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type, // LoongArch does not yet implement transition from TLSDESC to LE/IE, so // generate TLSDESC dynamic relocation for the dynamic linker to handle. if (ctx.arg.emachine == EM_LOONGARCH && - oneof(expr)) { if (expr != R_TLSDESC_CALL) { sym.setFlags(NEEDS_TLSDESC); @@ -1318,7 +1319,7 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type, bool isRISCV = ctx.arg.emachine == EM_RISCV; - if (oneof(expr) && ctx.arg.shared) { // R_RISCV_TLSDESC_{LOAD_LO12,ADD_LO12_I,CALL} reference a label. Do not @@ -1387,9 +1388,9 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type, return 1; } - if (oneof(expr)) { + RE_LOONGARCH_TLSGD_PAGE_PC>(expr)) { if (!execOptimize) { sym.setFlags(NEEDS_TLSGD); sec->addReloc({expr, type, offset, addend, &sym}); @@ -1413,8 +1414,8 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type, return ctx.target->getTlsGdRelaxSkip(type); } - if (oneof(expr)) { + if (oneof(expr)) { ctx.hasTlsIe.store(true, std::memory_order_relaxed); // Initial-Exec relocs can be optimized to Local-Exec if the symbol is // locally defined. This is not supported on SystemZ. @@ -1524,8 +1525,8 @@ void RelocationScanner::scanOne(typename Relocs::const_iterator &i) { if (oneof(expr)) { ctx.in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed); - } else if (oneof(expr)) { + } else if (oneof(expr)) { ctx.in.got->hasGotOffRel.store(true, std::memory_order_relaxed); } diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index 041bd48048587..71cea0220e04c 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -89,42 +89,42 @@ enum RelExpr { // // Even though RelExpr is intended to be a target-neutral representation // of a relocation type, there are some relocations whose semantics are - // unique to a target. Such relocation are marked with R_. - R_AARCH64_GOT_PAGE_PC, - R_AARCH64_GOT_PAGE, - R_AARCH64_PAGE_PC, - R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC, - R_AARCH64_TLSDESC_PAGE, - R_AARCH64_AUTH, - R_ARM_PCA, - R_ARM_SBREL, - R_MIPS_GOTREL, - R_MIPS_GOT_GP, - R_MIPS_GOT_GP_PC, - R_MIPS_GOT_LOCAL_PAGE, - R_MIPS_GOT_OFF, - R_MIPS_GOT_OFF32, - R_MIPS_TLSGD, - R_MIPS_TLSLD, - R_PPC32_PLTREL, - R_PPC64_CALL, - R_PPC64_CALL_PLT, - R_PPC64_RELAX_TOC, - R_PPC64_TOCBASE, - R_PPC64_RELAX_GOT_PC, - R_RISCV_ADD, - R_RISCV_LEB128, - R_RISCV_PC_INDIRECT, + // unique to a target. Such relocation are marked with RE_. 
+ RE_AARCH64_GOT_PAGE_PC, + RE_AARCH64_GOT_PAGE, + RE_AARCH64_PAGE_PC, + RE_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC, + RE_AARCH64_TLSDESC_PAGE, + RE_AARCH64_AUTH, + RE_ARM_PCA, + RE_ARM_SBREL, + RE_MIPS_GOTREL, + RE_MIPS_GOT_GP, + RE_MIPS_GOT_GP_PC, + RE_MIPS_GOT_LOCAL_PAGE, + RE_MIPS_GOT_OFF, + RE_MIPS_GOT_OFF32, + RE_MIPS_TLSGD, + RE_MIPS_TLSLD, + RE_PPC32_PLTREL, + RE_PPC64_CALL, + RE_PPC64_CALL_PLT, + RE_PPC64_RELAX_TOC, + RE_PPC64_TOCBASE, + RE_PPC64_RELAX_GOT_PC, + RE_RISCV_ADD, + RE_RISCV_LEB128, + RE_RISCV_PC_INDIRECT, // Same as R_PC but with page-aligned semantics. - R_LOONGARCH_PAGE_PC, + RE_LOONGARCH_PAGE_PC, // Same as R_PLT_PC but with page-aligned semantics. - R_LOONGARCH_PLT_PAGE_PC, + RE_LOONGARCH_PLT_PAGE_PC, // In addition to having page-aligned semantics, LoongArch GOT relocs are // also reused for TLS, making the semantics differ from other architectures. - R_LOONGARCH_GOT, - R_LOONGARCH_GOT_PAGE_PC, - R_LOONGARCH_TLSGD_PAGE_PC, - R_LOONGARCH_TLSDESC_PAGE_PC, + RE_LOONGARCH_GOT, + RE_LOONGARCH_GOT_PAGE_PC, + RE_LOONGARCH_TLSGD_PAGE_PC, + RE_LOONGARCH_TLSDESC_PAGE_PC, }; // Architecture-neutral representation of relocation. diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 21fe2a25fa1bd..6c5f2a614639c 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -749,7 +749,7 @@ MipsGotSection::MipsGotSection(Ctx &ctx) void MipsGotSection::addEntry(InputFile &file, Symbol &sym, int64_t addend, RelExpr expr) { FileGot &g = getGot(file); - if (expr == R_MIPS_GOT_LOCAL_PAGE) { + if (expr == RE_MIPS_GOT_LOCAL_PAGE) { if (const OutputSection *os = sym.getOutputSection()) g.pagesMap.insert({os, {}}); else @@ -760,7 +760,7 @@ void MipsGotSection::addEntry(InputFile &file, Symbol &sym, int64_t addend, g.relocs.insert({&sym, 0}); else if (sym.isPreemptible) g.global.insert({&sym, 0}); - else if (expr == R_MIPS_GOT_OFF32) + else if (expr == RE_MIPS_GOT_OFF32) g.local32.insert({{&sym, addend}, 0}); else g.local16.insert({{&sym, addend}, 0}); From 61bf308cf2fc32452f14861c102ace89f5f36fec Mon Sep 17 00:00:00 2001 From: Maksim Levental Date: Tue, 3 Dec 2024 11:26:33 -0600 Subject: [PATCH 096/191] Revert "[mlir python] Add nanobind support for standalone dialects." (#118517) Reverts llvm/llvm-project#117922 because deps aren't met on some of the post-commit build bots. 
--- mlir/cmake/modules/AddMLIRPython.cmake | 27 +- mlir/cmake/modules/MLIRDetectPythonEnv.cmake | 39 - mlir/docs/Bindings/Python.md | 20 +- .../examples/standalone/python/CMakeLists.txt | 22 +- ...onPybind11.cpp => StandaloneExtension.cpp} | 7 +- .../python/StandaloneExtensionNanobind.cpp | 35 - .../{standalone_nanobind.py => standalone.py} | 2 +- .../dialects/standalone_pybind11.py | 6 - .../standalone/test/python/smoketest.py | 14 +- .../mlir/Bindings/Python/Diagnostics.h | 59 -- .../mlir/Bindings/Python/NanobindAdaptors.h | 671 ------------------ .../mlir/Bindings/Python/PybindAdaptors.h | 43 +- mlir/lib/Bindings/Python/DialectLLVM.cpp | 4 +- .../Bindings/Python/TransformInterpreter.cpp | 7 +- mlir/python/CMakeLists.txt | 23 +- mlir/python/mlir/dialects/python_test.py | 17 +- mlir/python/requirements.txt | 1 - mlir/test/python/dialects/python_test.py | 59 +- mlir/test/python/lib/CMakeLists.txt | 3 +- ...odulePybind11.cpp => PythonTestModule.cpp} | 4 +- .../python/lib/PythonTestModuleNanobind.cpp | 121 ---- utils/bazel/WORKSPACE | 18 - .../llvm-project-overlay/mlir/BUILD.bazel | 50 +- utils/bazel/third_party_build/nanobind.BUILD | 25 - utils/bazel/third_party_build/robin_map.BUILD | 12 - 25 files changed, 105 insertions(+), 1184 deletions(-) rename mlir/examples/standalone/python/{StandaloneExtensionPybind11.cpp => StandaloneExtension.cpp} (81%) delete mode 100644 mlir/examples/standalone/python/StandaloneExtensionNanobind.cpp rename mlir/examples/standalone/python/mlir_standalone/dialects/{standalone_nanobind.py => standalone.py} (78%) delete mode 100644 mlir/examples/standalone/python/mlir_standalone/dialects/standalone_pybind11.py delete mode 100644 mlir/include/mlir/Bindings/Python/Diagnostics.h delete mode 100644 mlir/include/mlir/Bindings/Python/NanobindAdaptors.h rename mlir/test/python/lib/{PythonTestModulePybind11.cpp => PythonTestModule.cpp} (96%) delete mode 100644 mlir/test/python/lib/PythonTestModuleNanobind.cpp delete mode 100644 utils/bazel/third_party_build/nanobind.BUILD delete mode 100644 utils/bazel/third_party_build/robin_map.BUILD diff --git a/mlir/cmake/modules/AddMLIRPython.cmake b/mlir/cmake/modules/AddMLIRPython.cmake index 67619a90c90be..7b91f43e2d57f 100644 --- a/mlir/cmake/modules/AddMLIRPython.cmake +++ b/mlir/cmake/modules/AddMLIRPython.cmake @@ -114,11 +114,10 @@ endfunction() # EMBED_CAPI_LINK_LIBS: Dependent CAPI libraries that this extension depends # on. These will be collected for all extensions and put into an # aggregate dylib that is linked against. -# PYTHON_BINDINGS_LIBRARY: Either pybind11 or nanobind. 
function(declare_mlir_python_extension name) cmake_parse_arguments(ARG "" - "ROOT_DIR;MODULE_NAME;ADD_TO_PARENT;PYTHON_BINDINGS_LIBRARY" + "ROOT_DIR;MODULE_NAME;ADD_TO_PARENT" "SOURCES;PRIVATE_LINK_LIBS;EMBED_CAPI_LINK_LIBS" ${ARGN}) @@ -127,20 +126,15 @@ function(declare_mlir_python_extension name) endif() set(_install_destination "src/python/${name}") - if(NOT ARG_PYTHON_BINDINGS_LIBRARY) - set(ARG_PYTHON_BINDINGS_LIBRARY "pybind11") - endif() - add_library(${name} INTERFACE) set_target_properties(${name} PROPERTIES # Yes: Leading-lowercase property names are load bearing and the recommended # way to do this: https://gitlab.kitware.com/cmake/cmake/-/issues/19261 - EXPORT_PROPERTIES "mlir_python_SOURCES_TYPE;mlir_python_EXTENSION_MODULE_NAME;mlir_python_EMBED_CAPI_LINK_LIBS;mlir_python_DEPENDS;mlir_python_BINDINGS_LIBRARY" + EXPORT_PROPERTIES "mlir_python_SOURCES_TYPE;mlir_python_EXTENSION_MODULE_NAME;mlir_python_EMBED_CAPI_LINK_LIBS;mlir_python_DEPENDS" mlir_python_SOURCES_TYPE extension mlir_python_EXTENSION_MODULE_NAME "${ARG_MODULE_NAME}" mlir_python_EMBED_CAPI_LINK_LIBS "${ARG_EMBED_CAPI_LINK_LIBS}" mlir_python_DEPENDS "" - mlir_python_BINDINGS_LIBRARY "${ARG_PYTHON_BINDINGS_LIBRARY}" ) # Set the interface source and link_libs properties of the target @@ -229,14 +223,12 @@ function(add_mlir_python_modules name) elseif(_source_type STREQUAL "extension") # Native CPP extension. get_target_property(_module_name ${sources_target} mlir_python_EXTENSION_MODULE_NAME) - get_target_property(_bindings_library ${sources_target} mlir_python_BINDINGS_LIBRARY) # Transform relative source to based on root dir. set(_extension_target "${modules_target}.extension.${_module_name}.dso") add_mlir_python_extension(${_extension_target} "${_module_name}" INSTALL_COMPONENT ${modules_target} INSTALL_DIR "${ARG_INSTALL_PREFIX}/_mlir_libs" OUTPUT_DIRECTORY "${ARG_ROOT_PREFIX}/_mlir_libs" - PYTHON_BINDINGS_LIBRARY ${_bindings_library} LINK_LIBS PRIVATE ${sources_target} ${ARG_COMMON_CAPI_LINK_LIBS} @@ -642,7 +634,7 @@ endfunction() function(add_mlir_python_extension libname extname) cmake_parse_arguments(ARG "" - "INSTALL_COMPONENT;INSTALL_DIR;OUTPUT_DIRECTORY;PYTHON_BINDINGS_LIBRARY" + "INSTALL_COMPONENT;INSTALL_DIR;OUTPUT_DIRECTORY" "SOURCES;LINK_LIBS" ${ARGN}) if(ARG_UNPARSED_ARGUMENTS) @@ -652,16 +644,9 @@ function(add_mlir_python_extension libname extname) # The actual extension library produces a shared-object or DLL and has # sources that must be compiled in accordance with pybind11 needs (RTTI and # exceptions). - if(NOT DEFINED ARG_PYTHON_BINDINGS_LIBRARY OR ARG_PYTHON_BINDINGS_LIBRARY STREQUAL "pybind11") - pybind11_add_module(${libname} - ${ARG_SOURCES} - ) - elseif(ARG_PYTHON_BINDINGS_LIBRARY STREQUAL "nanobind") - nanobind_add_module(${libname} - NB_DOMAIN mlir - ${ARG_SOURCES} - ) - endif() + pybind11_add_module(${libname} + ${ARG_SOURCES} + ) # The extension itself must be compiled with RTTI and exceptions enabled. # Also, some warning classes triggered by pybind11 are disabled. 
diff --git a/mlir/cmake/modules/MLIRDetectPythonEnv.cmake b/mlir/cmake/modules/MLIRDetectPythonEnv.cmake index c62ac7fa615ea..05397b7a1e1c7 100644 --- a/mlir/cmake/modules/MLIRDetectPythonEnv.cmake +++ b/mlir/cmake/modules/MLIRDetectPythonEnv.cmake @@ -21,12 +21,6 @@ macro(mlir_configure_python_dev_packages) find_package(Python3 ${LLVM_MINIMUM_PYTHON_VERSION} COMPONENTS Interpreter ${_python_development_component} REQUIRED) - - # It's a little silly to detect Python a second time, but nanobind's cmake - # code looks for Python_ not Python3_. - find_package(Python ${LLVM_MINIMUM_PYTHON_VERSION} - COMPONENTS Interpreter ${_python_development_component} REQUIRED) - unset(_python_development_component) message(STATUS "Found python include dirs: ${Python3_INCLUDE_DIRS}") message(STATUS "Found python libraries: ${Python3_LIBRARIES}") @@ -37,13 +31,6 @@ macro(mlir_configure_python_dev_packages) message(STATUS "Python prefix = '${PYTHON_MODULE_PREFIX}', " "suffix = '${PYTHON_MODULE_SUFFIX}', " "extension = '${PYTHON_MODULE_EXTENSION}") - - mlir_detect_nanobind_install() - find_package(nanobind 2.2 CONFIG REQUIRED) - message(STATUS "Found nanobind v${nanobind_VERSION}: ${nanobind_INCLUDE_DIR}") - message(STATUS "Python prefix = '${PYTHON_MODULE_PREFIX}', " - "suffix = '${PYTHON_MODULE_SUFFIX}', " - "extension = '${PYTHON_MODULE_EXTENSION}") endif() endmacro() @@ -71,29 +58,3 @@ function(mlir_detect_pybind11_install) set(pybind11_DIR "${PACKAGE_DIR}" PARENT_SCOPE) endif() endfunction() - - -# Detects a nanobind package installed in the current python environment -# and sets variables to allow it to be found. This allows nanobind to be -# installed via pip, which typically yields a much more recent version than -# the OS install, which will be available otherwise. -function(mlir_detect_nanobind_install) - if(nanobind_DIR) - message(STATUS "Using explicit nanobind cmake directory: ${nanobind_DIR} (-Dnanobind_DIR to change)") - else() - message(STATUS "Checking for nanobind in python path...") - execute_process( - COMMAND "${Python3_EXECUTABLE}" - -c "import nanobind;print(nanobind.cmake_dir(), end='')" - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - RESULT_VARIABLE STATUS - OUTPUT_VARIABLE PACKAGE_DIR - ERROR_QUIET) - if(NOT STATUS EQUAL "0") - message(STATUS "not found (install via 'pip install nanobind' or set nanobind_DIR)") - return() - endif() - message(STATUS "found (${PACKAGE_DIR})") - set(nanobind_DIR "${PACKAGE_DIR}" PARENT_SCOPE) - endif() -endfunction() diff --git a/mlir/docs/Bindings/Python.md b/mlir/docs/Bindings/Python.md index a0bd1cac118ba..6e52c4deaad9a 100644 --- a/mlir/docs/Bindings/Python.md +++ b/mlir/docs/Bindings/Python.md @@ -1138,14 +1138,12 @@ attributes and types must connect to the relevant C APIs for building and inspection, which must be provided first. Bindings for `Attribute` and `Type` subclasses can be defined using [`include/mlir/Bindings/Python/PybindAdaptors.h`](https://github.com/llvm/llvm-project/blob/main/mlir/include/mlir/Bindings/Python/PybindAdaptors.h) -or -[`include/mlir/Bindings/Python/NanobindAdaptors.h`](https://github.com/llvm/llvm-project/blob/main/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h) -utilities that mimic pybind11/nanobind API for defining functions and -properties. These bindings are to be included in a separate module. The -utilities also provide automatic casting between C API handles `MlirAttribute` -and `MlirType` and their Python counterparts so that the C API handles can be -used directly in binding implementations. 
The methods and properties provided by -the bindings should follow the principles discussed above. +utilities that mimic pybind11 API for defining functions and properties. These +bindings are to be included in a separate pybind11 module. The utilities also +provide automatic casting between C API handles `MlirAttribute` and `MlirType` +and their Python counterparts so that the C API handles can be used directly in +binding implementations. The methods and properties provided by the bindings +should follow the principles discussed above. The attribute and type bindings for a dialect can be located in `lib/Bindings/Python/Dialect.cpp` and should be compiled into a separate @@ -1181,9 +1179,7 @@ make the passes available along with the dialect. Dialect functionality other than IR objects or passes, such as helper functions, can be exposed to Python similarly to attributes and types. C API is expected to exist for this functionality, which can then be wrapped using pybind11 and -`[include/mlir/Bindings/Python/PybindAdaptors.h](https://github.com/llvm/llvm-project/blob/main/mlir/include/mlir/Bindings/Python/PybindAdaptors.h)`, -or nanobind and -`[include/mlir/Bindings/Python/NanobindAdaptors.h](https://github.com/llvm/llvm-project/blob/main/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h)` +`[include/mlir/Bindings/Python/PybindAdaptors.h](https://github.com/llvm/llvm-project/blob/main/mlir/include/mlir/Bindings/Python/PybindAdaptors.h)` utilities to connect to the rest of Python API. The bindings can be located in a -separate module or in the same module as attributes and types, and +separate pybind11 module or in the same module as attributes and types, and loaded along with the dialect. diff --git a/mlir/examples/standalone/python/CMakeLists.txt b/mlir/examples/standalone/python/CMakeLists.txt index 69c82fd913579..a8c43827a5a37 100644 --- a/mlir/examples/standalone/python/CMakeLists.txt +++ b/mlir/examples/standalone/python/CMakeLists.txt @@ -17,32 +17,18 @@ declare_mlir_dialect_python_bindings( ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir_standalone" TD_FILE dialects/StandaloneOps.td SOURCES - dialects/standalone_pybind11.py - dialects/standalone_nanobind.py + dialects/standalone.py DIALECT_NAME standalone) - -declare_mlir_python_extension(StandalonePythonSources.Pybind11Extension - MODULE_NAME _standaloneDialectsPybind11 - ADD_TO_PARENT StandalonePythonSources - SOURCES - StandaloneExtensionPybind11.cpp - EMBED_CAPI_LINK_LIBS - StandaloneCAPI - PYTHON_BINDINGS_LIBRARY pybind11 -) - -declare_mlir_python_extension(StandalonePythonSources.NanobindExtension - MODULE_NAME _standaloneDialectsNanobind +declare_mlir_python_extension(StandalonePythonSources.Extension + MODULE_NAME _standaloneDialects ADD_TO_PARENT StandalonePythonSources SOURCES - StandaloneExtensionNanobind.cpp + StandaloneExtension.cpp EMBED_CAPI_LINK_LIBS StandaloneCAPI - PYTHON_BINDINGS_LIBRARY nanobind ) - ################################################################################ # Common CAPI ################################################################################ diff --git a/mlir/examples/standalone/python/StandaloneExtensionPybind11.cpp b/mlir/examples/standalone/python/StandaloneExtension.cpp similarity index 81% rename from mlir/examples/standalone/python/StandaloneExtensionPybind11.cpp rename to mlir/examples/standalone/python/StandaloneExtension.cpp index 397db4c20e743..5e83060cd48d8 100644 --- a/mlir/examples/standalone/python/StandaloneExtensionPybind11.cpp +++ 
b/mlir/examples/standalone/python/StandaloneExtension.cpp @@ -1,7 +1,4 @@ -//===- StandaloneExtensionPybind11.cpp - Extension module -----------------===// -// -// This is the pybind11 version of the example module. There is also a nanobind -// example in StandaloneExtensionNanobind.cpp. +//===- StandaloneExtension.cpp - Extension module -------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -14,7 +11,7 @@ using namespace mlir::python::adaptors; -PYBIND11_MODULE(_standaloneDialectsPybind11, m) { +PYBIND11_MODULE(_standaloneDialects, m) { //===--------------------------------------------------------------------===// // standalone dialect //===--------------------------------------------------------------------===// diff --git a/mlir/examples/standalone/python/StandaloneExtensionNanobind.cpp b/mlir/examples/standalone/python/StandaloneExtensionNanobind.cpp deleted file mode 100644 index 6d83dc585dcd1..0000000000000 --- a/mlir/examples/standalone/python/StandaloneExtensionNanobind.cpp +++ /dev/null @@ -1,35 +0,0 @@ -//===- StandaloneExtension.cpp - Extension module -------------------------===// -// -// This is the nanobind version of the example module. There is also a pybind11 -// example in StandaloneExtensionPybind11.cpp. -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include - -#include "Standalone-c/Dialects.h" -#include "mlir/Bindings/Python/NanobindAdaptors.h" - -namespace nb = nanobind; - -NB_MODULE(_standaloneDialectsNanobind, m) { - //===--------------------------------------------------------------------===// - // standalone dialect - //===--------------------------------------------------------------------===// - auto standaloneM = m.def_submodule("standalone"); - - standaloneM.def( - "register_dialect", - [](MlirContext context, bool load) { - MlirDialectHandle handle = mlirGetDialectHandle__standalone__(); - mlirDialectHandleRegisterDialect(handle, context); - if (load) { - mlirDialectHandleLoadDialect(handle, context); - } - }, - nb::arg("context").none() = nb::none(), nb::arg("load") = true); -} diff --git a/mlir/examples/standalone/python/mlir_standalone/dialects/standalone_nanobind.py b/mlir/examples/standalone/python/mlir_standalone/dialects/standalone.py similarity index 78% rename from mlir/examples/standalone/python/mlir_standalone/dialects/standalone_nanobind.py rename to mlir/examples/standalone/python/mlir_standalone/dialects/standalone.py index 6218720951c82..c958b2ac19368 100644 --- a/mlir/examples/standalone/python/mlir_standalone/dialects/standalone_nanobind.py +++ b/mlir/examples/standalone/python/mlir_standalone/dialects/standalone.py @@ -3,4 +3,4 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception from ._standalone_ops_gen import * -from .._mlir_libs._standaloneDialectsNanobind.standalone import * +from .._mlir_libs._standaloneDialects.standalone import * diff --git a/mlir/examples/standalone/python/mlir_standalone/dialects/standalone_pybind11.py b/mlir/examples/standalone/python/mlir_standalone/dialects/standalone_pybind11.py deleted file mode 100644 index bfb98e404e13f..0000000000000 --- a/mlir/examples/standalone/python/mlir_standalone/dialects/standalone_pybind11.py +++ /dev/null 
@@ -1,6 +0,0 @@ -# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -from ._standalone_ops_gen import * -from .._mlir_libs._standaloneDialectsPybind11.standalone import * diff --git a/mlir/examples/standalone/test/python/smoketest.py b/mlir/examples/standalone/test/python/smoketest.py index bd40c65d16164..08e08cbd2fe24 100644 --- a/mlir/examples/standalone/test/python/smoketest.py +++ b/mlir/examples/standalone/test/python/smoketest.py @@ -1,17 +1,7 @@ -# RUN: %python %s pybind11 | FileCheck %s -# RUN: %python %s nanobind | FileCheck %s +# RUN: %python %s | FileCheck %s -import sys from mlir_standalone.ir import * -from mlir_standalone.dialects import builtin as builtin_d - -if sys.argv[1] == "pybind11": - from mlir_standalone.dialects import standalone_pybind11 as standalone_d -elif sys.argv[1] == "nanobind": - from mlir_standalone.dialects import standalone_nanobind as standalone_d -else: - raise ValueError("Expected either pybind11 or nanobind as arguments") - +from mlir_standalone.dialects import builtin as builtin_d, standalone as standalone_d with Context(): standalone_d.register_dialect() diff --git a/mlir/include/mlir/Bindings/Python/Diagnostics.h b/mlir/include/mlir/Bindings/Python/Diagnostics.h deleted file mode 100644 index ea80e14dde0f3..0000000000000 --- a/mlir/include/mlir/Bindings/Python/Diagnostics.h +++ /dev/null @@ -1,59 +0,0 @@ -//===- Diagnostics.h - Helpers for diagnostics in Python bindings ---------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef MLIR_BINDINGS_PYTHON_DIAGNOSTICS_H -#define MLIR_BINDINGS_PYTHON_DIAGNOSTICS_H - -#include -#include - -#include "mlir-c/Diagnostics.h" -#include "mlir-c/IR.h" -#include "llvm/ADT/StringRef.h" - -namespace mlir { -namespace python { - -/// RAII scope intercepting all diagnostics into a string. The message must be -/// checked before this goes out of scope. 
-class CollectDiagnosticsToStringScope { -public: - explicit CollectDiagnosticsToStringScope(MlirContext ctx) : context(ctx) { - handlerID = mlirContextAttachDiagnosticHandler(ctx, &handler, &errorMessage, - /*deleteUserData=*/nullptr); - } - ~CollectDiagnosticsToStringScope() { - assert(errorMessage.empty() && "unchecked error message"); - mlirContextDetachDiagnosticHandler(context, handlerID); - } - - [[nodiscard]] std::string takeMessage() { return std::move(errorMessage); } - -private: - static MlirLogicalResult handler(MlirDiagnostic diag, void *data) { - auto printer = +[](MlirStringRef message, void *data) { - *static_cast(data) += - llvm::StringRef(message.data, message.length); - }; - MlirLocation loc = mlirDiagnosticGetLocation(diag); - *static_cast(data) += "at "; - mlirLocationPrint(loc, printer, data); - *static_cast(data) += ": "; - mlirDiagnosticPrint(diag, printer, data); - return mlirLogicalResultSuccess(); - } - - MlirContext context; - MlirDiagnosticHandlerID handlerID; - std::string errorMessage = ""; -}; - -} // namespace python -} // namespace mlir - -#endif // MLIR_BINDINGS_PYTHON_DIAGNOSTICS_H diff --git a/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h b/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h deleted file mode 100644 index 5e01cebcb09c9..0000000000000 --- a/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h +++ /dev/null @@ -1,671 +0,0 @@ -//===- NanobindAdaptors.h - Interop with MLIR APIs via nanobind -----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// This file contains adaptors for clients of the core MLIR Python APIs to -// interop via MLIR CAPI types, using nanobind. The facilities here do not -// depend on implementation details of the MLIR Python API and do not introduce -// C++-level dependencies with it (requiring only Python and CAPI-level -// dependencies). -// -// It is encouraged to be used both in-tree and out-of-tree. For in-tree use -// cases, it should be used for dialect implementations (versus relying on -// Pybind-based internals of the core libraries). -//===----------------------------------------------------------------------===// - -#ifndef MLIR_BINDINGS_PYTHON_NANOBINDADAPTORS_H -#define MLIR_BINDINGS_PYTHON_NANOBINDADAPTORS_H - -#include -#include - -#include - -#include "mlir-c/Bindings/Python/Interop.h" -#include "mlir-c/Diagnostics.h" -#include "mlir-c/IR.h" -#include "llvm/ADT/Twine.h" - -// Raw CAPI type casters need to be declared before use, so always include them -// first. -namespace nanobind { -namespace detail { - -/// Helper to convert a presumed MLIR API object to a capsule, accepting either -/// an explicit Capsule (which can happen when two C APIs are communicating -/// directly via Python) or indirectly by querying the MLIR_PYTHON_CAPI_PTR_ATTR -/// attribute (through which supported MLIR Python API objects export their -/// contained API pointer as a capsule). Throws a type error if the object is -/// neither. This is intended to be used from type casters, which are invoked -/// with a raw handle (unowned). The returned object's lifetime may not extend -/// beyond the apiObject handle without explicitly having its refcount increased -/// (i.e. on return). 
-static nanobind::object mlirApiObjectToCapsule(nanobind::handle apiObject) { - if (PyCapsule_CheckExact(apiObject.ptr())) - return nanobind::borrow(apiObject); - if (!nanobind::hasattr(apiObject, MLIR_PYTHON_CAPI_PTR_ATTR)) { - std::string repr = nanobind::cast(nanobind::repr(apiObject)); - throw nanobind::type_error( - (llvm::Twine("Expected an MLIR object (got ") + repr + ").") - .str() - .c_str()); - } - return apiObject.attr(MLIR_PYTHON_CAPI_PTR_ATTR); -} - -// Note: Currently all of the following support cast from nanobind::object to -// the Mlir* C-API type, but only a few light-weight, context-bound ones -// implicitly cast the other way because the use case has not yet emerged and -// ownership is unclear. - -/// Casts object <-> MlirAffineMap. -template <> -struct type_caster { - NB_TYPE_CASTER(MlirAffineMap, const_name("MlirAffineMap")); - bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) { - nanobind::object capsule = mlirApiObjectToCapsule(src); - value = mlirPythonCapsuleToAffineMap(capsule.ptr()); - if (mlirAffineMapIsNull(value)) { - return false; - } - return !mlirAffineMapIsNull(value); - } - static handle from_cpp(MlirAffineMap v, rv_policy, - cleanup_list *cleanup) noexcept { - nanobind::object capsule = - nanobind::steal(mlirPythonAffineMapToCapsule(v)); - return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("AffineMap") - .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) - .release(); - } -}; - -/// Casts object <-> MlirAttribute. -template <> -struct type_caster { - NB_TYPE_CASTER(MlirAttribute, const_name("MlirAttribute")); - bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) { - nanobind::object capsule = mlirApiObjectToCapsule(src); - value = mlirPythonCapsuleToAttribute(capsule.ptr()); - return !mlirAttributeIsNull(value); - } - static handle from_cpp(MlirAttribute v, rv_policy, - cleanup_list *cleanup) noexcept { - nanobind::object capsule = - nanobind::steal(mlirPythonAttributeToCapsule(v)); - return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("Attribute") - .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) - .attr(MLIR_PYTHON_MAYBE_DOWNCAST_ATTR)() - .release(); - } -}; - -/// Casts object -> MlirBlock. -template <> -struct type_caster { - NB_TYPE_CASTER(MlirBlock, const_name("MlirBlock")); - bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) { - nanobind::object capsule = mlirApiObjectToCapsule(src); - value = mlirPythonCapsuleToBlock(capsule.ptr()); - return !mlirBlockIsNull(value); - } -}; - -/// Casts object -> MlirContext. -template <> -struct type_caster { - NB_TYPE_CASTER(MlirContext, const_name("MlirContext")); - bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) { - if (src.is_none()) { - // Gets the current thread-bound context. - // TODO: This raises an error of "No current context" currently. - // Update the implementation to pretty-print the helpful error that the - // core implementations print in this case. - src = nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("Context") - .attr("current"); - } - nanobind::object capsule = mlirApiObjectToCapsule(src); - value = mlirPythonCapsuleToContext(capsule.ptr()); - return !mlirContextIsNull(value); - } -}; - -/// Casts object <-> MlirDialectRegistry. 
-template <> -struct type_caster { - NB_TYPE_CASTER(MlirDialectRegistry, const_name("MlirDialectRegistry")); - bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) { - nanobind::object capsule = mlirApiObjectToCapsule(src); - value = mlirPythonCapsuleToDialectRegistry(capsule.ptr()); - return !mlirDialectRegistryIsNull(value); - } - static handle from_cpp(MlirDialectRegistry v, rv_policy, - cleanup_list *cleanup) noexcept { - nanobind::object capsule = nanobind::steal( - mlirPythonDialectRegistryToCapsule(v)); - return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("DialectRegistry") - .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) - .release(); - } -}; - -/// Casts object <-> MlirLocation. -template <> -struct type_caster { - NB_TYPE_CASTER(MlirLocation, const_name("MlirLocation")); - bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) { - if (src.is_none()) { - // Gets the current thread-bound context. - src = nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("Location") - .attr("current"); - } - nanobind::object capsule = mlirApiObjectToCapsule(src); - value = mlirPythonCapsuleToLocation(capsule.ptr()); - return !mlirLocationIsNull(value); - } - static handle from_cpp(MlirLocation v, rv_policy, - cleanup_list *cleanup) noexcept { - nanobind::object capsule = - nanobind::steal(mlirPythonLocationToCapsule(v)); - return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("Location") - .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) - .release(); - } -}; - -/// Casts object <-> MlirModule. -template <> -struct type_caster { - NB_TYPE_CASTER(MlirModule, const_name("MlirModule")); - bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) { - nanobind::object capsule = mlirApiObjectToCapsule(src); - value = mlirPythonCapsuleToModule(capsule.ptr()); - return !mlirModuleIsNull(value); - } - static handle from_cpp(MlirModule v, rv_policy, - cleanup_list *cleanup) noexcept { - nanobind::object capsule = - nanobind::steal(mlirPythonModuleToCapsule(v)); - return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("Module") - .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) - .release(); - }; -}; - -/// Casts object <-> MlirFrozenRewritePatternSet. -template <> -struct type_caster { - NB_TYPE_CASTER(MlirFrozenRewritePatternSet, - const_name("MlirFrozenRewritePatternSet")); - bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) { - nanobind::object capsule = mlirApiObjectToCapsule(src); - value = mlirPythonCapsuleToFrozenRewritePatternSet(capsule.ptr()); - return value.ptr != nullptr; - } - static handle from_cpp(MlirFrozenRewritePatternSet v, rv_policy, handle) { - nanobind::object capsule = nanobind::steal( - mlirPythonFrozenRewritePatternSetToCapsule(v)); - return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("rewrite")) - .attr("FrozenRewritePatternSet") - .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) - .release(); - }; -}; - -/// Casts object <-> MlirOperation. 
-template <> -struct type_caster { - NB_TYPE_CASTER(MlirOperation, const_name("MlirOperation")); - bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) { - nanobind::object capsule = mlirApiObjectToCapsule(src); - value = mlirPythonCapsuleToOperation(capsule.ptr()); - return !mlirOperationIsNull(value); - } - static handle from_cpp(MlirOperation v, rv_policy, - cleanup_list *cleanup) noexcept { - if (v.ptr == nullptr) - return nanobind::none(); - nanobind::object capsule = - nanobind::steal(mlirPythonOperationToCapsule(v)); - return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("Operation") - .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) - .release(); - }; -}; - -/// Casts object <-> MlirValue. -template <> -struct type_caster { - NB_TYPE_CASTER(MlirValue, const_name("MlirValue")); - bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) { - nanobind::object capsule = mlirApiObjectToCapsule(src); - value = mlirPythonCapsuleToValue(capsule.ptr()); - return !mlirValueIsNull(value); - } - static handle from_cpp(MlirValue v, rv_policy, - cleanup_list *cleanup) noexcept { - if (v.ptr == nullptr) - return nanobind::none(); - nanobind::object capsule = - nanobind::steal(mlirPythonValueToCapsule(v)); - return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("Value") - .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) - .attr(MLIR_PYTHON_MAYBE_DOWNCAST_ATTR)() - .release(); - }; -}; - -/// Casts object -> MlirPassManager. -template <> -struct type_caster { - NB_TYPE_CASTER(MlirPassManager, const_name("MlirPassManager")); - bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) { - nanobind::object capsule = mlirApiObjectToCapsule(src); - value = mlirPythonCapsuleToPassManager(capsule.ptr()); - return !mlirPassManagerIsNull(value); - } -}; - -/// Casts object <-> MlirTypeID. -template <> -struct type_caster { - NB_TYPE_CASTER(MlirTypeID, const_name("MlirTypeID")); - bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) { - nanobind::object capsule = mlirApiObjectToCapsule(src); - value = mlirPythonCapsuleToTypeID(capsule.ptr()); - return !mlirTypeIDIsNull(value); - } - static handle from_cpp(MlirTypeID v, rv_policy, - cleanup_list *cleanup) noexcept { - if (v.ptr == nullptr) - return nanobind::none(); - nanobind::object capsule = - nanobind::steal(mlirPythonTypeIDToCapsule(v)); - return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("TypeID") - .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) - .release(); - }; -}; - -/// Casts object <-> MlirType. 
-template <> -struct type_caster { - NB_TYPE_CASTER(MlirType, const_name("MlirType")); - bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) { - nanobind::object capsule = mlirApiObjectToCapsule(src); - value = mlirPythonCapsuleToType(capsule.ptr()); - return !mlirTypeIsNull(value); - } - static handle from_cpp(MlirType t, rv_policy, - cleanup_list *cleanup) noexcept { - nanobind::object capsule = - nanobind::steal(mlirPythonTypeToCapsule(t)); - return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("Type") - .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) - .attr(MLIR_PYTHON_MAYBE_DOWNCAST_ATTR)() - .release(); - } -}; - -} // namespace detail -} // namespace nanobind - -namespace mlir { -namespace python { -namespace nanobind_adaptors { - -/// Provides a facility like nanobind::class_ for defining a new class in a -/// scope, but this allows extension of an arbitrary Python class, defining -/// methods on it is a similar way. Classes defined in this way are very similar -/// to if defined in Python in the usual way but use nanobind machinery to -/// do it. These are not "real" nanobind classes but pure Python classes -/// with no relation to a concrete C++ class. -/// -/// Derived from a discussion upstream: -/// https://github.com/pybind/pybind11/issues/1193 -/// (plus a fair amount of extra curricular poking) -/// TODO: If this proves useful, see about including it in nanobind. -class pure_subclass { -public: - pure_subclass(nanobind::handle scope, const char *derivedClassName, - const nanobind::object &superClass) { - nanobind::object pyType = - nanobind::borrow((PyObject *)&PyType_Type); - nanobind::object metaclass = pyType(superClass); - nanobind::dict attributes; - - thisClass = metaclass(derivedClassName, nanobind::make_tuple(superClass), - attributes); - scope.attr(derivedClassName) = thisClass; - } - - template - pure_subclass &def(const char *name, Func &&f, const Extra &...extra) { - nanobind::object cf = nanobind::cpp_function( - std::forward(f), nanobind::name(name), nanobind::is_method(), - nanobind::scope(thisClass), extra...); - thisClass.attr(name) = cf; - return *this; - } - - template - pure_subclass &def_property_readonly(const char *name, Func &&f, - const Extra &...extra) { - nanobind::object cf = nanobind::cpp_function( - std::forward(f), nanobind::name(name), nanobind::is_method(), - nanobind::scope(thisClass), extra...); - auto builtinProperty = - nanobind::borrow((PyObject *)&PyProperty_Type); - thisClass.attr(name) = builtinProperty(cf); - return *this; - } - - template - pure_subclass &def_staticmethod(const char *name, Func &&f, - const Extra &...extra) { - static_assert(!std::is_member_function_pointer::value, - "def_staticmethod(...) called with a non-static member " - "function pointer"); - nanobind::object cf = nanobind::cpp_function( - std::forward(f), - nanobind::name(name), // nanobind::scope(thisClass), - extra...); - thisClass.attr(name) = cf; - return *this; - } - - template - pure_subclass &def_classmethod(const char *name, Func &&f, - const Extra &...extra) { - static_assert(!std::is_member_function_pointer::value, - "def_classmethod(...) 
called with a non-static member " - "function pointer"); - nanobind::object cf = nanobind::cpp_function( - std::forward(f), - nanobind::name(name), // nanobind::scope(thisClass), - extra...); - thisClass.attr(name) = - nanobind::borrow(PyClassMethod_New(cf.ptr())); - return *this; - } - - nanobind::object get_class() const { return thisClass; } - -protected: - nanobind::object superClass; - nanobind::object thisClass; -}; - -/// Creates a custom subclass of mlir.ir.Attribute, implementing a casting -/// constructor and type checking methods. -class mlir_attribute_subclass : public pure_subclass { -public: - using IsAFunctionTy = bool (*)(MlirAttribute); - using GetTypeIDFunctionTy = MlirTypeID (*)(); - - /// Subclasses by looking up the super-class dynamically. - mlir_attribute_subclass(nanobind::handle scope, const char *attrClassName, - IsAFunctionTy isaFunction, - GetTypeIDFunctionTy getTypeIDFunction = nullptr) - : mlir_attribute_subclass( - scope, attrClassName, isaFunction, - nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("Attribute"), - getTypeIDFunction) {} - - /// Subclasses with a provided mlir.ir.Attribute super-class. This must - /// be used if the subclass is being defined in the same extension module - /// as the mlir.ir class (otherwise, it will trigger a recursive - /// initialization). - mlir_attribute_subclass(nanobind::handle scope, const char *typeClassName, - IsAFunctionTy isaFunction, - const nanobind::object &superCls, - GetTypeIDFunctionTy getTypeIDFunction = nullptr) - : pure_subclass(scope, typeClassName, superCls) { - // Casting constructor. Note that it hard, if not impossible, to properly - // call chain to parent `__init__` in nanobind due to its special handling - // for init functions that don't have a fully constructed self-reference, - // which makes it impossible to forward it to `__init__` of a superclass. - // Instead, provide a custom `__new__` and call that of a superclass, which - // eventually calls `__init__` of the superclass. Since attribute subclasses - // have no additional members, we can just return the instance thus created - // without amending it. - std::string captureTypeName( - typeClassName); // As string in case if typeClassName is not static. - nanobind::object newCf = nanobind::cpp_function( - [superCls, isaFunction, captureTypeName]( - nanobind::object cls, nanobind::object otherAttribute) { - MlirAttribute rawAttribute = - nanobind::cast(otherAttribute); - if (!isaFunction(rawAttribute)) { - auto origRepr = - nanobind::cast(nanobind::repr(otherAttribute)); - throw std::invalid_argument( - (llvm::Twine("Cannot cast attribute to ") + captureTypeName + - " (from " + origRepr + ")") - .str()); - } - nanobind::object self = superCls.attr("__new__")(cls, otherAttribute); - return self; - }, - nanobind::name("__new__"), nanobind::arg("cls"), - nanobind::arg("cast_from_attr")); - thisClass.attr("__new__") = newCf; - - // 'isinstance' method. 
- def_staticmethod( - "isinstance", - [isaFunction](MlirAttribute other) { return isaFunction(other); }, - nanobind::arg("other_attribute")); - def("__repr__", [superCls, captureTypeName](nanobind::object self) { - return nanobind::repr(superCls(self)) - .attr("replace")(superCls.attr("__name__"), captureTypeName); - }); - if (getTypeIDFunction) { - def_staticmethod("get_static_typeid", - [getTypeIDFunction]() { return getTypeIDFunction(); }); - nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr(MLIR_PYTHON_CAPI_TYPE_CASTER_REGISTER_ATTR)( - getTypeIDFunction())(nanobind::cpp_function( - [thisClass = thisClass](const nanobind::object &mlirAttribute) { - return thisClass(mlirAttribute); - })); - } - } -}; - -/// Creates a custom subclass of mlir.ir.Type, implementing a casting -/// constructor and type checking methods. -class mlir_type_subclass : public pure_subclass { -public: - using IsAFunctionTy = bool (*)(MlirType); - using GetTypeIDFunctionTy = MlirTypeID (*)(); - - /// Subclasses by looking up the super-class dynamically. - mlir_type_subclass(nanobind::handle scope, const char *typeClassName, - IsAFunctionTy isaFunction, - GetTypeIDFunctionTy getTypeIDFunction = nullptr) - : mlir_type_subclass( - scope, typeClassName, isaFunction, - nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("Type"), - getTypeIDFunction) {} - - /// Subclasses with a provided mlir.ir.Type super-class. This must - /// be used if the subclass is being defined in the same extension module - /// as the mlir.ir class (otherwise, it will trigger a recursive - /// initialization). - mlir_type_subclass(nanobind::handle scope, const char *typeClassName, - IsAFunctionTy isaFunction, - const nanobind::object &superCls, - GetTypeIDFunctionTy getTypeIDFunction = nullptr) - : pure_subclass(scope, typeClassName, superCls) { - // Casting constructor. Note that it hard, if not impossible, to properly - // call chain to parent `__init__` in nanobind due to its special handling - // for init functions that don't have a fully constructed self-reference, - // which makes it impossible to forward it to `__init__` of a superclass. - // Instead, provide a custom `__new__` and call that of a superclass, which - // eventually calls `__init__` of the superclass. Since attribute subclasses - // have no additional members, we can just return the instance thus created - // without amending it. - std::string captureTypeName( - typeClassName); // As string in case if typeClassName is not static. - nanobind::object newCf = nanobind::cpp_function( - [superCls, isaFunction, captureTypeName](nanobind::object cls, - nanobind::object otherType) { - MlirType rawType = nanobind::cast(otherType); - if (!isaFunction(rawType)) { - auto origRepr = - nanobind::cast(nanobind::repr(otherType)); - throw std::invalid_argument((llvm::Twine("Cannot cast type to ") + - captureTypeName + " (from " + - origRepr + ")") - .str()); - } - nanobind::object self = superCls.attr("__new__")(cls, otherType); - return self; - }, - nanobind::name("__new__"), nanobind::arg("cls"), - nanobind::arg("cast_from_type")); - thisClass.attr("__new__") = newCf; - - // 'isinstance' method. 
- def_staticmethod( - "isinstance", - [isaFunction](MlirType other) { return isaFunction(other); }, - nanobind::arg("other_type")); - def("__repr__", [superCls, captureTypeName](nanobind::object self) { - return nanobind::repr(superCls(self)) - .attr("replace")(superCls.attr("__name__"), captureTypeName); - }); - if (getTypeIDFunction) { - // 'get_static_typeid' method. - // This is modeled as a static method instead of a static property because - // `def_property_readonly_static` is not available in `pure_subclass` and - // we do not want to introduce the complexity that pybind uses to - // implement it. - def_staticmethod("get_static_typeid", - [getTypeIDFunction]() { return getTypeIDFunction(); }); - nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr(MLIR_PYTHON_CAPI_TYPE_CASTER_REGISTER_ATTR)( - getTypeIDFunction())(nanobind::cpp_function( - [thisClass = thisClass](const nanobind::object &mlirType) { - return thisClass(mlirType); - })); - } - } -}; - -/// Creates a custom subclass of mlir.ir.Value, implementing a casting -/// constructor and type checking methods. -class mlir_value_subclass : public pure_subclass { -public: - using IsAFunctionTy = bool (*)(MlirValue); - - /// Subclasses by looking up the super-class dynamically. - mlir_value_subclass(nanobind::handle scope, const char *valueClassName, - IsAFunctionTy isaFunction) - : mlir_value_subclass( - scope, valueClassName, isaFunction, - nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("Value")) {} - - /// Subclasses with a provided mlir.ir.Value super-class. This must - /// be used if the subclass is being defined in the same extension module - /// as the mlir.ir class (otherwise, it will trigger a recursive - /// initialization). - mlir_value_subclass(nanobind::handle scope, const char *valueClassName, - IsAFunctionTy isaFunction, - const nanobind::object &superCls) - : pure_subclass(scope, valueClassName, superCls) { - // Casting constructor. Note that it hard, if not impossible, to properly - // call chain to parent `__init__` in nanobind due to its special handling - // for init functions that don't have a fully constructed self-reference, - // which makes it impossible to forward it to `__init__` of a superclass. - // Instead, provide a custom `__new__` and call that of a superclass, which - // eventually calls `__init__` of the superclass. Since attribute subclasses - // have no additional members, we can just return the instance thus created - // without amending it. - std::string captureValueName( - valueClassName); // As string in case if valueClassName is not static. - nanobind::object newCf = nanobind::cpp_function( - [superCls, isaFunction, captureValueName](nanobind::object cls, - nanobind::object otherValue) { - MlirValue rawValue = nanobind::cast(otherValue); - if (!isaFunction(rawValue)) { - auto origRepr = - nanobind::cast(nanobind::repr(otherValue)); - throw std::invalid_argument((llvm::Twine("Cannot cast value to ") + - captureValueName + " (from " + - origRepr + ")") - .str()); - } - nanobind::object self = superCls.attr("__new__")(cls, otherValue); - return self; - }, - nanobind::name("__new__"), nanobind::arg("cls"), - nanobind::arg("cast_from_value")); - thisClass.attr("__new__") = newCf; - - // 'isinstance' method. - def_staticmethod( - "isinstance", - [isaFunction](MlirValue other) { return isaFunction(other); }, - nanobind::arg("other_value")); - } -}; - -} // namespace nanobind_adaptors - -/// RAII scope intercepting all diagnostics into a string. 
The message must be -/// checked before this goes out of scope. -class CollectDiagnosticsToStringScope { -public: - explicit CollectDiagnosticsToStringScope(MlirContext ctx) : context(ctx) { - handlerID = mlirContextAttachDiagnosticHandler(ctx, &handler, &errorMessage, - /*deleteUserData=*/nullptr); - } - ~CollectDiagnosticsToStringScope() { - assert(errorMessage.empty() && "unchecked error message"); - mlirContextDetachDiagnosticHandler(context, handlerID); - } - - [[nodiscard]] std::string takeMessage() { return std::move(errorMessage); } - -private: - static MlirLogicalResult handler(MlirDiagnostic diag, void *data) { - auto printer = +[](MlirStringRef message, void *data) { - *static_cast(data) += - llvm::StringRef(message.data, message.length); - }; - MlirLocation loc = mlirDiagnosticGetLocation(diag); - *static_cast(data) += "at "; - mlirLocationPrint(loc, printer, data); - *static_cast(data) += ": "; - mlirDiagnosticPrint(diag, printer, data); - return mlirLogicalResultSuccess(); - } - - MlirContext context; - MlirDiagnosticHandlerID handlerID; - std::string errorMessage = ""; -}; - -} // namespace python -} // namespace mlir - -#endif // MLIR_BINDINGS_PYTHON_NANOBINDADAPTORS_H diff --git a/mlir/include/mlir/Bindings/Python/PybindAdaptors.h b/mlir/include/mlir/Bindings/Python/PybindAdaptors.h index c8233355d1d67..df4b9bf713592 100644 --- a/mlir/include/mlir/Bindings/Python/PybindAdaptors.h +++ b/mlir/include/mlir/Bindings/Python/PybindAdaptors.h @@ -1,4 +1,4 @@ -//===- PybindAdaptors.h - Interop with MLIR APIs via pybind11 -------------===// +//===- PybindAdaptors.h - Adaptors for interop with MLIR APIs -------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,10 +6,9 @@ // //===----------------------------------------------------------------------===// // This file contains adaptors for clients of the core MLIR Python APIs to -// interop via MLIR CAPI types, using pybind11. The facilities here do not -// depend on implementation details of the MLIR Python API and do not introduce -// C++-level dependencies with it (requiring only Python and CAPI-level -// dependencies). +// interop via MLIR CAPI types. The facilities here do not depend on +// implementation details of the MLIR Python API and do not introduce C++-level +// dependencies with it (requiring only Python and CAPI-level dependencies). // // It is encouraged to be used both in-tree and out-of-tree. For in-tree use // cases, it should be used for dialect implementations (versus relying on @@ -612,6 +611,40 @@ class mlir_value_subclass : public pure_subclass { } // namespace adaptors +/// RAII scope intercepting all diagnostics into a string. The message must be +/// checked before this goes out of scope. 
+class CollectDiagnosticsToStringScope { +public: + explicit CollectDiagnosticsToStringScope(MlirContext ctx) : context(ctx) { + handlerID = mlirContextAttachDiagnosticHandler(ctx, &handler, &errorMessage, + /*deleteUserData=*/nullptr); + } + ~CollectDiagnosticsToStringScope() { + assert(errorMessage.empty() && "unchecked error message"); + mlirContextDetachDiagnosticHandler(context, handlerID); + } + + [[nodiscard]] std::string takeMessage() { return std::move(errorMessage); } + +private: + static MlirLogicalResult handler(MlirDiagnostic diag, void *data) { + auto printer = +[](MlirStringRef message, void *data) { + *static_cast(data) += + llvm::StringRef(message.data, message.length); + }; + MlirLocation loc = mlirDiagnosticGetLocation(diag); + *static_cast(data) += "at "; + mlirLocationPrint(loc, printer, data); + *static_cast(data) += ": "; + mlirDiagnosticPrint(diag, printer, data); + return mlirLogicalResultSuccess(); + } + + MlirContext context; + MlirDiagnosticHandlerID handlerID; + std::string errorMessage = ""; +}; + } // namespace python } // namespace mlir diff --git a/mlir/lib/Bindings/Python/DialectLLVM.cpp b/mlir/lib/Bindings/Python/DialectLLVM.cpp index cccf1370b8cc8..42a4c8c0793ba 100644 --- a/mlir/lib/Bindings/Python/DialectLLVM.cpp +++ b/mlir/lib/Bindings/Python/DialectLLVM.cpp @@ -6,13 +6,11 @@ // //===----------------------------------------------------------------------===// -#include - #include "mlir-c/Dialect/LLVM.h" #include "mlir-c/IR.h" #include "mlir-c/Support.h" -#include "mlir/Bindings/Python/Diagnostics.h" #include "mlir/Bindings/Python/PybindAdaptors.h" +#include namespace py = pybind11; using namespace llvm; diff --git a/mlir/lib/Bindings/Python/TransformInterpreter.cpp b/mlir/lib/Bindings/Python/TransformInterpreter.cpp index 0c8c0e0a965aa..f6b4532b1b6be 100644 --- a/mlir/lib/Bindings/Python/TransformInterpreter.cpp +++ b/mlir/lib/Bindings/Python/TransformInterpreter.cpp @@ -10,15 +10,14 @@ // //===----------------------------------------------------------------------===// -#include -#include - #include "mlir-c/Dialect/Transform/Interpreter.h" #include "mlir-c/IR.h" #include "mlir-c/Support.h" -#include "mlir/Bindings/Python/Diagnostics.h" #include "mlir/Bindings/Python/PybindAdaptors.h" +#include +#include + namespace py = pybind11; namespace { diff --git a/mlir/python/CMakeLists.txt b/mlir/python/CMakeLists.txt index e1b870b53ad25..23187f256455b 100644 --- a/mlir/python/CMakeLists.txt +++ b/mlir/python/CMakeLists.txt @@ -683,9 +683,7 @@ if(MLIR_INCLUDE_TESTS) MLIRPythonTestSources.Dialects.PythonTest ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir" ADD_TO_PARENT MLIRPythonTestSources.Dialects - SOURCES - dialects/python_test.py - ) + SOURCES dialects/python_test.py) set(LLVM_TARGET_DEFINITIONS "${MLIR_MAIN_SRC_DIR}/test/python/python_test_ops.td") mlir_tablegen( @@ -699,25 +697,12 @@ if(MLIR_INCLUDE_TESTS) ADD_TO_PARENT MLIRPythonTestSources.Dialects.PythonTest SOURCES "dialects/_python_test_ops_gen.py") - declare_mlir_python_extension(MLIRPythonTestSources.PythonTestExtensionPybind11 - MODULE_NAME _mlirPythonTestPybind11 - ADD_TO_PARENT MLIRPythonTestSources.Dialects - ROOT_DIR "${MLIR_SOURCE_DIR}/test/python/lib" - PYTHON_BINDINGS_LIBRARY pybind11 - SOURCES - PythonTestModulePybind11.cpp - PRIVATE_LINK_LIBS - LLVMSupport - EMBED_CAPI_LINK_LIBS - MLIRCAPIPythonTestDialect - ) - declare_mlir_python_extension(MLIRPythonTestSources.PythonTestExtensionNanobind - MODULE_NAME _mlirPythonTestNanobind + 
declare_mlir_python_extension(MLIRPythonTestSources.PythonTestExtension + MODULE_NAME _mlirPythonTest ADD_TO_PARENT MLIRPythonTestSources.Dialects ROOT_DIR "${MLIR_SOURCE_DIR}/test/python/lib" - PYTHON_BINDINGS_LIBRARY nanobind SOURCES - PythonTestModuleNanobind.cpp + PythonTestModule.cpp PRIVATE_LINK_LIBS LLVMSupport EMBED_CAPI_LINK_LIBS diff --git a/mlir/python/mlir/dialects/python_test.py b/mlir/python/mlir/dialects/python_test.py index 9380896c8c06e..b5baa80bc767f 100644 --- a/mlir/python/mlir/dialects/python_test.py +++ b/mlir/python/mlir/dialects/python_test.py @@ -3,14 +3,15 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception from ._python_test_ops_gen import * +from .._mlir_libs._mlirPythonTest import ( + TestAttr, + TestType, + TestTensorValue, + TestIntegerRankedTensorType, +) -def register_python_test_dialect(registry, use_nanobind): - if use_nanobind: - from .._mlir_libs import _mlirPythonTestNanobind +def register_python_test_dialect(registry): + from .._mlir_libs import _mlirPythonTest - _mlirPythonTestNanobind.register_dialect(registry) - else: - from .._mlir_libs import _mlirPythonTestPybind11 - - _mlirPythonTestPybind11.register_dialect(registry) + _mlirPythonTest.register_dialect(registry) diff --git a/mlir/python/requirements.txt b/mlir/python/requirements.txt index ab8a9122919e1..272d066831f92 100644 --- a/mlir/python/requirements.txt +++ b/mlir/python/requirements.txt @@ -1,4 +1,3 @@ -nanobind>=2.0, <3.0 numpy>=1.19.5, <=2.1.2 pybind11>=2.10.0, <=2.13.6 PyYAML>=5.4.0, <=6.0.1 diff --git a/mlir/test/python/dialects/python_test.py b/mlir/test/python/dialects/python_test.py index fd678f8321fd9..948d1225ea489 100644 --- a/mlir/test/python/dialects/python_test.py +++ b/mlir/test/python/dialects/python_test.py @@ -1,33 +1,12 @@ -# RUN: %PYTHON %s pybind11 | FileCheck %s -# RUN: %PYTHON %s nanobind | FileCheck %s +# RUN: %PYTHON %s | FileCheck %s -import sys from mlir.ir import * import mlir.dialects.func as func import mlir.dialects.python_test as test import mlir.dialects.tensor as tensor import mlir.dialects.arith as arith -if sys.argv[1] == "pybind11": - from mlir._mlir_libs._mlirPythonTestPybind11 import ( - TestAttr, - TestType, - TestTensorValue, - TestIntegerRankedTensorType, - ) - - test.register_python_test_dialect(get_dialect_registry(), use_nanobind=False) -elif sys.argv[1] == "nanobind": - from mlir._mlir_libs._mlirPythonTestNanobind import ( - TestAttr, - TestType, - TestTensorValue, - TestIntegerRankedTensorType, - ) - - test.register_python_test_dialect(get_dialect_registry(), use_nanobind=True) -else: - raise ValueError("Expected pybind11 or nanobind as argument") +test.register_python_test_dialect(get_dialect_registry()) def run(f): @@ -329,7 +308,7 @@ def testOptionalOperandOp(): @run def testCustomAttribute(): with Context() as ctx, Location.unknown(): - a = TestAttr.get() + a = test.TestAttr.get() # CHECK: #python_test.test_attr print(a) @@ -346,11 +325,11 @@ def testCustomAttribute(): print(repr(op2.test_attr)) # The following cast must not assert. - b = TestAttr(a) + b = test.TestAttr(a) unit = UnitAttr.get() try: - TestAttr(unit) + test.TestAttr(unit) except ValueError as e: assert "Cannot cast attribute to TestAttr" in str(e) else: @@ -359,7 +338,7 @@ def testCustomAttribute(): # The following must trigger a TypeError from our adaptors and must not # crash. 
try: - TestAttr(42) + test.TestAttr(42) except TypeError as e: assert "Expected an MLIR object" in str(e) else: @@ -368,7 +347,7 @@ def testCustomAttribute(): # The following must trigger a TypeError from pybind (therefore, not # checking its message) and must not crash. try: - TestAttr(42, 56) + test.TestAttr(42, 56) except TypeError: pass else: @@ -378,12 +357,12 @@ def testCustomAttribute(): @run def testCustomType(): with Context() as ctx: - a = TestType.get() + a = test.TestType.get() # CHECK: !python_test.test_type print(a) # The following cast must not assert. - b = TestType(a) + b = test.TestType(a) # Instance custom types should have typeids assert isinstance(b.typeid, TypeID) # Subclasses of ir.Type should not have a static_typeid @@ -395,7 +374,7 @@ def testCustomType(): i8 = IntegerType.get_signless(8) try: - TestType(i8) + test.TestType(i8) except ValueError as e: assert "Cannot cast type to TestType" in str(e) else: @@ -404,7 +383,7 @@ def testCustomType(): # The following must trigger a TypeError from our adaptors and must not # crash. try: - TestType(42) + test.TestType(42) except TypeError as e: assert "Expected an MLIR object" in str(e) else: @@ -413,7 +392,7 @@ def testCustomType(): # The following must trigger a TypeError from pybind (therefore, not # checking its message) and must not crash. try: - TestType(42, 56) + test.TestType(42, 56) except TypeError: pass else: @@ -426,7 +405,7 @@ def testTensorValue(): with Context() as ctx, Location.unknown(): i8 = IntegerType.get_signless(8) - class Tensor(TestTensorValue): + class Tensor(test.TestTensorValue): def __str__(self): return super().__str__().replace("Value", "Tensor") @@ -446,9 +425,9 @@ def __str__(self): # Classes of custom types that inherit from concrete types should have # static_typeid - assert isinstance(TestIntegerRankedTensorType.static_typeid, TypeID) + assert isinstance(test.TestIntegerRankedTensorType.static_typeid, TypeID) # And it should be equal to the in-tree concrete type - assert TestIntegerRankedTensorType.static_typeid == t.type.typeid + assert test.TestIntegerRankedTensorType.static_typeid == t.type.typeid d = tensor.EmptyOp([1, 2, 3], IntegerType.get_signless(5)).result # CHECK: Value(%{{.*}} = tensor.empty() : tensor<1x2x3xi5>) @@ -512,7 +491,7 @@ def inferReturnTypeComponents(): @run def testCustomTypeTypeCaster(): with Context() as ctx, Location.unknown(): - a = TestType.get() + a = test.TestType.get() assert a.typeid is not None b = Type.parse("!python_test.test_type") @@ -521,7 +500,7 @@ def testCustomTypeTypeCaster(): # CHECK: TestType(!python_test.test_type) print(repr(b)) - c = TestIntegerRankedTensorType.get([10, 10], 5) + c = test.TestIntegerRankedTensorType.get([10, 10], 5) # CHECK: tensor<10x10xi5> print(c) # CHECK: TestIntegerRankedTensorType(tensor<10x10xi5>) @@ -532,7 +511,7 @@ def testCustomTypeTypeCaster(): @register_type_caster(c.typeid) def type_caster(pytype): - return TestIntegerRankedTensorType(pytype) + return test.TestIntegerRankedTensorType(pytype) except RuntimeError as e: print(e) @@ -551,7 +530,7 @@ def type_caster(pytype): @register_type_caster(c.typeid, replace=True) def type_caster(pytype): - return TestIntegerRankedTensorType(pytype) + return test.TestIntegerRankedTensorType(pytype) d = tensor.EmptyOp([10, 10], IntegerType.get_signless(5)).result # CHECK: tensor<10x10xi5> diff --git a/mlir/test/python/lib/CMakeLists.txt b/mlir/test/python/lib/CMakeLists.txt index 198ed8211e773..d7cbbfbc21477 100644 --- a/mlir/test/python/lib/CMakeLists.txt +++ 
b/mlir/test/python/lib/CMakeLists.txt @@ -1,8 +1,7 @@ set(LLVM_OPTIONAL_SOURCES PythonTestCAPI.cpp PythonTestDialect.cpp - PythonTestModulePybind11.cpp - PythonTestModuleNanobind.cpp + PythonTestModule.cpp ) add_mlir_library(MLIRPythonTestDialect diff --git a/mlir/test/python/lib/PythonTestModulePybind11.cpp b/mlir/test/python/lib/PythonTestModule.cpp similarity index 96% rename from mlir/test/python/lib/PythonTestModulePybind11.cpp rename to mlir/test/python/lib/PythonTestModule.cpp index 94a5f5178d16e..a4f538dcb5594 100644 --- a/mlir/test/python/lib/PythonTestModulePybind11.cpp +++ b/mlir/test/python/lib/PythonTestModule.cpp @@ -5,8 +5,6 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// This is the pybind11 edition of the PythonTest dialect module. -//===----------------------------------------------------------------------===// #include "PythonTestCAPI.h" #include "mlir-c/BuiltinAttributes.h" @@ -23,7 +21,7 @@ static bool mlirTypeIsARankedIntegerTensor(MlirType t) { mlirTypeIsAInteger(mlirShapedTypeGetElementType(t)); } -PYBIND11_MODULE(_mlirPythonTestPybind11, m) { +PYBIND11_MODULE(_mlirPythonTest, m) { m.def( "register_python_test_dialect", [](MlirContext context, bool load) { diff --git a/mlir/test/python/lib/PythonTestModuleNanobind.cpp b/mlir/test/python/lib/PythonTestModuleNanobind.cpp deleted file mode 100644 index 7c504d04be0d1..0000000000000 --- a/mlir/test/python/lib/PythonTestModuleNanobind.cpp +++ /dev/null @@ -1,121 +0,0 @@ -//===- PythonTestModuleNanobind.cpp - PythonTest dialect extension --------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// This is the nanobind edition of the PythonTest dialect module. 
-//===----------------------------------------------------------------------===// - -#include -#include - -#include "PythonTestCAPI.h" -#include "mlir-c/BuiltinAttributes.h" -#include "mlir-c/BuiltinTypes.h" -#include "mlir-c/IR.h" -#include "mlir/Bindings/Python/NanobindAdaptors.h" - -namespace nb = nanobind; -using namespace mlir::python::nanobind_adaptors; - -static bool mlirTypeIsARankedIntegerTensor(MlirType t) { - return mlirTypeIsARankedTensor(t) && - mlirTypeIsAInteger(mlirShapedTypeGetElementType(t)); -} - -NB_MODULE(_mlirPythonTestNanobind, m) { - m.def( - "register_python_test_dialect", - [](MlirContext context, bool load) { - MlirDialectHandle pythonTestDialect = - mlirGetDialectHandle__python_test__(); - mlirDialectHandleRegisterDialect(pythonTestDialect, context); - if (load) { - mlirDialectHandleLoadDialect(pythonTestDialect, context); - } - }, - nb::arg("context"), nb::arg("load") = true); - - m.def( - "register_dialect", - [](MlirDialectRegistry registry) { - MlirDialectHandle pythonTestDialect = - mlirGetDialectHandle__python_test__(); - mlirDialectHandleInsertDialect(pythonTestDialect, registry); - }, - nb::arg("registry")); - - mlir_attribute_subclass(m, "TestAttr", - mlirAttributeIsAPythonTestTestAttribute, - mlirPythonTestTestAttributeGetTypeID) - .def_classmethod( - "get", - [](const nb::object &cls, MlirContext ctx) { - return cls(mlirPythonTestTestAttributeGet(ctx)); - }, - nb::arg("cls"), nb::arg("context").none() = nb::none()); - - mlir_type_subclass(m, "TestType", mlirTypeIsAPythonTestTestType, - mlirPythonTestTestTypeGetTypeID) - .def_classmethod( - "get", - [](const nb::object &cls, MlirContext ctx) { - return cls(mlirPythonTestTestTypeGet(ctx)); - }, - nb::arg("cls"), nb::arg("context").none() = nb::none()); - - auto typeCls = - mlir_type_subclass(m, "TestIntegerRankedTensorType", - mlirTypeIsARankedIntegerTensor, - nb::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("RankedTensorType")) - .def_classmethod( - "get", - [](const nb::object &cls, std::vector shape, - unsigned width, MlirContext ctx) { - MlirAttribute encoding = mlirAttributeGetNull(); - return cls(mlirRankedTensorTypeGet( - shape.size(), shape.data(), mlirIntegerTypeGet(ctx, width), - encoding)); - }, - nb::arg("cls"), nb::arg("shape"), nb::arg("width"), - nb::arg("context").none() = nb::none()); - - assert(nb::hasattr(typeCls.get_class(), "static_typeid") && - "TestIntegerRankedTensorType has no static_typeid"); - - MlirTypeID mlirRankedTensorTypeID = mlirRankedTensorTypeGetTypeID(); - - nb::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr(MLIR_PYTHON_CAPI_TYPE_CASTER_REGISTER_ATTR)( - mlirRankedTensorTypeID, nb::arg("replace") = true)( - nanobind::cpp_function([typeCls](const nb::object &mlirType) { - return typeCls.get_class()(mlirType); - })); - - auto valueCls = mlir_value_subclass(m, "TestTensorValue", - mlirTypeIsAPythonTestTestTensorValue) - .def("is_null", [](MlirValue &self) { - return mlirValueIsNull(self); - }); - - nb::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr(MLIR_PYTHON_CAPI_VALUE_CASTER_REGISTER_ATTR)( - mlirRankedTensorTypeID)( - nanobind::cpp_function([valueCls](const nb::object &valueObj) { - nb::object capsule = mlirApiObjectToCapsule(valueObj); - MlirValue v = mlirPythonCapsuleToValue(capsule.ptr()); - MlirType t = mlirValueGetType(v); - // This is hyper-specific in order to exercise/test registering a - // value caster from cpp (but only for a single test case; see - // testTensorValue python_test.py). 
- if (mlirShapedTypeHasStaticShape(t) && - mlirShapedTypeGetDimSize(t, 0) == 1 && - mlirShapedTypeGetDimSize(t, 1) == 2 && - mlirShapedTypeGetDimSize(t, 2) == 3) - return valueCls.get_class()(valueObj); - return valueObj; - })); -} diff --git a/utils/bazel/WORKSPACE b/utils/bazel/WORKSPACE index 66ba1ac1b17e1..7baca11eed3d3 100644 --- a/utils/bazel/WORKSPACE +++ b/utils/bazel/WORKSPACE @@ -148,24 +148,6 @@ maybe( url = "https://github.com/pybind/pybind11/archive/v2.10.3.zip", ) -maybe( - http_archive, - name = "robin_map", - strip_prefix = "robin-map-1.3.0", - sha256 = "a8424ad3b0affd4c57ed26f0f3d8a29604f0e1f2ef2089f497f614b1c94c7236", - build_file = "@llvm-raw//utils/bazel/third_party_build:robin_map.BUILD", - url = "https://github.com/Tessil/robin-map/archive/refs/tags/v1.3.0.tar.gz", -) - -maybe( - http_archive, - name = "nanobind", - build_file = "@llvm-raw//utils/bazel/third_party_build:nanobind.BUILD", - sha256 = "bfbfc7e5759f1669e4ddb48752b1ddc5647d1430e94614d6f8626df1d508e65a", - strip_prefix = "nanobind-2.2.0", - url = "https://github.com/wjakob/nanobind/archive/refs/tags/v2.2.0.tar.gz", -) - load("@rules_python//python:repositories.bzl", "py_repositories", "python_register_toolchains") py_repositories() diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 544becfa30b40..179fed2f5e9a0 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -932,6 +932,7 @@ exports_files( filegroup( name = "MLIRBindingsPythonHeaderFiles", srcs = glob([ + "lib/Bindings/Python/*.h", "include/mlir-c/Bindings/Python/*.h", "include/mlir/Bindings/Python/*.h", ]), @@ -941,10 +942,12 @@ cc_library( name = "MLIRBindingsPythonHeaders", includes = [ "include", + "lib/Bindings/Python", ], textual_hdrs = [":MLIRBindingsPythonHeaderFiles"], deps = [ ":CAPIIRHeaders", + ":CAPITransformsHeaders", "@pybind11", "@rules_python//python/cc:current_py_cc_headers", ], @@ -954,41 +957,17 @@ cc_library( name = "MLIRBindingsPythonHeadersAndDeps", includes = [ "include", + "lib/Bindings/Python", ], textual_hdrs = [":MLIRBindingsPythonHeaderFiles"], deps = [ ":CAPIIR", + ":CAPITransforms", "@pybind11", "@rules_python//python/cc:current_py_cc_headers", ], ) -cc_library( - name = "MLIRBindingsPythonNanobindHeaders", - includes = [ - "include", - ], - textual_hdrs = [":MLIRBindingsPythonHeaderFiles"], - deps = [ - ":CAPIIRHeaders", - "@nanobind", - "@rules_python//python/cc:current_py_cc_headers", - ], -) - -cc_library( - name = "MLIRBindingsPythonNanobindHeadersAndDeps", - includes = [ - "include", - ], - textual_hdrs = [":MLIRBindingsPythonHeaderFiles"], - deps = [ - ":CAPIIR", - "@nanobind", - "@rules_python//python/cc:current_py_cc_headers", - ], -) - # These flags are needed for pybind11 to work. 
PYBIND11_COPTS = [ "-fexceptions", @@ -1014,25 +993,16 @@ filegroup( ], ) -filegroup( - name = "MLIRBindingsPythonCoreHeaders", - srcs = glob([ - "lib/Bindings/Python/*.h", - ]), -) - cc_library( name = "MLIRBindingsPythonCore", srcs = [":MLIRBindingsPythonSourceFiles"], copts = PYBIND11_COPTS, features = PYBIND11_FEATURES, - textual_hdrs = [":MLIRBindingsPythonCoreHeaders"], deps = [ ":CAPIAsync", ":CAPIDebug", ":CAPIIR", ":CAPIInterfaces", - ":CAPITransforms", ":MLIRBindingsPythonHeadersAndDeps", ":Support", ":config", @@ -1047,12 +1017,10 @@ cc_library( srcs = [":MLIRBindingsPythonSourceFiles"], copts = PYBIND11_COPTS, features = PYBIND11_FEATURES, - textual_hdrs = [":MLIRBindingsPythonCoreHeaders"], deps = [ ":CAPIAsyncHeaders", ":CAPIDebugHeaders", ":CAPIIRHeaders", - ":CAPITransformsHeaders", ":MLIRBindingsPythonHeaders", ":Support", ":config", @@ -1082,9 +1050,6 @@ cc_binary( # These flags are needed for pybind11 to work. copts = PYBIND11_COPTS, features = PYBIND11_FEATURES, - includes = [ - "lib/Bindings/Python", - ], linkshared = 1, linkstatic = 0, deps = [ @@ -1098,9 +1063,6 @@ cc_binary( srcs = ["lib/Bindings/Python/DialectLinalg.cpp"], copts = PYBIND11_COPTS, features = PYBIND11_FEATURES, - includes = [ - "lib/Bindings/Python", - ], linkshared = 1, linkstatic = 0, deps = [ @@ -8486,9 +8448,9 @@ cc_library( hdrs = ["include/mlir/Conversion/ConvertToLLVM/ToLLVMPass.h"], includes = ["include"], deps = [ - ":Analysis", ":ConversionPassIncGen", ":ConvertToLLVMInterface", + ":Analysis", ":IR", ":LLVMCommonConversion", ":LLVMDialect", diff --git a/utils/bazel/third_party_build/nanobind.BUILD b/utils/bazel/third_party_build/nanobind.BUILD deleted file mode 100644 index 262d14a040b87..0000000000000 --- a/utils/bazel/third_party_build/nanobind.BUILD +++ /dev/null @@ -1,25 +0,0 @@ -cc_library( - name = "nanobind", - srcs = glob( - [ - "src/*.cpp", - ], - exclude = ["src/nb_combined.cpp"], - ), - defines = [ - "NB_BUILD=1", - "NB_SHARED=1", - ], - includes = ["include"], - textual_hdrs = glob( - [ - "include/**/*.h", - "src/*.h", - ], - ), - visibility = ["//visibility:public"], - deps = [ - "@robin_map", - "@rules_python//python/cc:current_py_cc_headers", - ], -) diff --git a/utils/bazel/third_party_build/robin_map.BUILD b/utils/bazel/third_party_build/robin_map.BUILD deleted file mode 100644 index b8d04beaed81f..0000000000000 --- a/utils/bazel/third_party_build/robin_map.BUILD +++ /dev/null @@ -1,12 +0,0 @@ -cc_library( - name = "robin_map", - hdrs = [ - "include/tsl/robin_growth_policy.h", - "include/tsl/robin_hash.h", - "include/tsl/robin_map.h", - "include/tsl/robin_set.h", - ], - includes = ["."], - strip_include_prefix = "include", - visibility = ["//visibility:public"], -) From 95a4d30b0d64c544106acf306c3d3fa5fde99c02 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Tue, 3 Dec 2024 12:26:47 -0500 Subject: [PATCH 097/191] [NFC] Remove trailing white spaces in `clang/docs/ReleaseNotes.rst` --- clang/docs/ReleaseNotes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 922f49c453e15..0b945d73f0d74 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -310,7 +310,7 @@ Resolutions to C++ Defect Reports by default. (`CWG2521: User-defined literals and reserved identifiers `_). -- Fix name lookup for a dependent base class that is the current instantiation. +- Fix name lookup for a dependent base class that is the current instantiation. 
(`CWG591: When a dependent base class is the current instantiation `_). C Language Changes From eb257fe37ba1ea41bd162e2fbd0ee4cd33fcdcea Mon Sep 17 00:00:00 2001 From: erichkeane Date: Tue, 3 Dec 2024 09:30:21 -0800 Subject: [PATCH 098/191] [OpenACC] Enable 3 more clauses for combined constructs. 'num_gangs', 'num_workers', and 'vector_length' are similar to each other, and are all the same implementation as for compute constructs, so this patch just enables them and adds the necessary testing to ensure they work correctly. These will get more complicated when they get combined with 'gang', 'worker', 'vector' and 'reduction', but those restrictions will be implemented when those clauses are enabled. --- clang/lib/Sema/SemaOpenACC.cpp | 23 +- .../ast-print-openacc-combined-construct.cpp | 18 ++ ...d-construct-auto_seq_independent-clauses.c | 36 --- .../combined-construct-device_type-clause.c | 1 - .../combined-construct-num_gangs-ast.cpp | 121 +++++++++ .../combined-construct-num_gangs-clause.c | 45 ++++ .../combined-construct-num_workers-ast.cpp | 230 ++++++++++++++++++ .../combined-construct-num_workers-clause.c | 37 +++ .../combined-construct-vector_length-ast.cpp | 98 ++++++++ .../combined-construct-vector_length-clause.c | 37 +++ 10 files changed, 587 insertions(+), 59 deletions(-) create mode 100644 clang/test/SemaOpenACC/combined-construct-num_gangs-ast.cpp create mode 100644 clang/test/SemaOpenACC/combined-construct-num_gangs-clause.c create mode 100644 clang/test/SemaOpenACC/combined-construct-num_workers-ast.cpp create mode 100644 clang/test/SemaOpenACC/combined-construct-num_workers-clause.c create mode 100644 clang/test/SemaOpenACC/combined-construct-vector_length-ast.cpp create mode 100644 clang/test/SemaOpenACC/combined-construct-vector_length-clause.c diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp index 654f3cd97c1c5..060df967322ac 100644 --- a/clang/lib/Sema/SemaOpenACC.cpp +++ b/clang/lib/Sema/SemaOpenACC.cpp @@ -694,14 +694,6 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitSelfClause( OpenACCClause *SemaOpenACCClauseVisitor::VisitNumGangsClause( SemaOpenACC::OpenACCParsedClause &Clause) { - // Restrictions only properly implemented on 'compute' constructs, and - // 'compute' constructs are the only construct that can do anything with - // this yet, so skip/treat as unimplemented in this case. - // TODO OpenACC: Remove this check when we have combined constructs for this - // clause. - if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind())) - return isNotImplemented(); - // There is no prose in the standard that says duplicates aren't allowed, // but this diagnostic is present in other compilers, as well as makes // sense. @@ -730,6 +722,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitNumGangsClause( // OpenACC 3.3 Section 2.5.4: // A reduction clause may not appear on a parallel construct with a // num_gangs clause that has more than one argument. + // TODO: OpenACC: Reduction on Combined Construct needs to do this too. if (Clause.getDirectiveKind() == OpenACCDirectiveKind::Parallel && Clause.getIntExprs().size() > 1) { auto *Parallel = @@ -751,13 +744,6 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitNumGangsClause( OpenACCClause *SemaOpenACCClauseVisitor::VisitNumWorkersClause( SemaOpenACC::OpenACCParsedClause &Clause) { - // Restrictions only properly implemented on 'compute' constructs, and - // 'compute' constructs are the only construct that can do anything with - // this yet, so skip/treat as unimplemented in this case.
- // TODO: OpenACC: Remove when we get combined constructs. - if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind())) - return isNotImplemented(); - // There is no prose in the standard that says duplicates aren't allowed, // but this diagnostic is present in other compilers, as well as makes // sense. @@ -773,13 +759,6 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitNumWorkersClause( OpenACCClause *SemaOpenACCClauseVisitor::VisitVectorLengthClause( SemaOpenACC::OpenACCParsedClause &Clause) { - // Restrictions only properly implemented on 'compute' constructs, and - // 'compute' constructs are the only construct that can do anything with - // this yet, so skip/treat as unimplemented in this case. - // TODO: OpenACC: Remove when we get combined constructs. - if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind())) - return isNotImplemented(); - // There is no prose in the standard that says duplicates aren't allowed, // but this diagnostic is present in other compilers, as well as makes // sense. diff --git a/clang/test/AST/ast-print-openacc-combined-construct.cpp b/clang/test/AST/ast-print-openacc-combined-construct.cpp index d16e446706807..435c770c7457d 100644 --- a/clang/test/AST/ast-print-openacc-combined-construct.cpp +++ b/clang/test/AST/ast-print-openacc-combined-construct.cpp @@ -224,4 +224,22 @@ void foo() { for(int i = 0;i<5;++i) for(int i = 0;i<5;++i); +// CHECK: #pragma acc parallel loop num_gangs(i, (int)array[2]) +// CHECK-NEXT: for (int i = 0; i < 5; ++i) +// CHECK-NEXT: ; +#pragma acc parallel loop num_gangs(i, (int)array[2]) + for(int i = 0;i<5;++i); + +// CHECK: #pragma acc parallel loop num_workers(i) +// CHECK-NEXT: for (int i = 0; i < 5; ++i) +// CHECK-NEXT: ; +#pragma acc parallel loop num_workers(i) + for(int i = 0;i<5;++i); + +// CHECK: #pragma acc parallel loop vector_length((int)array[1]) +// CHECK-NEXT: for (int i = 0; i < 5; ++i) +// CHECK-NEXT: ; +#pragma acc parallel loop vector_length((int)array[1]) + for(int i = 0;i<5;++i); + } diff --git a/clang/test/SemaOpenACC/combined-construct-auto_seq_independent-clauses.c b/clang/test/SemaOpenACC/combined-construct-auto_seq_independent-clauses.c index fc5250ce548e4..a6f57a63a91dd 100644 --- a/clang/test/SemaOpenACC/combined-construct-auto_seq_independent-clauses.c +++ b/clang/test/SemaOpenACC/combined-construct-auto_seq_independent-clauses.c @@ -134,16 +134,10 @@ void uses() { // expected-warning@+1{{OpenACC clause 'bind' not yet implemented}} #pragma acc parallel loop auto bind(Var) for(unsigned i = 0; i < 5; ++i); - // TODOexpected-error@+1{{OpenACC 'vector_length' clause is not valid on 'parallel loop' directive}} - // expected-warning@+1{{OpenACC clause 'vector_length' not yet implemented}} #pragma acc parallel loop auto vector_length(1) for(unsigned i = 0; i < 5; ++i); - // TODOexpected-error@+1{{OpenACC 'num_gangs' clause is not valid on 'parallel loop' directive}} - // expected-warning@+1{{OpenACC clause 'num_gangs' not yet implemented}} #pragma acc parallel loop auto num_gangs(1) for(unsigned i = 0; i < 5; ++i); - // TODOexpected-error@+1{{OpenACC 'num_workers' clause is not valid on 'parallel loop' directive}} - // expected-warning@+1{{OpenACC clause 'num_workers' not yet implemented}} #pragma acc parallel loop auto num_workers(1) for(unsigned i = 0; i < 5; ++i); // expected-warning@+1{{OpenACC clause 'device_num' not yet implemented}} @@ -261,16 +255,10 @@ void uses() { // expected-warning@+1{{OpenACC clause 'bind' not yet implemented}} #pragma acc parallel loop bind(Var) auto for(unsigned i = 
0; i < 5; ++i); - // TODOexpected-error@+1{{OpenACC 'vector_length' clause is not valid on 'parallel loop' directive}} - // expected-warning@+1{{OpenACC clause 'vector_length' not yet implemented}} #pragma acc parallel loop vector_length(1) auto for(unsigned i = 0; i < 5; ++i); - // TODOexpected-error@+1{{OpenACC 'num_gangs' clause is not valid on 'parallel loop' directive}} - // expected-warning@+1{{OpenACC clause 'num_gangs' not yet implemented}} #pragma acc parallel loop num_gangs(1) auto for(unsigned i = 0; i < 5; ++i); - // TODOexpected-error@+1{{OpenACC 'num_workers' clause is not valid on 'parallel loop' directive}} - // expected-warning@+1{{OpenACC clause 'num_workers' not yet implemented}} #pragma acc parallel loop num_workers(1) auto for(unsigned i = 0; i < 5; ++i); // expected-warning@+1{{OpenACC clause 'device_num' not yet implemented}} @@ -389,16 +377,10 @@ void uses() { // expected-warning@+1{{OpenACC clause 'bind' not yet implemented}} #pragma acc parallel loop independent bind(Var) for(unsigned i = 0; i < 5; ++i); - // TODOexpected-error@+1{{OpenACC 'vector_length' clause is not valid on 'parallel loop' directive}} - // expected-warning@+1{{OpenACC clause 'vector_length' not yet implemented}} #pragma acc parallel loop independent vector_length(1) for(unsigned i = 0; i < 5; ++i); - // TODOexpected-error@+1{{OpenACC 'num_gangs' clause is not valid on 'parallel loop' directive}} - // expected-warning@+1{{OpenACC clause 'num_gangs' not yet implemented}} #pragma acc parallel loop independent num_gangs(1) for(unsigned i = 0; i < 5; ++i); - // TODOexpected-error@+1{{OpenACC 'num_workers' clause is not valid on 'parallel loop' directive}} - // expected-warning@+1{{OpenACC clause 'num_workers' not yet implemented}} #pragma acc parallel loop independent num_workers(1) for(unsigned i = 0; i < 5; ++i); // expected-warning@+1{{OpenACC clause 'device_num' not yet implemented}} @@ -516,16 +498,10 @@ void uses() { // expected-warning@+1{{OpenACC clause 'bind' not yet implemented}} #pragma acc parallel loop bind(Var) independent for(unsigned i = 0; i < 5; ++i); - // TODOexpected-error@+1{{OpenACC 'vector_length' clause is not valid on 'parallel loop' directive}} - // expected-warning@+1{{OpenACC clause 'vector_length' not yet implemented}} #pragma acc parallel loop vector_length(1) independent for(unsigned i = 0; i < 5; ++i); - // TODOexpected-error@+1{{OpenACC 'num_gangs' clause is not valid on 'parallel loop' directive}} - // expected-warning@+1{{OpenACC clause 'num_gangs' not yet implemented}} #pragma acc parallel loop num_gangs(1) independent for(unsigned i = 0; i < 5; ++i); - // TODOexpected-error@+1{{OpenACC 'num_workers' clause is not valid on 'parallel loop' directive}} - // expected-warning@+1{{OpenACC clause 'num_workers' not yet implemented}} #pragma acc parallel loop num_workers(1) independent for(unsigned i = 0; i < 5; ++i); // expected-warning@+1{{OpenACC clause 'device_num' not yet implemented}} @@ -650,16 +626,10 @@ void uses() { // expected-warning@+1{{OpenACC clause 'bind' not yet implemented}} #pragma acc parallel loop seq bind(Var) for(unsigned i = 0; i < 5; ++i); - // TODOexpected-error@+1{{OpenACC 'vector_length' clause is not valid on 'parallel loop' directive}} - // expected-warning@+1{{OpenACC clause 'vector_length' not yet implemented}} #pragma acc parallel loop seq vector_length(1) for(unsigned i = 0; i < 5; ++i); - // TODOexpected-error@+1{{OpenACC 'num_gangs' clause is not valid on 'parallel loop' directive}} - // expected-warning@+1{{OpenACC clause 'num_gangs' 
not yet implemented}} #pragma acc parallel loop seq num_gangs(1) for(unsigned i = 0; i < 5; ++i); - // TODOexpected-error@+1{{OpenACC 'num_workers' clause is not valid on 'parallel loop' directive}} - // expected-warning@+1{{OpenACC clause 'num_workers' not yet implemented}} #pragma acc parallel loop seq num_workers(1) for(unsigned i = 0; i < 5; ++i); // expected-warning@+1{{OpenACC clause 'device_num' not yet implemented}} @@ -783,16 +753,10 @@ void uses() { // expected-warning@+1{{OpenACC clause 'bind' not yet implemented}} #pragma acc parallel loop bind(Var) seq for(unsigned i = 0; i < 5; ++i); - // TODOexpected-error@+1{{OpenACC 'vector_length' clause is not valid on 'parallel loop' directive}} - // expected-warning@+1{{OpenACC clause 'vector_length' not yet implemented}} #pragma acc parallel loop vector_length(1) seq for(unsigned i = 0; i < 5; ++i); - // TODOexpected-error@+1{{OpenACC 'num_gangs' clause is not valid on 'parallel loop' directive}} - // expected-warning@+1{{OpenACC clause 'num_gangs' not yet implemented}} #pragma acc parallel loop num_gangs(1) seq for(unsigned i = 0; i < 5; ++i); - // TODOexpected-error@+1{{OpenACC 'num_workers' clause is not valid on 'parallel loop' directive}} - // expected-warning@+1{{OpenACC clause 'num_workers' not yet implemented}} #pragma acc parallel loop num_workers(1) seq for(unsigned i = 0; i < 5; ++i); // expected-warning@+1{{OpenACC clause 'device_num' not yet implemented}} diff --git a/clang/test/SemaOpenACC/combined-construct-device_type-clause.c b/clang/test/SemaOpenACC/combined-construct-device_type-clause.c index a5ab39cb12c38..9a60fb4c665e5 100644 --- a/clang/test/SemaOpenACC/combined-construct-device_type-clause.c +++ b/clang/test/SemaOpenACC/combined-construct-device_type-clause.c @@ -195,7 +195,6 @@ void uses() { // expected-error@+1{{OpenACC 'num_gangs' clause is not valid on 'serial loop' directive}} #pragma acc serial loop device_type(*) num_gangs(1) for(int i = 0; i < 5; ++i); - // expected-warning@+1{{OpenACC clause 'num_workers' not yet implemented, clause ignored}} #pragma acc parallel loop device_type(*) num_workers(1) for(int i = 0; i < 5; ++i); // expected-error@+2{{OpenACC clause 'device_num' may not follow a 'device_type' clause in a 'serial loop' construct}} diff --git a/clang/test/SemaOpenACC/combined-construct-num_gangs-ast.cpp b/clang/test/SemaOpenACC/combined-construct-num_gangs-ast.cpp new file mode 100644 index 0000000000000..6e75a00943364 --- /dev/null +++ b/clang/test/SemaOpenACC/combined-construct-num_gangs-ast.cpp @@ -0,0 +1,121 @@ +// RUN: %clang_cc1 %s -fopenacc -ast-dump | FileCheck %s + +// Test this with PCH. 
+// RUN: %clang_cc1 %s -fopenacc -emit-pch -o %t %s +// RUN: %clang_cc1 %s -fopenacc -include-pch %t -ast-dump-all | FileCheck %s + +#ifndef PCH_HELPER +#define PCH_HELPER +int some_int(); +short some_short(); +long some_long(); +struct CorrectConvert { + operator int(); +} Convert; + + +void NormalUses() { + // CHECK: FunctionDecl{{.*}}NormalUses + // CHECK-NEXT: CompoundStmt + +#pragma acc parallel loop num_gangs(some_int(), some_long(), some_short()) + for (unsigned i = 0; i < 5; ++i); + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} parallel loop + // CHECK-NEXT: num_gangs clause + // CHECK-NEXT: CallExpr{{.*}}'int' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'int (*)()' + // CHECK-NEXT: DeclRefExpr{{.*}}'int ()' lvalue Function{{.*}} 'some_int' 'int ()' + // CHECK-NEXT: CallExpr{{.*}}'long' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'long (*)()' + // CHECK-NEXT: DeclRefExpr{{.*}}'long ()' lvalue Function{{.*}} 'some_long' 'long ()' + // CHECK-NEXT: CallExpr{{.*}}'short' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'short (*)()' + // CHECK-NEXT: DeclRefExpr{{.*}}'short ()' lvalue Function{{.*}} 'some_short' 'short ()' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt + +#pragma acc kernels loop num_gangs(some_int()) + for (unsigned i = 0; i < 5; ++i); + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} kernels loop + // CHECK-NEXT: num_gangs clause + // CHECK-NEXT: CallExpr{{.*}}'int' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'int (*)()' + // CHECK-NEXT: DeclRefExpr{{.*}}'int ()' lvalue Function{{.*}} 'some_int' 'int ()' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt +} + +template +void TemplUses(T t, U u) { + // CHECK-NEXT: FunctionTemplateDecl + // CHECK-NEXT: TemplateTypeParmDecl{{.*}}typename depth 0 index 0 T + // CHECK-NEXT: TemplateTypeParmDecl{{.*}}typename depth 0 index 1 U + // CHECK-NEXT: FunctionDecl{{.*}} TemplUses 'void (T, U)' + // CHECK-NEXT: ParmVarDecl{{.*}} t 'T' + // CHECK-NEXT: ParmVarDecl{{.*}} u 'U' + // CHECK-NEXT: CompoundStmt + +#pragma acc kernels loop num_gangs(u) + for (unsigned i = 0; i < 5; ++i); + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} kernels loop + // CHECK-NEXT: num_gangs clause + // CHECK-NEXT: DeclRefExpr{{.*}} 'U' lvalue ParmVar{{.*}} 'u' 'U' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt + +#pragma acc parallel loop num_gangs(u, U::value) + for (unsigned i = 0; i < 5; ++i); + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} parallel loop + // CHECK-NEXT: num_gangs clause + // CHECK-NEXT: DeclRefExpr{{.*}} 'U' lvalue ParmVar{{.*}} 'u' 'U' + // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '' lvalue + // CHECK-NEXT: NestedNameSpecifier TypeSpec 'U' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt + + // Check the instantiated versions of the above. 
+ // CHECK-NEXT: FunctionDecl{{.*}} used TemplUses 'void (CorrectConvert, HasInt)' implicit_instantiation + // CHECK-NEXT: TemplateArgument type 'CorrectConvert' + // CHECK-NEXT: RecordType{{.*}} 'CorrectConvert' + // CHECK-NEXT: CXXRecord{{.*}} 'CorrectConvert' + // CHECK-NEXT: TemplateArgument type 'HasInt' + // CHECK-NEXT: RecordType{{.*}} 'HasInt' + // CHECK-NEXT: CXXRecord{{.*}} 'HasInt' + // CHECK-NEXT: ParmVarDecl{{.*}} t 'CorrectConvert' + // CHECK-NEXT: ParmVarDecl{{.*}} u 'HasInt' + // CHECK-NEXT: CompoundStmt + + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} kernels loop + // CHECK-NEXT: num_gangs clause + // CHECK-NEXT: ImplicitCastExpr{{.*}} 'char' + // CHECK-NEXT: CXXMemberCallExpr{{.*}}'char' + // CHECK-NEXT: MemberExpr{{.*}} '' .operator char + // CHECK-NEXT: DeclRefExpr{{.*}} 'HasInt' lvalue ParmVar + // CHECK-NEXT: ForStmt + // CHECK: NullStmt + + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} parallel loop + // CHECK-NEXT: num_gangs clause + // CHECK-NEXT: ImplicitCastExpr{{.*}} 'char' + // CHECK-NEXT: CXXMemberCallExpr{{.*}}'char' + // CHECK-NEXT: MemberExpr{{.*}} '' .operator char + // CHECK-NEXT: DeclRefExpr{{.*}} 'HasInt' lvalue ParmVar + // CHECK-NEXT: ImplicitCastExpr{{.*}} 'int' + // CHECK-NEXT: DeclRefExpr{{.*}} 'const int' lvalue Var{{.*}} 'value' 'const int' + // CHECK-NEXT: NestedNameSpecifier TypeSpec 'HasInt' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt +} + +struct HasInt { + using IntTy = int; + using ShortTy = short; + static constexpr int value = 1; + + operator char(); +}; + +void Inst() { + TemplUses({}, {}); +} +#endif // PCH_HELPER diff --git a/clang/test/SemaOpenACC/combined-construct-num_gangs-clause.c b/clang/test/SemaOpenACC/combined-construct-num_gangs-clause.c new file mode 100644 index 0000000000000..bd035bd4a51a2 --- /dev/null +++ b/clang/test/SemaOpenACC/combined-construct-num_gangs-clause.c @@ -0,0 +1,45 @@ +// RUN: %clang_cc1 %s -fopenacc -verify + +short getS(); +float getF(); +void Test() { +#pragma acc kernels loop num_gangs(1) + for(int i = 5; i < 10;++i); + + // expected-error@+1{{OpenACC 'num_gangs' clause is not valid on 'serial loop' directive}} +#pragma acc serial loop num_gangs(1) + for(int i = 5; i < 10;++i); + +#pragma acc parallel loop num_gangs(1) + for(int i = 5; i < 10;++i); + + // expected-error@+1{{OpenACC clause 'num_gangs' requires expression of integer type}} +#pragma acc parallel loop num_gangs(getF()) + for(int i = 5; i < 10;++i); + + // expected-error@+1{{expected expression}} +#pragma acc kernels loop num_gangs() + for(int i = 5; i < 10;++i); + + // expected-error@+1{{expected expression}} +#pragma acc parallel loop num_gangs() + for(int i = 5; i < 10;++i); + + // expected-error@+2{{OpenACC 'num_gangs' clause cannot appear more than once on a 'kernels loop' directive}} + // expected-note@+1{{previous clause is here}} +#pragma acc kernels loop num_gangs(1) num_gangs(2) + for(int i = 5; i < 10;++i); + + // expected-error@+2{{OpenACC 'num_gangs' clause cannot appear more than once on a 'parallel loop' directive}} + // expected-note@+1{{previous clause is here}} +#pragma acc parallel loop num_gangs(1) num_gangs(2) + for(int i = 5; i < 10;++i); + + // expected-error@+1{{too many integer expression arguments provided to OpenACC 'num_gangs' clause: 'kernels loop' directive expects maximum of 1, 2 were provided}} +#pragma acc kernels loop num_gangs(1, getS()) + for(int i = 5; i < 10;++i); + + // expected-error@+1{{too many integer expression arguments provided to OpenACC 'num_gangs' clause: 'parallel loop' directive 
expects maximum of 3, 4 were provided}} +#pragma acc parallel loop num_gangs(getS(), 1, getS(), 1) + for(int i = 5; i < 10;++i); +} diff --git a/clang/test/SemaOpenACC/combined-construct-num_workers-ast.cpp b/clang/test/SemaOpenACC/combined-construct-num_workers-ast.cpp new file mode 100644 index 0000000000000..8aa361c7b037c --- /dev/null +++ b/clang/test/SemaOpenACC/combined-construct-num_workers-ast.cpp @@ -0,0 +1,230 @@ +// RUN: %clang_cc1 %s -fopenacc -ast-dump | FileCheck %s + +// Test this with PCH. +// RUN: %clang_cc1 %s -fopenacc -emit-pch -o %t %s +// RUN: %clang_cc1 %s -fopenacc -include-pch %t -ast-dump-all | FileCheck %s + +#ifndef PCH_HELPER +#define PCH_HELPER +int some_int(); +short some_short(); +long some_long(); +enum E{}; +E some_enum(); +struct CorrectConvert { + operator int(); +} Convert; + + +void NormalUses() { + // CHECK: FunctionDecl{{.*}}NormalUses + // CHECK-NEXT: CompoundStmt +#pragma acc parallel loop num_workers(some_int()) + for (unsigned i = 0; i < 5; ++i); + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} parallel loop + // CHECK-NEXT: num_workers clause + // CHECK-NEXT: CallExpr{{.*}}'int' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'int (*)()' + // CHECK-NEXT: DeclRefExpr{{.*}}'int ()' lvalue Function{{.*}} 'some_int' 'int ()' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt + +#pragma acc kernels loop num_workers(some_short()) + for (unsigned i = 0; i < 5; ++i); + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} kernels loop + // CHECK-NEXT: num_workers clause + // CHECK-NEXT: CallExpr{{.*}}'short' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'short (*)()' + // CHECK-NEXT: DeclRefExpr{{.*}}'short ()' lvalue Function{{.*}} 'some_short' 'short ()' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt + +#pragma acc parallel loop num_workers(some_long()) + for (unsigned i = 0; i < 5; ++i); + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} parallel loop + // CHECK-NEXT: num_workers clause + // CHECK-NEXT: CallExpr{{.*}}'long' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'long (*)()' + // CHECK-NEXT: DeclRefExpr{{.*}}'long ()' lvalue Function{{.*}} 'some_long' 'long ()' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt + +#pragma acc parallel loop num_workers(some_enum()) + for (unsigned i = 0; i < 5; ++i); + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} parallel loop + // CHECK-NEXT: num_workers clause + // CHECK-NEXT: CallExpr{{.*}}'E' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'E (*)()' + // CHECK-NEXT: DeclRefExpr{{.*}}'E ()' lvalue Function{{.*}} 'some_enum' 'E ()' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt + +#pragma acc kernels loop num_workers(Convert) + for (unsigned i = 0; i < 5; ++i); + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} kernels loop + // CHECK-NEXT: num_workers clause + // CHECK-NEXT: ImplicitCastExpr{{.*}} 'int' + // CHECK-NEXT: CXXMemberCallExpr{{.*}}'int' + // CHECK-NEXT: MemberExpr{{.*}} '' .operator int + // CHECK-NEXT: DeclRefExpr{{.*}} 'struct CorrectConvert':'CorrectConvert' lvalue Var + // CHECK-NEXT: ForStmt + // CHECK: NullStmt +} + +template +void TemplUses(T t, U u) { + // CHECK-NEXT: FunctionTemplateDecl + // CHECK-NEXT: TemplateTypeParmDecl{{.*}}typename depth 0 index 0 T + // CHECK-NEXT: TemplateTypeParmDecl{{.*}}typename depth 0 index 1 U + // CHECK-NEXT: FunctionDecl{{.*}} TemplUses 'void (T, U)' + // CHECK-NEXT: ParmVarDecl{{.*}} referenced t 'T' + // CHECK-NEXT: ParmVarDecl{{.*}} referenced u 'U' + // CHECK-NEXT: CompoundStmt + +#pragma acc parallel loop num_workers(t) + for (unsigned i = 0; i < 5; ++i); + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} 
parallel loop + // CHECK-NEXT: num_workers clause + // CHECK-NEXT: DeclRefExpr{{.*}} 'T' lvalue ParmVar{{.*}} 't' 'T' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt + +#pragma acc kernels loop num_workers(u) + for (unsigned i = 0; i < 5; ++i); + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} kernels loop + // CHECK-NEXT: num_workers clause + // CHECK-NEXT: DeclRefExpr{{.*}} 'U' lvalue ParmVar{{.*}} 'u' 'U' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt + +#pragma acc parallel loop num_workers(U::value) + for (unsigned i = 0; i < 5; ++i); + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} parallel loop + // CHECK-NEXT: num_workers clause + // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '' lvalue + // CHECK-NEXT: NestedNameSpecifier TypeSpec 'U' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt + +#pragma acc kernels loop num_workers(T{}) + for (unsigned i = 0; i < 5; ++i); + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} kernels loop + // CHECK-NEXT: num_workers clause + // CHECK-NEXT: CXXUnresolvedConstructExpr{{.*}} 'T' 'T' list + // CHECK-NEXT: InitListExpr{{.*}} 'void' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt + +#pragma acc parallel loop num_workers(U{}) + for (unsigned i = 0; i < 5; ++i); + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} parallel loop + // CHECK-NEXT: num_workers clause + // CHECK-NEXT: CXXUnresolvedConstructExpr{{.*}} 'U' 'U' list + // CHECK-NEXT: InitListExpr{{.*}} 'void' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt + +#pragma acc kernels loop num_workers(typename U::IntTy{}) + for (unsigned i = 0; i < 5; ++i); + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} kernels loop + // CHECK-NEXT: num_workers clause + // CHECK-NEXT: CXXUnresolvedConstructExpr{{.*}} 'typename U::IntTy' 'typename U::IntTy' list + // CHECK-NEXT: InitListExpr{{.*}} 'void' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt + +#pragma acc parallel loop num_workers(typename U::ShortTy{}) + for (unsigned i = 0; i < 5; ++i); + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} parallel loop + // CHECK-NEXT: num_workers clause + // CHECK-NEXT: CXXUnresolvedConstructExpr{{.*}} 'typename U::ShortTy' 'typename U::ShortTy' list + // CHECK-NEXT: InitListExpr{{.*}} 'void' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt + + // Check the instantiated versions of the above. 
+ // CHECK-NEXT: FunctionDecl{{.*}} used TemplUses 'void (CorrectConvert, HasInt)' implicit_instantiation + // CHECK-NEXT: TemplateArgument type 'CorrectConvert' + // CHECK-NEXT: RecordType{{.*}} 'CorrectConvert' + // CHECK-NEXT: CXXRecord{{.*}} 'CorrectConvert' + // CHECK-NEXT: TemplateArgument type 'HasInt' + // CHECK-NEXT: RecordType{{.*}} 'HasInt' + // CHECK-NEXT: CXXRecord{{.*}} 'HasInt' + // CHECK-NEXT: ParmVarDecl{{.*}} used t 'CorrectConvert' + // CHECK-NEXT: ParmVarDecl{{.*}} used u 'HasInt' + // CHECK-NEXT: CompoundStmt + + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} parallel loop + // CHECK-NEXT: num_workers clause + // CHECK-NEXT: ImplicitCastExpr{{.*}} 'int' + // CHECK-NEXT: CXXMemberCallExpr{{.*}}'int' + // CHECK-NEXT: MemberExpr{{.*}} '' .operator int + // CHECK-NEXT: DeclRefExpr{{.*}} 'CorrectConvert' lvalue ParmVar + // CHECK-NEXT: ForStmt + // CHECK: NullStmt + + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} kernels loop + // CHECK-NEXT: num_workers clause + // CHECK-NEXT: ImplicitCastExpr{{.*}} 'char' + // CHECK-NEXT: CXXMemberCallExpr{{.*}}'char' + // CHECK-NEXT: MemberExpr{{.*}} '' .operator char + // CHECK-NEXT: DeclRefExpr{{.*}} 'HasInt' lvalue ParmVar + // CHECK-NEXT: ForStmt + // CHECK: NullStmt + + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} parallel loop + // CHECK-NEXT: num_workers clause + // CHECK-NEXT: ImplicitCastExpr{{.*}} 'int' + // CHECK-NEXT: DeclRefExpr{{.*}} 'const int' lvalue Var{{.*}} 'value' 'const int' + // CHECK-NEXT: NestedNameSpecifier TypeSpec 'HasInt' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt + + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} kernels loop + // CHECK-NEXT: num_workers clause + // CHECK-NEXT: ImplicitCastExpr{{.*}} 'int' + // CHECK-NEXT: CXXMemberCallExpr{{.*}}'int' + // CHECK-NEXT: MemberExpr{{.*}} '' .operator int + // CHECK-NEXT: MaterializeTemporaryExpr{{.*}} 'CorrectConvert' lvalue + // CHECK-NEXT: CXXFunctionalCastExpr{{.*}} 'CorrectConvert' functional cast to struct CorrectConvert + // CHECK-NEXT: InitListExpr{{.*}}'CorrectConvert' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt + + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} parallel loop + // CHECK-NEXT: num_workers clause + // CHECK-NEXT: ImplicitCastExpr{{.*}} 'char' + // CHECK-NEXT: CXXMemberCallExpr{{.*}}'char' + // CHECK-NEXT: MemberExpr{{.*}} '' .operator char + // CHECK-NEXT: MaterializeTemporaryExpr{{.*}} 'HasInt' lvalue + // CHECK-NEXT: CXXFunctionalCastExpr{{.*}} 'HasInt' functional cast to struct HasInt + // CHECK-NEXT: InitListExpr{{.*}}'HasInt' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt + + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} kernels loop + // CHECK-NEXT: num_workers clause + // CHECK-NEXT: CXXFunctionalCastExpr{{.*}} 'typename HasInt::IntTy':'int' functional cast to typename struct HasInt::IntTy + // CHECK-NEXT: InitListExpr{{.*}}'typename HasInt::IntTy':'int' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt + + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} parallel loop + // CHECK-NEXT: num_workers clause + // CHECK-NEXT: CXXFunctionalCastExpr{{.*}} 'typename HasInt::ShortTy':'short' functional cast to typename struct HasInt::ShortTy + // CHECK-NEXT: InitListExpr{{.*}}'typename HasInt::ShortTy':'short' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt +} +struct HasInt { + using IntTy = int; + using ShortTy = short; + static constexpr int value = 1; + + operator char(); +}; + +void Inst() { + TemplUses({}, {}); +} +#endif // PCH_HELPER diff --git a/clang/test/SemaOpenACC/combined-construct-num_workers-clause.c 
b/clang/test/SemaOpenACC/combined-construct-num_workers-clause.c new file mode 100644 index 0000000000000..a5891f071bb03 --- /dev/null +++ b/clang/test/SemaOpenACC/combined-construct-num_workers-clause.c @@ -0,0 +1,37 @@ +// RUN: %clang_cc1 %s -fopenacc -verify + +short getS(); +float getF(); +void Test() { +#pragma acc kernels loop num_workers(1) + for(int i = 5; i < 10;++i); + + // expected-error@+1{{OpenACC 'num_workers' clause is not valid on 'serial loop' directive}} +#pragma acc serial loop num_workers(1) + for(int i = 5; i < 10;++i); + +#pragma acc parallel loop num_workers(1) + for(int i = 5; i < 10;++i); + + // expected-error@+1{{OpenACC clause 'num_workers' requires expression of integer type}} +#pragma acc parallel loop num_workers(getF()) + for(int i = 5; i < 10;++i); + + // expected-error@+1{{expected expression}} +#pragma acc kernels loop num_workers() + for(int i = 5; i < 10;++i); + + // expected-error@+1{{expected expression}} +#pragma acc parallel loop num_workers() + for(int i = 5; i < 10;++i); + + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} +#pragma acc kernels loop num_workers(1, 2) + for(int i = 5; i < 10;++i); + + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} +#pragma acc parallel loop num_workers(1, 2) + for(int i = 5; i < 10;++i); +} diff --git a/clang/test/SemaOpenACC/combined-construct-vector_length-ast.cpp b/clang/test/SemaOpenACC/combined-construct-vector_length-ast.cpp new file mode 100644 index 0000000000000..6cfc9c6b8b2c2 --- /dev/null +++ b/clang/test/SemaOpenACC/combined-construct-vector_length-ast.cpp @@ -0,0 +1,98 @@ +// RUN: %clang_cc1 %s -fopenacc -ast-dump | FileCheck %s + +// Test this with PCH. +// RUN: %clang_cc1 %s -fopenacc -emit-pch -o %t %s +// RUN: %clang_cc1 %s -fopenacc -include-pch %t -ast-dump-all | FileCheck %s + +#ifndef PCH_HELPER +#define PCH_HELPER +short some_short(); + +struct CorrectConvert { + operator int(); +} Convert; + + +void NormalUses() { + // CHECK: FunctionDecl{{.*}}NormalUses + // CHECK-NEXT: CompoundStmt + +#pragma acc kernels loop vector_length(some_short()) + for (unsigned i = 0; i < 5; ++i); + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} kernels loop + // CHECK-NEXT: vector_length clause + // CHECK-NEXT: CallExpr{{.*}}'short' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'short (*)()' + // CHECK-NEXT: DeclRefExpr{{.*}}'short ()' lvalue Function{{.*}} 'some_short' 'short ()' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt +} +template +void TemplUses(T t, U u) { + // CHECK-NEXT: FunctionTemplateDecl + // CHECK-NEXT: TemplateTypeParmDecl{{.*}}typename depth 0 index 0 T + // CHECK-NEXT: TemplateTypeParmDecl{{.*}}typename depth 0 index 1 U + // CHECK-NEXT: FunctionDecl{{.*}} TemplUses 'void (T, U)' + // CHECK-NEXT: ParmVarDecl{{.*}} t 'T' + // CHECK-NEXT: ParmVarDecl{{.*}} u 'U' + // CHECK-NEXT: CompoundStmt + +#pragma acc kernels loop vector_length(u) + for (unsigned i = 0; i < 5; ++i); + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} kernels loop + // CHECK-NEXT: vector_length clause + // CHECK-NEXT: DeclRefExpr{{.*}} 'U' lvalue ParmVar{{.*}} 'u' 'U' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt + +#pragma acc parallel loop vector_length(U::value) + for (unsigned i = 0; i < 5; ++i); + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} parallel loop + // CHECK-NEXT: vector_length clause + // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '' lvalue + // CHECK-NEXT: NestedNameSpecifier TypeSpec 'U' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt + + // Check the 
instantiated versions of the above. + // CHECK-NEXT: FunctionDecl{{.*}} used TemplUses 'void (CorrectConvert, HasInt)' implicit_instantiation + // CHECK-NEXT: TemplateArgument type 'CorrectConvert' + // CHECK-NEXT: RecordType{{.*}} 'CorrectConvert' + // CHECK-NEXT: CXXRecord{{.*}} 'CorrectConvert' + // CHECK-NEXT: TemplateArgument type 'HasInt' + // CHECK-NEXT: RecordType{{.*}} 'HasInt' + // CHECK-NEXT: CXXRecord{{.*}} 'HasInt' + // CHECK-NEXT: ParmVarDecl{{.*}} t 'CorrectConvert' + // CHECK-NEXT: ParmVarDecl{{.*}} u 'HasInt' + // CHECK-NEXT: CompoundStmt + + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} kernels loop + // CHECK-NEXT: vector_length clause + // CHECK-NEXT: ImplicitCastExpr{{.*}} 'char' + // CHECK-NEXT: CXXMemberCallExpr{{.*}}'char' + // CHECK-NEXT: MemberExpr{{.*}} '' .operator char + // CHECK-NEXT: DeclRefExpr{{.*}} 'HasInt' lvalue ParmVar + // CHECK-NEXT: ForStmt + // CHECK: NullStmt + + // CHECK-NEXT: OpenACCCombinedConstruct{{.*}} parallel loop + // CHECK-NEXT: vector_length clause + // CHECK-NEXT: ImplicitCastExpr{{.*}} 'int' + // CHECK-NEXT: DeclRefExpr{{.*}} 'const int' lvalue Var{{.*}} 'value' 'const int' + // CHECK-NEXT: NestedNameSpecifier TypeSpec 'HasInt' + // CHECK-NEXT: ForStmt + // CHECK: NullStmt +} + +struct HasInt { + using IntTy = int; + using ShortTy = short; + static constexpr int value = 1; + + operator char(); +}; + +void Inst() { + TemplUses({}, {}); +} +#endif // PCH_HELPER diff --git a/clang/test/SemaOpenACC/combined-construct-vector_length-clause.c b/clang/test/SemaOpenACC/combined-construct-vector_length-clause.c new file mode 100644 index 0000000000000..8b6dedd9b83ba --- /dev/null +++ b/clang/test/SemaOpenACC/combined-construct-vector_length-clause.c @@ -0,0 +1,37 @@ +// RUN: %clang_cc1 %s -fopenacc -verify + +short getS(); +float getF(); +void Test() { +#pragma acc kernels loop vector_length(1) + for(int i = 5; i < 10;++i); + + // expected-error@+1{{OpenACC 'vector_length' clause is not valid on 'serial loop' directive}} +#pragma acc serial loop vector_length(1) + for(int i = 5; i < 10;++i); + +#pragma acc parallel loop vector_length(1) + for(int i = 5; i < 10;++i); + + // expected-error@+1{{OpenACC clause 'vector_length' requires expression of integer type}} +#pragma acc parallel loop vector_length(getF()) + for(int i = 5; i < 10;++i); + + // expected-error@+1{{expected expression}} +#pragma acc kernels loop vector_length() + for(int i = 5; i < 10;++i); + + // expected-error@+1{{expected expression}} +#pragma acc parallel loop vector_length() + for(int i = 5; i < 10;++i); + + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} +#pragma acc kernels loop vector_length(1, 2) + for(int i = 5; i < 10;++i); + + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} +#pragma acc parallel loop vector_length(1, 2) + for(int i = 5; i < 10;++i); +} From d1f4f6368064d8d7bf09fbd5be8d74846d532c30 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Tue, 3 Dec 2024 18:49:46 +0100 Subject: [PATCH 099/191] [SystemZ] Use nop mnemonics for disassembly To match the behavior of GNU binutils, output the nop family of mnemonic aliases when disassembling. 
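As the updated lld and MC tests below illustrate, the printed mnemonics change roughly as follows (the branch targets shown here are placeholders; the actual targets come from the decoded instruction):

    brc 0, <target>    ->  jnop <target>
    brcl 0, <target>   ->  jgnop <target>
    nopr %r0           ->  nopr
    nop 0              ->  nop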
--- lld/test/ELF/systemz-gotent-relax-und-dso.s | 4 +- lld/test/ELF/systemz-gotent-relax.s | 4 +- lld/test/ELF/systemz-plt.s | 6 +-- lld/test/ELF/systemz-tls-gd.s | 8 ++-- lld/test/ELF/systemz-tls-ld.s | 2 +- .../lib/Target/SystemZ/SystemZInstrFormats.td | 44 +++++++++++++++---- llvm/lib/Target/SystemZ/SystemZInstrInfo.td | 19 +++----- llvm/lib/Target/SystemZ/SystemZScheduleZ13.td | 4 +- llvm/lib/Target/SystemZ/SystemZScheduleZ14.td | 4 +- llvm/lib/Target/SystemZ/SystemZScheduleZ15.td | 3 +- llvm/lib/Target/SystemZ/SystemZScheduleZ16.td | 3 +- .../lib/Target/SystemZ/SystemZScheduleZ196.td | 3 +- .../Target/SystemZ/SystemZScheduleZEC12.td | 3 +- .../MC/Disassembler/SystemZ/insns-pcrel.txt | 16 +++---- llvm/test/MC/Disassembler/SystemZ/insns.txt | 2 +- llvm/test/MC/SystemZ/insn-good-zos-pcrel.s | 2 +- llvm/test/MC/SystemZ/insn-good.s | 30 ++++++------- 17 files changed, 89 insertions(+), 68 deletions(-) diff --git a/lld/test/ELF/systemz-gotent-relax-und-dso.s b/lld/test/ELF/systemz-gotent-relax-und-dso.s index 5a1bd7f949f89..e8b88056299cb 100644 --- a/lld/test/ELF/systemz-gotent-relax-und-dso.s +++ b/lld/test/ELF/systemz-gotent-relax-und-dso.s @@ -14,9 +14,9 @@ # DISASM: Disassembly of section .text: # DISASM-EMPTY: # DISASM-NEXT: : -# DISASM-NEXT: nop 0 +# DISASM-NEXT: nop # DISASM: : -# DISASM-NEXT: nop 0 +# DISASM-NEXT: nop # DISASM: <_start>: # DISASM-NEXT: lgrl %r1, 0x2400 # DISASM-NEXT: lgrl %r1, 0x2400 diff --git a/lld/test/ELF/systemz-gotent-relax.s b/lld/test/ELF/systemz-gotent-relax.s index e84fd8d4653e9..88b43a4e9d29c 100644 --- a/lld/test/ELF/systemz-gotent-relax.s +++ b/lld/test/ELF/systemz-gotent-relax.s @@ -30,9 +30,9 @@ # DISASM: Disassembly of section .text: # DISASM: 00000000010011e0 : -# DISASM-NEXT: nop 0 +# DISASM-NEXT: nop # DISASM: 00000000010011e4 : -# DISASM-NEXT: nop 0 +# DISASM-NEXT: nop # DISASM: 00000000010011e8 : # DISASM-NEXT: br %r14 # DISASM: 00000000010011ea <_start>: diff --git a/lld/test/ELF/systemz-plt.s b/lld/test/ELF/systemz-plt.s index c7563cd18c274..717343ce4c4d5 100644 --- a/lld/test/ELF/systemz-plt.s +++ b/lld/test/ELF/systemz-plt.s @@ -48,9 +48,9 @@ # DIS-NEXT: 100102c: d2 07 f0 30 10 08 mvc 48(8,%r15), 8(%r1) # DIS-NEXT: 1001032: e3 10 10 10 00 04 lg %r1, 16(%r1) # DIS-NEXT: 1001038: 07 f1 br %r1 -# DIS-NEXT: 100103a: 07 00 nopr %r0 -# DIS-NEXT: 100103c: 07 00 nopr %r0 -# DIS-NEXT: 100103e: 07 00 nopr %r0 +# DIS-NEXT: 100103a: 07 00 nopr +# DIS-NEXT: 100103c: 07 00 nopr +# DIS-NEXT: 100103e: 07 00 nopr # DIS-NEXT: 1001040: c0 10 00 00 10 54 larl %r1, 0x10030e8 # DIS-NEXT: 1001046: e3 10 10 00 00 04 lg %r1, 0(%r1) # DIS-NEXT: 100104c: 07 f1 br %r1 diff --git a/lld/test/ELF/systemz-tls-gd.s b/lld/test/ELF/systemz-tls-gd.s index 3976f55a6ae39..742797e2d62e4 100644 --- a/lld/test/ELF/systemz-tls-gd.s +++ b/lld/test/ELF/systemz-tls-gd.s @@ -58,17 +58,17 @@ ## TP offset of a is at 0x1002218 # LE-NEXT: lgrl %r2, 0x1002218 -# LE-NEXT: brcl 0, +# LE-NEXT: jgnop # LE-NEXT: lgf %r2, 0(%r2,%r7) ## TP offset of b is at 0x1002220 # LE-NEXT: lgrl %r2, 0x1002220 -# LE-NEXT: brcl 0, +# LE-NEXT: jgnop # LE-NEXT: lgf %r2, 0(%r2,%r7) ## TP offset of c is at 0x1002228 # LE-NEXT: lgrl %r2, 0x1002228 -# LE-NEXT: brcl 0, +# LE-NEXT: jgnop # LE-NEXT: lgf %r2, 0(%r2,%r7) ## TP offsets @@ -88,7 +88,7 @@ ## TP offset of a is at 0x1002340 # IE-NEXT: lgrl %r2, 0x1002340 -# IE-NEXT: brcl 0, +# IE-NEXT: jgnop # IE-NEXT: lgf %r2, 0(%r2,%r7) ## GOT offset of the TP offset for b is at 0x1002348 diff --git a/lld/test/ELF/systemz-tls-ld.s b/lld/test/ELF/systemz-tls-ld.s index 
2cb36d7294f2b..ef104b82644ce 100644 --- a/lld/test/ELF/systemz-tls-ld.s +++ b/lld/test/ELF/systemz-tls-ld.s @@ -49,7 +49,7 @@ ## GOT offset of the LDM TLS module ID is at 0x1002210 # LE-NEXT: lgrl %r2, 0x1002210 -# LE-NEXT: brcl 0, +# LE-NEXT: jgnop # LE-NEXT: la %r2, 0(%r2,%r7) ## TP offset for a is at 0x1002218 diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td index 6e136b10aed42..ae8f669e9bab4 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td @@ -2328,6 +2328,12 @@ class AsmCondBranchRI opcode> : InstRIc; +class NeverCondBranchRI opcode> + : InstRIc { + let M1 = 0; +} + class FixedCondBranchRI opcode, SDPatternOperator operator = null_frag> : InstRIc opcode> : InstRILc; +class NeverCondBranchRIL opcode> + : InstRILc { + let M1 = 0; +} + class FixedCondBranchRIL opcode> : InstRILc { @@ -2365,10 +2377,16 @@ class AsmCondBranchRR opcode> : InstRR; -class NeverCondBranchRR opcode> - : InstRR { - let R1 = 0; +multiclass NeverCondBranchRR opcode> { + // For the no-op (always false) branch, the target is optional. + def "" : InstRR { + let R1 = 0; + } + def Opt : InstRR { + let R1 = 0; + let R2 = 0; + } } class FixedCondBranchRR opcode, @@ -2392,11 +2410,19 @@ class AsmCondBranchRX opcode> (ins imm32zx4:$M1, (bdxaddr12only $B2, $D2, $X2):$XBD2), mnemonic#"\t$M1, $XBD2", []>; -class NeverCondBranchRX opcode> - : InstRXb { - let M1 = 0; +multiclass NeverCondBranchRX opcode> { + // For the no-op (always false) branch, the target is optional. + def "" : InstRXb { + let M1 = 0; + } + def Opt : InstRXb { + let M1 = 0; + let B2 = 0; + let D2 = 0; + let X2 = 0; + } } class FixedCondBranchRX opcode> diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index f3baf896658de..5cbba0d9c5edd 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -109,20 +109,11 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in { } // NOPs. These are again variants of the conditional branches, with the -// condition mask set to "never". NOP_bare can't be an InstAlias since it -// would need R0D hard coded which is not part of ADDR64BitRegClass. -def NOP : NeverCondBranchRX<"nop", 0x47>; -let isAsmParserOnly = 1, hasNoSchedulingInfo = 1, M1 = 0, X2 = 0, B2 = 0, D2 = 0 in - def NOP_bare : InstRXb<0x47,(outs), (ins), "nop", []>; -def NOPR : NeverCondBranchRR<"nopr", 0x07>; -def NOPR_bare : InstAlias<"nopr", (NOPR R0D), 0>; - -// An alias of BRC 0, label -def JNOP : InstAlias<"jnop\t$RI2", (BRCAsm 0, brtarget16:$RI2), 0>; - -// An alias of BRCL 0, label -// jgnop on gnu ; jlnop on hlasm -def JGNOP : InstAlias<"{jgnop|jlnop}\t$RI2", (BRCLAsm 0, brtarget32:$RI2), 0>; +// condition mask set to "never". +defm NOP : NeverCondBranchRX<"nop", 0x47>; +defm NOPR : NeverCondBranchRR<"nopr", 0x07>; +def JNOP : NeverCondBranchRI<"jnop", 0xA74>; +def JGNOP : NeverCondBranchRIL<"j{g|l}nop", 0xC04>; // Fused compare-and-branch instructions. 
// diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td index d0fec02777875..094b481b81f83 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td @@ -1557,7 +1557,7 @@ def : InstRW<[WLat30, MCD], (instregex "SAL$")>; // NOPs //===----------------------------------------------------------------------===// -def : InstRW<[WLat1, FXb, NormalGr], (instregex "NOP(R)?$")>; - +def : InstRW<[WLat1, FXb, NormalGr], (instregex "NOP(R)?(Opt)?$")>; +def : InstRW<[WLat1, VBU, NormalGr], (instregex "J(G)?NOP$")>; } diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td index a6d89ce9443c5..b9376d422ded2 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td @@ -1647,7 +1647,7 @@ def : InstRW<[WLat30, MCD], (instregex "SAL$")>; // NOPs //===----------------------------------------------------------------------===// -def : InstRW<[WLat1, FXb, NormalGr], (instregex "NOP(R)?$")>; - +def : InstRW<[WLat1, FXb, NormalGr], (instregex "NOP(R)?(Opt)?$")>; +def : InstRW<[WLat1, VBU, NormalGr], (instregex "J(G)?NOP$")>; } diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td index 455354e283ad8..5e28bf935a24b 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td @@ -1694,6 +1694,7 @@ def : InstRW<[WLat30, MCD], (instregex "SAL$")>; // NOPs //===----------------------------------------------------------------------===// -def : InstRW<[WLat1, FXb, NormalGr], (instregex "NOP(R)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "NOP(R)?(Opt)?$")>; +def : InstRW<[WLat1, VBU, NormalGr], (instregex "J(G)?NOP$")>; } diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td index 92abf0ba4022c..2c01691707cc3 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td @@ -1727,6 +1727,7 @@ def : InstRW<[WLat30, MCD], (instregex "SAL$")>; // NOPs //===----------------------------------------------------------------------===// -def : InstRW<[WLat1, FXb, NormalGr], (instregex "NOP(R)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "NOP(R)?(Opt)?$")>; +def : InstRW<[WLat1, VBU, NormalGr], (instregex "J(G)?NOP$")>; } diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td index 99d0d674bbbb2..f41a7057bb1f6 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td @@ -1239,6 +1239,7 @@ def : InstRW<[WLat30, MCD], (instregex "SAL$")>; // NOPs //===----------------------------------------------------------------------===// -def : InstRW<[WLat1, LSU, EndGroup], (instregex "NOP(R)?$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "NOP(R)?(Opt)?$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "J(G)?NOP$")>; } diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td index 5b334da2bac34..8f0a10d2863a3 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td @@ -1284,6 +1284,7 @@ def : InstRW<[WLat30, MCD], (instregex "SAL$")>; // NOPs //===----------------------------------------------------------------------===// -def : InstRW<[WLat1, LSU, NormalGr], (instregex "NOP(R)?$")>; +def 
: InstRW<[WLat1, LSU, NormalGr], (instregex "NOP(R)?(Opt)?$")>; +def : InstRW<[WLat1, VBU, NormalGr], (instregex "J(G)?NOP$")>; } diff --git a/llvm/test/MC/Disassembler/SystemZ/insns-pcrel.txt b/llvm/test/MC/Disassembler/SystemZ/insns-pcrel.txt index ef1d0f1970d16..f12441c9c6cba 100644 --- a/llvm/test/MC/Disassembler/SystemZ/insns-pcrel.txt +++ b/llvm/test/MC/Disassembler/SystemZ/insns-pcrel.txt @@ -52,7 +52,7 @@ 0xa7 0xf5 0x7f 0xff # 0x0000003c: -# CHECK: brcl 0, 0x3c +# CHECK: jgnop 0x3c 0xc0 0x04 0x00 0x00 0x00 0x00 # 0x00000042: @@ -116,15 +116,15 @@ 0xc0 0xf4 0x00 0x00 0x00 0x00 # 0x0000009c: -# CHECK: brcl 0, 0x9a +# CHECK: jgnop 0x9a 0xc0 0x04 0xff 0xff 0xff 0xff # 0x000000a2: -# CHECK: brcl 0, 0xffffffff000000a2 +# CHECK: jgnop 0xffffffff000000a2 0xc0 0x04 0x80 0x00 0x00 0x00 # 0x000000a8: -# CHECK: brcl 0, 0x1000000a6 +# CHECK: jgnop 0x1000000a6 0xc0 0x04 0x7f 0xff 0xff 0xff # 0x000000ae: @@ -140,7 +140,7 @@ 0xc0 0xf4 0x7f 0xff 0xff 0xff # 0x000000c0: -# CHECK: brc 0, 0xc0 +# CHECK: jnop 0xc0 0xa7 0x04 0x00 0x00 # 0x000000c4: @@ -204,15 +204,15 @@ 0xa7 0xf4 0x00 0x00 # 0x00000100: -# CHECK: brc 0, 0xfe +# CHECK: jnop 0xfe 0xa7 0x04 0xff 0xff # 0x00000104: -# CHECK: brc 0, 0xffffffffffff0104 +# CHECK: jnop 0xffffffffffff0104 0xa7 0x04 0x80 0x00 # 0x00000108: -# CHECK: brc 0, 0x10106 +# CHECK: jnop 0x10106 0xa7 0x04 0x7f 0xff # 0x0000010c: diff --git a/llvm/test/MC/Disassembler/SystemZ/insns.txt b/llvm/test/MC/Disassembler/SystemZ/insns.txt index 07a1ff6d18388..a4e4a2203a467 100644 --- a/llvm/test/MC/Disassembler/SystemZ/insns.txt +++ b/llvm/test/MC/Disassembler/SystemZ/insns.txt @@ -1315,7 +1315,7 @@ # CHECK: bassm %r15, %r1 0x0c 0xf1 -# CHECK: nop 0 +# CHECK: nop 0x47 0x00 0x00 0x00 # CHECK: nop 4095 diff --git a/llvm/test/MC/SystemZ/insn-good-zos-pcrel.s b/llvm/test/MC/SystemZ/insn-good-zos-pcrel.s index 734520798baa6..0acbe26d75b15 100644 --- a/llvm/test/MC/SystemZ/insn-good-zos-pcrel.s +++ b/llvm/test/MC/SystemZ/insn-good-zos-pcrel.s @@ -3,7 +3,7 @@ *CHECK: brcl 0, FOO * encoding: [0xc0,0x04,A,A,A,A] *CHECK: fixup A - offset: 2, value: FOO+2, kind: FK_390_PC32DBL -*CHECK: brcl 0, FOO * encoding: [0xc0,0x04,A,A,A,A] +*CHECK: jgnop FOO * encoding: [0xc0,0x04,A,A,A,A] *CHECK: fixup A - offset: 2, value: FOO+2, kind: FK_390_PC32DBL brcl 0,FOO jlnop FOO diff --git a/llvm/test/MC/SystemZ/insn-good.s b/llvm/test/MC/SystemZ/insn-good.s index 09f55049546c2..93f5ff27780ab 100644 --- a/llvm/test/MC/SystemZ/insn-good.s +++ b/llvm/test/MC/SystemZ/insn-good.s @@ -1398,35 +1398,35 @@ #CHECK: brc 0, .[[LAB:L.*]]-65536 # encoding: [0xa7,0x04,A,A] #CHECK: fixup A - offset: 2, value: (.[[LAB]]-65536)+2, kind: FK_390_PC16DBL -#CHECK: brc 0, .[[LAB:L.*]]-65536 # encoding: [0xa7,0x04,A,A] +#CHECK: jnop .[[LAB:L.*]]-65536 # encoding: [0xa7,0x04,A,A] #CHECK: fixup A - offset: 2, value: (.[[LAB]]-65536)+2, kind: FK_390_PC16DBL brc 0, -0x10000 jnop -0x10000 #CHECK: brc 0, .[[LAB:L.*]]-2 # encoding: [0xa7,0x04,A,A] #CHECK: fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC16DBL -#CHECK: brc 0, .[[LAB:L.*]]-2 # encoding: [0xa7,0x04,A,A] +#CHECK: jnop .[[LAB:L.*]]-2 # encoding: [0xa7,0x04,A,A] #CHECK: fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC16DBL brc 0, -2 jnop -2 #CHECK: brc 0, .[[LAB:L.*]] # encoding: [0xa7,0x04,A,A] #CHECK: fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL -#CHECK: brc 0, .[[LAB:L.*]] # encoding: [0xa7,0x04,A,A] +#CHECK: jnop .[[LAB:L.*]] # encoding: [0xa7,0x04,A,A] #CHECK: fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL brc 0, 0 jnop 
0 #CHECK: brc 0, .[[LAB:L.*]]+65534 # encoding: [0xa7,0x04,A,A] #CHECK: fixup A - offset: 2, value: (.[[LAB]]+65534)+2, kind: FK_390_PC16DBL -#CHECK: brc 0, .[[LAB:L.*]]+65534 # encoding: [0xa7,0x04,A,A] +#CHECK: jnop .[[LAB:L.*]]+65534 # encoding: [0xa7,0x04,A,A] #CHECK: fixup A - offset: 2, value: (.[[LAB]]+65534)+2, kind: FK_390_PC16DBL brc 0, 0xfffe jnop 0xfffe #CHECK: brc 0, foo # encoding: [0xa7,0x04,A,A] #CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL -#CHECK: brc 0, foo # encoding: [0xa7,0x04,A,A] +#CHECK: jnop foo # encoding: [0xa7,0x04,A,A] #CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL brc 0, foo jnop foo @@ -1623,7 +1623,7 @@ #CHECK: brc 0, bar+100 # encoding: [0xa7,0x04,A,A] #CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL -#CHECK: brc 0, bar+100 # encoding: [0xa7,0x04,A,A] +#CHECK: jnop bar+100 # encoding: [0xa7,0x04,A,A] #CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL brc 0, bar+100 jnop bar+100 @@ -1735,7 +1735,7 @@ #CHECK: brc 0, bar@PLT # encoding: [0xa7,0x04,A,A] #CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL -#CHECK: brc 0, bar@PLT # encoding: [0xa7,0x04,A,A] +#CHECK: jnop bar@PLT # encoding: [0xa7,0x04,A,A] #CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL brc 0, bar@PLT jnop bar@PLT @@ -1847,32 +1847,32 @@ #CHECK: brcl 0, .[[LAB:L.*]]-4294967296 # encoding: [0xc0,0x04,A,A,A,A] #CHECK: fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL -#CHECK: brcl 0, .[[LAB:L.*]]-4294967296 # encoding: [0xc0,0x04,A,A,A,A] +#CHECK: jgnop .[[LAB:L.*]]-4294967296 # encoding: [0xc0,0x04,A,A,A,A] #CHECK: fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL brcl 0, -0x100000000 jgnop -0x100000000 #CHECK: brcl 0, .[[LAB:L.*]]-2 # encoding: [0xc0,0x04,A,A,A,A] #CHECK: fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL -#CHECK: brcl 0, .[[LAB:L.*]]-2 # encoding: [0xc0,0x04,A,A,A,A] +#CHECK: jgnop .[[LAB:L.*]]-2 # encoding: [0xc0,0x04,A,A,A,A] #CHECK: fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL brcl 0, -2 jgnop -2 #CHECK: brcl 0, .[[LAB:L.*]] # encoding: [0xc0,0x04,A,A,A,A] #CHECK: fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL -#CHECK: brcl 0, .[[LAB:L.*]] # encoding: [0xc0,0x04,A,A,A,A] +#CHECK: jgnop .[[LAB:L.*]] # encoding: [0xc0,0x04,A,A,A,A] #CHECK: fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL brcl 0, 0 jgnop 0 #CHECK: brcl 0, .[[LAB:L.*]]+4294967294 # encoding: [0xc0,0x04,A,A,A,A] #CHECK: fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL -#CHECK: brcl 0, .[[LAB:L.*]]+4294967294 # encoding: [0xc0,0x04,A,A,A,A] +#CHECK: jgnop .[[LAB:L.*]]+4294967294 # encoding: [0xc0,0x04,A,A,A,A] #CHECK: fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL brcl 0, 0xfffffffe jgnop 0xfffffffe #CHECK: brcl 0, foo # encoding: [0xc0,0x04,A,A,A,A] #CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL -#CHECK: brcl 0, foo # encoding: [0xc0,0x04,A,A,A,A] +#CHECK: jgnop foo # encoding: [0xc0,0x04,A,A,A,A] #CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL brcl 0, foo jgnop foo @@ -2065,7 +2065,7 @@ #CHECK: brcl 0, bar+100 # encoding: [0xc0,0x04,A,A,A,A] #CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL -#CHECK: brcl 0, bar+100 # encoding: [0xc0,0x04,A,A,A,A] +#CHECK: jgnop bar+100 # encoding: [0xc0,0x04,A,A,A,A] #CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL brcl 0, bar+100 jgnop 
bar+100

@@ -2177,7 +2177,7 @@

 #CHECK: brcl 0, bar+100 # encoding: [0xc0,0x04,A,A,A,A]
 #CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-#CHECK: brcl 0, bar@PLT # encoding: [0xc0,0x04,A,A,A,A]
+#CHECK: jgnop bar@PLT # encoding: [0xc0,0x04,A,A,A,A]
 #CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL

 brcl 0, bar@PLT
 jgnop bar@PLT

@@ -13142,7 +13142,7 @@
 #CHECK: nop 0 # encoding: [0x47,0x00,0x00,0x00]
 #CHECK: nop # encoding: [0x47,0x00,0x00,0x00]
 #CHECK: nopr %r7 # encoding: [0x07,0x07]
-#CHECK: nopr %r0 # encoding: [0x07,0x00]
+#CHECK: nopr # encoding: [0x07,0x00]

 nop 0
 nop

From ce416a073be6da06be7fd2d427b896b380fb3877 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers
Date: Tue, 3 Dec 2024 10:05:10 -0800
Subject: [PATCH 100/191] [Github] fix libc documentation path (#118514)

Running `ninja docs-libc-html` produces content in {build_dir}/libc/docs,
not {build_dir}/docs.

Presubmit jobs for "Test documentation build" were failing as a result of
commit 97f94af3560d ("[Github] Upload built docs as artifact from test
build docs job (#118159)")

Link: #118159
Link: #117220
---
 .github/workflows/docs.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 2e4009994a2b0..fe9c70cf1f5a9 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -162,7 +162,7 @@ jobs:
           cmake -B libc-build -GNinja -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_RUNTIMES="libc" -DLLVM_ENABLE_SPHINX=ON ./runtimes
           TZ=UTC ninja -C libc-build docs-libc-html
           mkdir built-docs/libc
-          cp -r libc-build/docs/* built-docs/libc/
+          cp -r libc-build/libc/docs/* built-docs/libc/
       - name: Build LLD docs
         if: steps.docs-changed-subprojects.outputs.lld_any_changed == 'true'
         run: |

From f44bee78c471df8f0a258c656a921a082d7e836a Mon Sep 17 00:00:00 2001
From: Greg Clayton
Date: Tue, 3 Dec 2024 10:09:40 -0800
Subject: [PATCH 101/191] [lldb] Fix the DWARF index cache when index is partial. (#118390)

The ManualDWARFIndex class can create an index cache if the LLDB index cache
is enabled. This used to save the index to the same file, regardless of
whether the cache was a full index (no .debug_names) or a partial index
(.debug_names present, but not all .o files had .debug_names). So we could
end up saving an index cache that was partial, and then later load that
partial index as if it were a full index if the user then ran 'settings set
plugin.symbol-file.dwarf.ignore-file-indexes true'. That setting causes us to
ignore the .debug_names section, and with the index cache enabled we could
end up loading the partial index as if it were a full DWARF index.

This patch detects when the ManualDWARFIndex is being used with .debug_names,
and saves out a cache file with a suffix of "-full" or "-partial" to avoid
this issue.
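
For illustration, the two index flavors now end up under distinct cache keys.
The module name and hash below are made up, but the "-dwarf-index-<full|partial>-"
part matches what GetCacheKey() produces after this change:

```
a.out-dwarf-index-full-0x0000abcd      <- no .debug_names; the manual index covers all CUs/TUs
a.out-dwarf-index-partial-0x0000abcd   <- .debug_names present; the manual index covers only the leftover CUs/TUs
```

A later run that disables .debug_names therefore can no longer pick up the
partial cache file by accident.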
--- .../SymbolFile/DWARF/ManualDWARFIndex.cpp | 24 ++++++- .../SymbolFile/DWARF/ManualDWARFIndex.h | 10 +++ .../SymbolFile/DWARF/x86/dwp-index-cache.cpp | 62 +++++++++++++++++++ 3 files changed, 95 insertions(+), 1 deletion(-) create mode 100644 lldb/test/Shell/SymbolFile/DWARF/x86/dwp-index-cache.cpp diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp index 1220e6115a2a9..0be19ab29ef08 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp @@ -706,6 +706,11 @@ bool ManualDWARFIndex::Encode(DataEncoder &encoder) const { return true; } +bool ManualDWARFIndex::IsPartial() const { + // If we have units or type units to skip, then this index is partial. + return !m_units_to_avoid.empty() || !m_type_sigs_to_avoid.empty(); +} + std::string ManualDWARFIndex::GetCacheKey() { std::string key; llvm::raw_string_ostream strm(key); @@ -713,9 +718,26 @@ std::string ManualDWARFIndex::GetCacheKey() { // module can have one object file as the main executable and might have // another object file in a separate symbol file, or we might have a .dwo file // that claims its module is the main executable. + + // This class can be used to index all of the DWARF, or part of the DWARF + // when there is a .debug_names index where some compile or type units were + // built without .debug_names. So we need to know when we have a full manual + // DWARF index or a partial manual DWARF index and save them to different + // cache files. Before this fix we might end up debugging a binary with + // .debug_names where some of the compile or type units weren't indexed, and + // find an issue with the .debug_names tables (bugs or being incomplete), and + // then we disable loading the .debug_names by setting a setting in LLDB by + // running "settings set plugin.symbol-file.dwarf.ignore-file-indexes 0" in + // another LLDB instance. The problem arose when there was an index cache from + // a previous run where .debug_names was enabled and it had saved a cache file + // that only covered the missing compile and type units from the .debug_names, + // and with the setting that disables the loading of the cache files we would + // load partial cache index cache. So we need to pick a unique cache suffix + // name that indicates if the cache is partial or full to avoid this problem. + llvm::StringRef dwarf_index_suffix(IsPartial() ? "partial-" : "full-"); ObjectFile *objfile = m_dwarf->GetObjectFile(); strm << objfile->GetModule()->GetCacheKey() << "-dwarf-index-" - << llvm::format_hex(objfile->GetCacheHash(), 10); + << dwarf_index_suffix << llvm::format_hex(objfile->GetCacheHash(), 10); return key; } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h index d8c4a22ab21f7..6a52c88a99220 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h @@ -168,6 +168,16 @@ class ManualDWARFIndex : public DWARFIndex { const lldb::LanguageType cu_language, IndexSet &set); + /// Return true if this manual DWARF index is covering only part of the DWARF. + /// + /// An instance of this class will be used to index all of the DWARF, but also + /// when we have .debug_names we will use one to index any compile or type + /// units that are not covered by the .debug_names table. 
+ /// + /// \return + /// True if this index is a partial index, false otherwise. + bool IsPartial() const; + /// The DWARF file which we are indexing. SymbolFileDWARF *m_dwarf; /// Which dwarf units should we skip while building the index. diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-index-cache.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-index-cache.cpp new file mode 100644 index 0000000000000..3e97c3fb1ebc2 --- /dev/null +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-index-cache.cpp @@ -0,0 +1,62 @@ +// REQUIRES: lld + +// Test if we build a mixed binary where one .o file has a .debug_names and +// another doesn't have one, that we save a full or partial index cache. +// Previous versions of LLDB would have ManualDWARFIndex.cpp that would save out +// an index cache to the same file regardless of wether the index cache was a +// full DWARF manual index, or just the CUs and TUs that were missing from any +// .debug_names tables. If the user had a .debug_names table and debugged once +// with index caching enabled, then debugged again but set the setting to ignore +// .debug_names ('settings set plugin.symbol-file.dwarf.ignore-file-indexes 1') +// this could cause LLDB to load the index cache from the previous run which +// was incomplete and it only contained the manually indexed DWARF from the run +// where we used .debug_names, but it would now load it as if it were the +// complete DWARF index. + +// Test that if we don't have .debug_names, that we save a full DWARF index. +// RUN: %clang -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -DMAIN=1 -c %s -o %t.main.o +// RUN: %clang -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -DMAIN=0 -c %s -o %t.foo.o +// RUN: ld.lld %t.main.o %t.foo.o -o %t.nonames +// RUN: llvm-dwp %t.main.dwo %t.foo.dwo -o %t.nonames.dwp +// RUN: rm %t.main.dwo %t.foo.dwo +// Run one time with the index cache enabled to populate the index cache. When +// we populate the index cache we have to parse all of the DWARF debug info +// and it is always available. +// RUN: rm -rf %t.lldb-index-cache +// RUN: %lldb \ +// RUN: -O 'settings set symbols.enable-lldb-index-cache true' \ +// RUN: -O 'settings set symbols.lldb-index-cache-path %t.lldb-index-cache' \ +// RUN: -O 'settings set target.preload-symbols true' \ +// RUN: %t.nonames -b + +// Make sure there is a file with "dwarf-index-full" in its filename +// RUN: ls %t.lldb-index-cache | FileCheck %s -check-prefix=FULL +// FULL: {{dwp-index-cache.cpp.tmp.nonames.*-dwarf-index-full-}} + +// Test that if we have one .o file with .debug_names and one without, that we +// save a partial DWARF index. +// RUN: %clang -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -DMAIN=1 -c %s -o %t.main.o -gpubnames +// RUN: %clang -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -DMAIN=0 -c %s -o %t.foo.o +// RUN: ld.lld %t.main.o %t.foo.o -o %t.somenames +// RUN: llvm-dwp %t.main.dwo %t.foo.dwo -o %t.somenames.dwp +// RUN: rm %t.main.dwo %t.foo.dwo +// Run one time with the index cache enabled to populate the index cache. When +// we populate the index cache we have to parse all of the DWARF debug info +// and it is always available. 
+// RUN: rm -rf %t.lldb-index-cache +// RUN: %lldb \ +// RUN: -O 'settings set symbols.enable-lldb-index-cache true' \ +// RUN: -O 'settings set symbols.lldb-index-cache-path %t.lldb-index-cache' \ +// RUN: -O 'settings set target.preload-symbols true' \ +// RUN: %t.somenames -b + +// Make sure there is a file with "dwarf-index-full" in its filename +// RUN: ls %t.lldb-index-cache | FileCheck %s -check-prefix=PARTIAL +// PARTIAL: {{dwp-index-cache.cpp.tmp.somenames.*-dwarf-index-partial-}} + +#if MAIN +extern int foo(); +int main() { return foo(); } +#else +int foo() { return 0; } +#endif From e8b9e1354accf33ced45321abdd8c8bc65d025cc Mon Sep 17 00:00:00 2001 From: Augie Fackler Date: Tue, 3 Dec 2024 13:16:41 -0500 Subject: [PATCH 102/191] [bazel] update config.h.cmake for 4dafb091a0336a6669e6369cb69ef1ea1b39578d reverts 1a1de2465d8399b178149e7e23c7450fdc5dde9f --- utils/bazel/llvm_configs/abi-breaking.h.cmake | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/utils/bazel/llvm_configs/abi-breaking.h.cmake b/utils/bazel/llvm_configs/abi-breaking.h.cmake index 81495f0569752..2d27e02b1d545 100644 --- a/utils/bazel/llvm_configs/abi-breaking.h.cmake +++ b/utils/bazel/llvm_configs/abi-breaking.h.cmake @@ -12,8 +12,6 @@ #ifndef LLVM_ABI_BREAKING_CHECKS_H #define LLVM_ABI_BREAKING_CHECKS_H -#include "llvm/Support/Compiler.h" - /* Define to enable checks that alter the LLVM C++ ABI */ #cmakedefine01 LLVM_ENABLE_ABI_BREAKING_CHECKS @@ -45,12 +43,12 @@ #endif namespace llvm { #if LLVM_ENABLE_ABI_BREAKING_CHECKS -LLVM_ABI extern int EnableABIBreakingChecks; +extern int EnableABIBreakingChecks; LLVM_HIDDEN_VISIBILITY __attribute__((weak)) int *VerifyEnableABIBreakingChecks = &EnableABIBreakingChecks; #else -LLVM_ABI extern int DisableABIBreakingChecks; +extern int DisableABIBreakingChecks; LLVM_HIDDEN_VISIBILITY __attribute__((weak)) int *VerifyDisableABIBreakingChecks = &DisableABIBreakingChecks; From 2a1a02461a8d4ae9f560a4215fe85a1f085b4d82 Mon Sep 17 00:00:00 2001 From: Luke Riddle <35970909+lukejriddle@users.noreply.github.com> Date: Tue, 3 Dec 2024 13:29:12 -0500 Subject: [PATCH 103/191] Make SBMemoryRegionInfoList iterable with Python SWIG (#117358) This PR fixes a simple SWIG issue with SBMemoryRegionInfoList not being iterable out-of-the-box. This is mostly because of limitations to the `lldb_iter` function, which doesn't allow for specifying arguments to the size / iter functions passed. Before: ``` (lldb) script Python Interactive Interpreter. To exit, type 'quit()', 'exit()' or Ctrl-D. >>> for region in lldb.process.GetMemoryRegions(): ... print(region) ... Traceback (most recent call last): File "", line 1, in File "/opt/llvm/stable/Toolchains/llvm-sand.xctoolchain/usr/lib/python3.10/site-packages/lldb/__init__.py", line 114, in lldb_iter yield elem(i) TypeError: SBMemoryRegionInfoList.GetMemoryRegionAtIndex() missing 1 required positional argument: 'region_info' ``` After: ``` (lldb) script Python Interactive Interpreter. To exit, type 'quit()', 'exit()' or Ctrl-D. >>> for region in lldb.process.GetMemoryRegions(): ... print(region) ... 
[0x0000000000200000-0x00000000002cf000 R--] [0x00000000002cf000-0x0000000000597000 R-X] [0x0000000000597000-0x00000000005ad000 R--] [0x00000000005ad000-0x00000000005b1000 RW-] [0x00000000005b1000-0x0000000000b68000 RW-] [0x000000007fff7000-0x000000008fff7000 RW-] [0x000002008fff7000-0x000010007fff8000 RW-] [0x0000503000000000-0x0000503000010000 RW-] [0x0000503e00000000-0x0000503e00010000 RW-] [0x0000504000000000-0x0000504000010000 RW-] [0x0000504e00000000-0x0000504e00010000 RW-] [0x000050d000000000-0x000050d000010000 RW-] [0x000050de00000000-0x000050de00010000 RW-] [0x000050e000000000-0x000050e000010000 RW-] [0x000050ee00000000-0x000050ee00010000 RW-] [0x0000511000000000-0x0000511000010000 RW-] [0x0000511e00000000-0x0000511e00010000 RW-] [0x0000513000000000-0x0000513000010000 RW-] ... ``` --- .../interface/SBMemoryRegionInfoListExtensions.i | 7 ++++++- .../python_api/find_in_memory/TestFindInMemory.py | 13 +++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/lldb/bindings/interface/SBMemoryRegionInfoListExtensions.i b/lldb/bindings/interface/SBMemoryRegionInfoListExtensions.i index 49d49110de7ff..29c0179c0ffe3 100644 --- a/lldb/bindings/interface/SBMemoryRegionInfoListExtensions.i +++ b/lldb/bindings/interface/SBMemoryRegionInfoListExtensions.i @@ -7,7 +7,12 @@ def __iter__(self): '''Iterate over all the memory regions in a lldb.SBMemoryRegionInfoList object.''' - return lldb_iter(self, 'GetSize', 'GetMemoryRegionAtIndex') + import lldb + size = self.GetSize() + region = lldb.SBMemoryRegionInfo() + for i in range(size): + self.GetMemoryRegionAtIndex(i, region) + yield region %} #endif } diff --git a/lldb/test/API/python_api/find_in_memory/TestFindInMemory.py b/lldb/test/API/python_api/find_in_memory/TestFindInMemory.py index 04e807c5c6201..1ef37d2ec9898 100644 --- a/lldb/test/API/python_api/find_in_memory/TestFindInMemory.py +++ b/lldb/test/API/python_api/find_in_memory/TestFindInMemory.py @@ -152,3 +152,16 @@ def test_find_in_memory_unaligned(self): ) self.assertSuccess(error) self.assertEqual(addr, lldb.LLDB_INVALID_ADDRESS) + + def test_memory_info_list_iterable(self): + """Make sure the SBMemoryRegionInfoList is iterable""" + self.assertTrue(self.process, PROCESS_IS_VALID) + self.assertState(self.process.GetState(), lldb.eStateStopped, PROCESS_STOPPED) + + info_list = self.process.GetMemoryRegions() + self.assertTrue(info_list.GetSize() > 0) + try: + for info in info_list: + pass + except Exception: + self.fail("SBMemoryRegionInfoList is not iterable") From 935bbbbde4b2661ed7f8f2975a39bda360572572 Mon Sep 17 00:00:00 2001 From: Zaara Syeda Date: Tue, 3 Dec 2024 13:31:26 -0500 Subject: [PATCH 104/191] [PPC] Remove missed cases of ppc-merge-string-pool (#117626) PPCMergeStringPool was replaced with GlobalMerge with commit aaa37d6. Some cases of option ppc-merge-string-pool were missed being removed. 
--- llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 5 ----- llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll | 6 +++--- llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll | 8 ++++---- .../CodeGen/PowerPC/aix-xcoff-used-with-stringpool.ll | 2 +- .../CodeGen/PowerPC/mergeable-string-pool-exceptions.ll | 2 +- llvm/test/CodeGen/PowerPC/mergeable-string-pool-tls.ll | 4 ++-- 6 files changed, 11 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index 2b64ab9aa6973..11d7ea68312fb 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -99,11 +99,6 @@ static cl::opt cl::desc("Expand eligible cr-logical binary ops to branches"), cl::init(true), cl::Hidden); -static cl::opt MergeStringPool( - "ppc-merge-string-pool", - cl::desc("Merge all of the strings in a module into one pool"), - cl::init(true), cl::Hidden); - static cl::opt EnablePPCGenScalarMASSEntries( "enable-ppc-gen-scalar-mass", cl::init(false), cl::desc("Enable lowering math functions to their corresponding MASS " diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll index 5462240846994..edc74e393108f 100644 --- a/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll @@ -4,14 +4,14 @@ ; tests for XCOFF object files. ; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -xcoff-traceback-table=false \ -; RUN: -mtriple powerpc-ibm-aix-xcoff -data-sections=false -ppc-merge-string-pool=false \ +; RUN: -mtriple powerpc-ibm-aix-xcoff -data-sections=false \ ; RUN: -global-merge-all-const=false < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -xcoff-traceback-table=false \ -; RUN: -mtriple powerpc64-ibm-aix-xcoff -data-sections=false -ppc-merge-string-pool=false \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff -data-sections=false \ ; RUN: -global-merge-all-const=false < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \ -; RUN: -xcoff-traceback-table=false -data-sections=false -ppc-merge-string-pool=false \ +; RUN: -xcoff-traceback-table=false -data-sections=false \ ; RUN: -global-merge-all-const=false -filetype=obj -o %t.o < %s ; RUN: llvm-objdump -D %t.o | FileCheck --check-prefix=CHECKOBJ %s diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll index 0fa47373964a0..9b0a3fe0a716f 100644 --- a/llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll @@ -1,16 +1,16 @@ -; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc-ibm-aix-xcoff -data-sections=false -ppc-merge-string-pool=false < %s | \ +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc-ibm-aix-xcoff -data-sections=false < %s | \ ; RUN: FileCheck --check-prefixes=CHECK,CHECK32 %s -; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc64-ibm-aix-xcoff -data-sections=false -ppc-merge-string-pool=false < %s | \ +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc64-ibm-aix-xcoff -data-sections=false < %s | \ ; RUN: FileCheck --check-prefixes=CHECK,CHECK64 %s -; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc-ibm-aix-xcoff -data-sections=false -ppc-merge-string-pool=false \ +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc-ibm-aix-xcoff -data-sections=false \ ; RUN: -filetype=obj -o %t.o < %s ; 
RUN: llvm-readobj --section-headers --file-header %t.o | \ ; RUN: FileCheck --check-prefixes=OBJ,OBJ32 %s ; RUN: llvm-readobj --syms %t.o | FileCheck --check-prefixes=SYMS,SYMS32 %s ; RUN: llvm-objdump -D %t.o | FileCheck --check-prefix=DIS %s -; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc64-ibm-aix-xcoff -data-sections=false -ppc-merge-string-pool=false \ +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc64-ibm-aix-xcoff -data-sections=false \ ; RUN: -filetype=obj -o %t64.o < %s ; RUN: llvm-readobj --section-headers --file-header %t64.o | \ ; RUN: FileCheck --check-prefixes=OBJ,OBJ64 %s diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-used-with-stringpool.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-used-with-stringpool.ll index bbcba59e2e33a..5e4784ac2904c 100644 --- a/llvm/test/CodeGen/PowerPC/aix-xcoff-used-with-stringpool.ll +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-used-with-stringpool.ll @@ -1,4 +1,4 @@ -;; Test that the string pooling pass does not pool globals that are +;; Test that the global merge pass does not pool globals that are ;; in llvm.used or in llvm.compiler.used. ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc-ibm-aix-xcoff \ diff --git a/llvm/test/CodeGen/PowerPC/mergeable-string-pool-exceptions.ll b/llvm/test/CodeGen/PowerPC/mergeable-string-pool-exceptions.ll index 03a830e087d26..10179bba136f9 100644 --- a/llvm/test/CodeGen/PowerPC/mergeable-string-pool-exceptions.ll +++ b/llvm/test/CodeGen/PowerPC/mergeable-string-pool-exceptions.ll @@ -4,7 +4,7 @@ @id = private unnamed_addr constant [4 x i8] c"@id\00", align 1 @id2 = private unnamed_addr constant [5 x i8] c"@id2\00", align 1 -; Higher-aligned dummy to make sure it is first in the string pool. +; Higher-aligned dummy to make sure it is first in the global merge pool. @dummy = private unnamed_addr constant [1 x i32] [i32 42], align 4 define ptr @test1() personality ptr @__gnu_objc_personality_v0 { diff --git a/llvm/test/CodeGen/PowerPC/mergeable-string-pool-tls.ll b/llvm/test/CodeGen/PowerPC/mergeable-string-pool-tls.ll index a726e7741cf0f..aa0b441646fd3 100644 --- a/llvm/test/CodeGen/PowerPC/mergeable-string-pool-tls.ll +++ b/llvm/test/CodeGen/PowerPC/mergeable-string-pool-tls.ll @@ -258,8 +258,8 @@ entry: ret void } -; Check the contents of the TLS data and the __ModuleStringPool structure to -; check that TLS data has been skipped during string pool merging. +; Check the contents of the TLS data and the _MergedGlobals structure to +; check that TLS data has been skipped during global merge. ; CHECK64: .csect a[TL],2 ; CHECK64-NEXT: .lglobl a[TL] From 0ca8a593e5da5d00100db8f735b4c4babeb36eb8 Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Tue, 3 Dec 2024 10:31:38 -0800 Subject: [PATCH 105/191] [lldb][NFC] Remove unused field Platform::m_remote_url (#118411) Related: https://discourse.llvm.org/t/rfc-lldb-a-proposal-to-refactor-platform/82697 --- lldb/include/lldb/Target/Platform.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/lldb/include/lldb/Target/Platform.h b/lldb/include/lldb/Target/Platform.h index 920f80bc73317..f8a2cbf0d5d04 100644 --- a/lldb/include/lldb/Target/Platform.h +++ b/lldb/include/lldb/Target/Platform.h @@ -473,8 +473,6 @@ class Platform : public PluginInterface { LLVM_PRETTY_FUNCTION, GetName())); } - const std::string &GetRemoteURL() const { return m_remote_url; } - bool IsHost() const { return m_is_host; // Is this the default host platform? 
}

@@ -977,7 +975,6 @@
   std::string m_sdk_build;
   FileSpec m_working_dir; // The working directory which is used when installing
                           // modules that have no install path set
-  std::string m_remote_url;
   std::string m_hostname;
   llvm::VersionTuple m_os_version;
   ArchSpec

From 0c6457b781ae8365ef2169376ae78675b5b4896b Mon Sep 17 00:00:00 2001
From: Mason Remy
Date: Tue, 3 Dec 2024 10:33:15 -0800
Subject: [PATCH 106/191] [LLVM][TableGen] Refine overloaded intrinsic suffix check (#117957)

Previously the check comments indicated that [pi][0-9]+ would match as a type
suffix; however, the check itself was looking for [pi][0-9]* and hence an 'i'
suffix in isolation was being considered a type suffix despite not having a
bitwidth.

This change makes the check consistent with the comment and looks for
[pi][0-9]+
---
 llvm/test/TableGen/intrinsic-overload-conflict.td | 6 +++++-
 llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp   | 3 ++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/llvm/test/TableGen/intrinsic-overload-conflict.td b/llvm/test/TableGen/intrinsic-overload-conflict.td
index 84333119d41f5..13431c3bc49e0 100644
--- a/llvm/test/TableGen/intrinsic-overload-conflict.td
+++ b/llvm/test/TableGen/intrinsic-overload-conflict.td
@@ -6,13 +6,17 @@ include "llvm/IR/Intrinsics.td"
 // CHECK: foo = 1,
 def int_foo : Intrinsic<[llvm_any_ty]>;

-// No conflicts, since .bar is not a vaid mangled type.
+// No conflicts, since .bar is not a valid mangled type.
 // CHECK: foo_bar,
 def int_foo_bar : Intrinsic<[llvm_i32_ty]>;
 // CHECK: foo_bar_f32,
 def int_foo_bar_f32 : Intrinsic<[llvm_i32_ty]>;

+// No conflicts, since i is not a valid mangled type without a bitwidth.
+// CHECK: foo_i
+def int_foo_i : Intrinsic<[llvm_i32_ty]>;
+
 #ifdef CONFLICT
 // CHECK-CONFLICT: error: intrinsic `llvm.foo.a3` cannot share prefix `llvm.foo.a3` with another overloaded intrinsic `llvm.foo`
 // CHECK-CONFLICT: error: intrinsic `llvm.foo.bf16` cannot share prefix `llvm.foo.bf16` with another overloaded intrinsic `llvm.foo`
diff --git a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
index 18e0b8fd135bb..0846f66ea6452 100644
--- a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
+++ b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
@@ -157,7 +157,8 @@ static bool doesSuffixLookLikeMangledType(StringRef Suffix) {
     return false;

   // [pi][0-9]+
-  if (is_contained("pi", Suffix[0]) && all_of(Suffix.drop_front(), isDigit))
+  if (Suffix.size() > 1 && is_contained("pi", Suffix[0]) &&
+      all_of(Suffix.drop_front(), isDigit))
     return true;

   // Match one of the named types.

From deed1b0664e85cd3bb33451a81db52459d9f7663 Mon Sep 17 00:00:00 2001
From: Ulrich Weigand
Date: Tue, 3 Dec 2024 19:32:15 +0100
Subject: [PATCH 107/191] [SystemZ] Fix address parsing in HLASM mode

When parsing an address that contains only a single register for an
instruction that actually has both a base and an index register, the parsed
register is treated as the base by the AsmParser. This is correct when
emulating the GNU assembler, but not when emulating HLASM, as the latter
treats the register as the index in this case.
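
To make the behavioral difference concrete, here is a small sketch (the
operand values are made up, not taken from an existing test):

```
Single register, e.g.  200(5)  /  200(%r5)
  GNU mode:    the register is parsed as the base
  HLASM mode:  the register is parsed as the index
Two registers, e.g.    200(4,5)  /  200(%r4,%r5)
  both modes:  the first register is the index, the second is the base
```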
--- llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index e4aefc42d860f..599afed2199fb 100644 --- a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -1148,9 +1148,10 @@ ParseStatus SystemZAsmParser::parseAddress(OperandVector &Operands, if (HaveReg1) { if (parseAddressRegister(Reg1)) return ParseStatus::Failure; - // If the are two registers, the first one is the index and the - // second is the base. - if (HaveReg2) + // If there are two registers, the first one is the index and the + // second is the base. If there is only a single register, it is + // used as base with GAS and as index with HLASM. + if (HaveReg2 || isParsingHLASM()) Index = Reg1.Num == 0 ? 0 : Regs[Reg1.Num]; else Base = Reg1.Num == 0 ? 0 : Regs[Reg1.Num]; From 9791f258079a4334c61c64cb62d9746a3db2c25c Mon Sep 17 00:00:00 2001 From: Aidan Goldfarb <47676355+AidanGoldfarb@users.noreply.github.com> Date: Tue, 3 Dec 2024 13:39:17 -0500 Subject: [PATCH 108/191] [clang][sema] Add support and documentation for `__has_extension(c_fixed_enum)` (#117507) This PR addresses #116880 Updated [LanguageExtensions.rst](https://github.com/llvm/llvm-project/blob/main/clang/docs/LanguageExtensions.rst) to include support for C++11 enumerations with a fixed underlying type in C. Included a note that this is only a language extension prior to C23. Updated [Features.def](https://github.com/llvm-mirror/clang/blob/master/include/clang/Basic/Features.def) to support for `__has_extension(c_fixed_enum)` by added it as a feature (for C23) and an extension (for `_. For example, one can write an enumeration type as: .. code-block:: c++ @@ -2001,6 +2001,14 @@ value, is ``unsigned char``. Use ``__has_feature(objc_fixed_enum)`` to determine whether support for fixed underlying types is available in Objective-C. +Use ``__has_extension(c_fixed_enum)`` to determine whether support for fixed +underlying types is available in C prior to C23. This will also report ``true`` in C23 +and later modes as the functionality is available even if it's not an extension in +those modes. + +Use ``__has_feature(c_fixed_enum)`` to determine whether support for fixed +underlying types is available in C23 and later. + Interoperability with C++11 lambdas ----------------------------------- diff --git a/clang/include/clang/Basic/Features.def b/clang/include/clang/Basic/Features.def index 9088c867d53ce..15c59c6bcdf29 100644 --- a/clang/include/clang/Basic/Features.def +++ b/clang/include/clang/Basic/Features.def @@ -163,6 +163,8 @@ FEATURE(c_atomic, LangOpts.C11) FEATURE(c_generic_selections, LangOpts.C11) FEATURE(c_static_assert, LangOpts.C11) FEATURE(c_thread_local, LangOpts.C11 &&PP.getTargetInfo().isTLSSupported()) +// C23 features +FEATURE(c_fixed_enum, LangOpts.C23) // C++11 features FEATURE(cxx_access_control_sfinae, LangOpts.CPlusPlus11) FEATURE(cxx_alias_templates, LangOpts.CPlusPlus11) @@ -269,6 +271,7 @@ EXTENSION(c_static_assert, true) EXTENSION(c_thread_local, PP.getTargetInfo().isTLSSupported()) // C23 features supported by other languages as extensions EXTENSION(c_attributes, true) +EXTENSION(c_fixed_enum, true) // C++11 features supported by other languages as extensions. 
EXTENSION(cxx_atomic, LangOpts.CPlusPlus) EXTENSION(cxx_default_function_template_args, LangOpts.CPlusPlus) diff --git a/clang/test/Sema/enum.c b/clang/test/Sema/enum.c index 4f6d04ba7f918..f12ce61ac13a6 100644 --- a/clang/test/Sema/enum.c +++ b/clang/test/Sema/enum.c @@ -121,6 +121,17 @@ int NegativeShortTest[NegativeShort == -1 ? 1 : -1]; enum Color { Red, Green, Blue }; // expected-note{{previous use is here}} typedef struct Color NewColor; // expected-error {{use of 'Color' with tag type that does not match previous declaration}} +// Enumerations with a fixed underlying type. +// https://github.com/llvm/llvm-project/issues/116880 +#if __STDC_VERSION__ >= 202311L + static_assert(__has_feature(c_fixed_enum)); + static_assert(__has_extension(c_fixed_enum)); // Matches behavior for c_alignas, etc +#else + _Static_assert(__has_extension(c_fixed_enum), ""); + _Static_assert(!__has_feature(c_fixed_enum), ""); +#endif +typedef enum : unsigned char { Pink, Black, Cyan } Color; // pre-c23-warning {{enumeration types with a fixed underlying type are a C23 extension}} + // PR28903 // In C it is valid to define tags inside enums. struct PR28903 { From acf3b1aa932b2237c181686e52bc61584a80a3ff Mon Sep 17 00:00:00 2001 From: Shubham Sandeep Rastogi Date: Tue, 3 Dec 2024 10:39:40 -0800 Subject: [PATCH 109/191] Reland [NFC] Move DroppedVariableStats to its own file and redesign it to be extensible. (#117042) Moved the IR unit test to the CodeGen folder to resolve linker errors: `error: undefined reference to 'vtable for llvm::DroppedVariableStatsIR'` This patch is trying to reland https://github.com/llvm/llvm-project/pull/115563 --- .../llvm/CodeGen/DroppedVariableStats.h | 226 ++++++++++++++++++ .../llvm/Passes/StandardInstrumentations.h | 80 +------ llvm/lib/CodeGen/CMakeLists.txt | 1 + llvm/lib/CodeGen/DroppedVariableStats.cpp | 194 +++++++++++++++ llvm/lib/Passes/StandardInstrumentations.cpp | 178 +------------- llvm/unittests/CodeGen/CMakeLists.txt | 1 + .../DroppedVariableStatsIRTest.cpp} | 74 +++--- llvm/unittests/IR/CMakeLists.txt | 1 - 8 files changed, 456 insertions(+), 299 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/DroppedVariableStats.h create mode 100644 llvm/lib/CodeGen/DroppedVariableStats.cpp rename llvm/unittests/{IR/DroppedVariableStatsTest.cpp => CodeGen/DroppedVariableStatsIRTest.cpp} (91%) diff --git a/llvm/include/llvm/CodeGen/DroppedVariableStats.h b/llvm/include/llvm/CodeGen/DroppedVariableStats.h new file mode 100644 index 0000000000000..371d775b02e87 --- /dev/null +++ b/llvm/include/llvm/CodeGen/DroppedVariableStats.h @@ -0,0 +1,226 @@ +///===- DroppedVariableStats.h - Opt Diagnostics -*- C++ -*----------------===// +/// +/// Part of the LLVM Project, under the Apache License v2.0 with LLVM +/// Exceptions. See https://llvm.org/LICENSE.txt for license information. +/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +/// +///===---------------------------------------------------------------------===// +/// \file +/// Dropped Variable Statistics for Debug Information. Reports any number +/// of #dbg_value that get dropped due to an optimization pass. 
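
For context, the relanded class reports dropped variables as a simple CSV
stream. The header line below is taken from the code; the data row uses a
made-up pass and function name purely for illustration:

```
Pass Level, Pass Name, Num of Dropped Variables, Func or Module Name
Function, SROAPass, 2, foo
```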
+/// +///===---------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_DROPPEDVARIABLESTATS_H +#define LLVM_CODEGEN_DROPPEDVARIABLESTATS_H + +#include "llvm/CodeGen/MachinePassManager.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassInstrumentation.h" + +namespace llvm { + +/// A unique key that represents a debug variable. +/// First const DIScope *: Represents the scope of the debug variable. +/// Second const DIScope *: Represents the InlinedAt scope of the debug +/// variable. const DILocalVariable *: It is a pointer to the debug variable +/// itself. +using VarID = + std::tuple; + +/// A base class to collect and print dropped debug information variable +/// statistics. +class DroppedVariableStats { +public: + DroppedVariableStats(bool DroppedVarStatsEnabled) + : DroppedVariableStatsEnabled(DroppedVarStatsEnabled) { + if (DroppedVarStatsEnabled) + llvm::outs() + << "Pass Level, Pass Name, Num of Dropped Variables, Func or " + "Module Name\n"; + }; + + virtual ~DroppedVariableStats() = default; + + // We intend this to be unique per-compilation, thus no copies. + DroppedVariableStats(const DroppedVariableStats &) = delete; + void operator=(const DroppedVariableStats &) = delete; + + bool getPassDroppedVariables() { return PassDroppedVariables; } + +protected: + void setup() { + DebugVariablesStack.push_back( + {DenseMap()}); + InlinedAts.push_back( + {DenseMap>()}); + } + + void cleanup() { + assert(!DebugVariablesStack.empty() && + "DebugVariablesStack shouldn't be empty!"); + assert(!InlinedAts.empty() && "InlinedAts shouldn't be empty!"); + DebugVariablesStack.pop_back(); + InlinedAts.pop_back(); + } + + bool DroppedVariableStatsEnabled = false; + struct DebugVariables { + /// DenseSet of VarIDs before an optimization pass has run. + DenseSet DebugVariablesBefore; + /// DenseSet of VarIDs after an optimization pass has run. + DenseSet DebugVariablesAfter; + }; + +protected: + /// A stack of a DenseMap, that maps DebugVariables for every pass to an + /// llvm::Function. A stack is used because an optimization pass can call + /// other passes. + SmallVector> DebugVariablesStack; + + /// A DenseSet tracking whether a scope was visited before. + DenseSet VisitedScope; + /// A stack of DenseMaps, which map the name of an llvm::Function to a + /// DenseMap of VarIDs and their inlinedAt locations before an optimization + /// pass has run. + SmallVector>> InlinedAts; + /// Calculate the number of dropped variables in an llvm::Function or + /// llvm::MachineFunction and print the relevant information to stdout. + void calculateDroppedStatsAndPrint(DebugVariables &DbgVariables, + StringRef FuncName, StringRef PassID, + StringRef FuncOrModName, + StringRef PassLevel, const Function *Func); + + /// Check if a \p Var has been dropped or is a false positive. Also update the + /// \p DroppedCount if a debug variable is dropped. + bool updateDroppedCount(DILocation *DbgLoc, const DIScope *Scope, + const DIScope *DbgValScope, + DenseMap &InlinedAtsMap, + VarID Var, unsigned &DroppedCount); + /// Run code to populate relevant data structures over an llvm::Function or + /// llvm::MachineFunction. + void run(DebugVariables &DbgVariables, StringRef FuncName, bool Before); + /// Populate the VarIDSet and InlinedAtMap with the relevant information + /// needed for before and after pass analysis to determine dropped variable + /// status. 
+ void populateVarIDSetAndInlinedMap( + const DILocalVariable *DbgVar, DebugLoc DbgLoc, DenseSet &VarIDSet, + DenseMap> &InlinedAtsMap, + StringRef FuncName, bool Before); + /// Visit every llvm::Instruction or llvm::MachineInstruction and check if the + /// debug variable denoted by its ID \p Var may have been dropped by an + /// optimization pass. + virtual void + visitEveryInstruction(unsigned &DroppedCount, + DenseMap &InlinedAtsMap, + VarID Var) = 0; + /// Visit every debug record in an llvm::Function or llvm::MachineFunction + /// and call populateVarIDSetAndInlinedMap on it. + virtual void visitEveryDebugRecord( + DenseSet &VarIDSet, + DenseMap> &InlinedAtsMap, + StringRef FuncName, bool Before) = 0; + +private: + /// Remove a dropped debug variable's VarID from all Sets in the + /// DroppedVariablesBefore stack. + void removeVarFromAllSets(VarID Var, const Function *F) { + // Do not remove Var from the last element, it will be popped from the + // stack. + for (auto &DebugVariablesMap : llvm::drop_end(DebugVariablesStack)) + DebugVariablesMap[F].DebugVariablesBefore.erase(Var); + } + /// Return true if \p Scope is the same as \p DbgValScope or a child scope of + /// \p DbgValScope, return false otherwise. + bool isScopeChildOfOrEqualTo(const DIScope *Scope, + const DIScope *DbgValScope); + /// Return true if \p InlinedAt is the same as \p DbgValInlinedAt or part of + /// the InlinedAt chain, return false otherwise. + bool isInlinedAtChildOfOrEqualTo(const DILocation *InlinedAt, + const DILocation *DbgValInlinedAt); + bool PassDroppedVariables = false; +}; + +/// A class to collect and print dropped debug information due to LLVM IR +/// optimization passes. After every LLVM IR pass is run, it will print how many +/// #dbg_values were dropped due to that pass. +class DroppedVariableStatsIR : public DroppedVariableStats { +public: + DroppedVariableStatsIR(bool DroppedVarStatsEnabled) + : llvm::DroppedVariableStats(DroppedVarStatsEnabled) {} + + virtual ~DroppedVariableStatsIR() = default; + + void runBeforePass(Any IR) { + setup(); + if (const auto *M = unwrapIR(IR)) + return this->runOnModule(M, true); + if (const auto *F = unwrapIR(IR)) + return this->runOnFunction(F, true); + } + + void runAfterPass(StringRef P, Any IR) { + if (const auto *M = unwrapIR(IR)) + runAfterPassModule(P, M); + else if (const auto *F = unwrapIR(IR)) + runAfterPassFunction(P, F); + cleanup(); + } + + void registerCallbacks(PassInstrumentationCallbacks &PIC); + +private: + const Function *Func; + + void runAfterPassFunction(StringRef PassID, const Function *F) { + runOnFunction(F, false); + calculateDroppedVarStatsOnFunction(F, PassID, F->getName().str(), + "Function"); + } + + void runAfterPassModule(StringRef PassID, const Module *M) { + runOnModule(M, false); + calculateDroppedVarStatsOnModule(M, PassID, M->getName().str(), "Module"); + } + /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or + /// after a pass has run to facilitate dropped variable calculation for an + /// llvm::Function. + void runOnFunction(const Function *F, bool Before); + /// Iterate over all Instructions in a Function and report any dropped debug + /// information. + void calculateDroppedVarStatsOnFunction(const Function *F, StringRef PassID, + StringRef FuncOrModName, + StringRef PassLevel); + /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or + /// after a pass has run to facilitate dropped variable calculation for an + /// llvm::Module. 
Calls runOnFunction on every Function in the Module. + void runOnModule(const Module *M, bool Before); + /// Iterate over all Functions in a Module and report any dropped debug + /// information. Will call calculateDroppedVarStatsOnFunction on every + /// Function. + void calculateDroppedVarStatsOnModule(const Module *M, StringRef PassID, + StringRef FuncOrModName, + StringRef PassLevel); + /// Override base class method to run on an llvm::Function specifically. + virtual void + visitEveryInstruction(unsigned &DroppedCount, + DenseMap &InlinedAtsMap, + VarID Var) override; + /// Override base class method to run on #dbg_values specifically. + virtual void visitEveryDebugRecord( + DenseSet &VarIDSet, + DenseMap> &InlinedAtsMap, + StringRef FuncName, bool Before) override; + + template static const IRUnitT *unwrapIR(Any IR) { + const IRUnitT **IRPtr = llvm::any_cast(&IR); + return IRPtr ? *IRPtr : nullptr; + } +}; + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h index 9301a12c740ee..12a34c099eaff 100644 --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" +#include "llvm/CodeGen/DroppedVariableStats.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -579,83 +580,6 @@ class PrintCrashIRInstrumentation { static void SignalHandler(void *); }; -/// A class to collect and print dropped debug information variable statistics. -/// After every LLVM IR pass is run, it will print how many #dbg_values were -/// dropped due to that pass. -class DroppedVariableStats { -public: - DroppedVariableStats(bool DroppedVarStatsEnabled) { - if (DroppedVarStatsEnabled) - llvm::outs() - << "Pass Level, Pass Name, Num of Dropped Variables, Func or " - "Module Name\n"; - }; - // We intend this to be unique per-compilation, thus no copies. - DroppedVariableStats(const DroppedVariableStats &) = delete; - void operator=(const DroppedVariableStats &) = delete; - - void registerCallbacks(PassInstrumentationCallbacks &PIC); - void runBeforePass(StringRef PassID, Any IR); - void runAfterPass(StringRef PassID, Any IR, const PreservedAnalyses &PA); - void runAfterPassInvalidated(StringRef PassID, const PreservedAnalyses &PA); - bool getPassDroppedVariables() { return PassDroppedVariables; } - -private: - bool PassDroppedVariables = false; - /// A unique key that represents a #dbg_value. - using VarID = - std::tuple; - - struct DebugVariables { - /// DenseSet of VarIDs before an optimization pass has run. - DenseSet DebugVariablesBefore; - /// DenseSet of VarIDs after an optimization pass has run. - DenseSet DebugVariablesAfter; - }; - - /// A stack of a DenseMap, that maps DebugVariables for every pass to an - /// llvm::Function. A stack is used because an optimization pass can call - /// other passes. - SmallVector> DebugVariablesStack; - - /// A DenseSet tracking whether a scope was visited before. - DenseSet VisitedScope; - /// A stack of DenseMaps, which map the name of an llvm::Function to a - /// DenseMap of VarIDs and their inlinedAt locations before an optimization - /// pass has run. - SmallVector>> InlinedAts; - - /// Iterate over all Functions in a Module and report any dropped debug - /// information. 
Will call calculateDroppedVarStatsOnFunction on every - /// Function. - void calculateDroppedVarStatsOnModule(const Module *M, StringRef PassID, - std::string FuncOrModName, - std::string PassLevel); - /// Iterate over all Instructions in a Function and report any dropped debug - /// information. - void calculateDroppedVarStatsOnFunction(const Function *F, StringRef PassID, - std::string FuncOrModName, - std::string PassLevel); - /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or - /// after a pass has run to facilitate dropped variable calculation for an - /// llvm::Function. - void runOnFunction(const Function *F, bool Before); - /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or - /// after a pass has run to facilitate dropped variable calculation for an - /// llvm::Module. Calls runOnFunction on every Function in the Module. - void runOnModule(const Module *M, bool Before); - /// Remove a dropped #dbg_value VarID from all Sets in the - /// DroppedVariablesBefore stack. - void removeVarFromAllSets(VarID Var, const Function *F); - /// Return true if \p Scope is the same as \p DbgValScope or a child scope of - /// \p DbgValScope, return false otherwise. - bool isScopeChildOfOrEqualTo(DIScope *Scope, const DIScope *DbgValScope); - /// Return true if \p InlinedAt is the same as \p DbgValInlinedAt or part of - /// the InlinedAt chain, return false otherwise. - bool isInlinedAtChildOfOrEqualTo(const DILocation *InlinedAt, - const DILocation *DbgValInlinedAt); -}; - /// This class provides an interface to register all the standard pass /// instrumentations and manages their state (if any). class StandardInstrumentations { @@ -673,7 +597,7 @@ class StandardInstrumentations { PrintCrashIRInstrumentation PrintCrashIR; IRChangedTester ChangeTester; VerifyInstrumentation Verify; - DroppedVariableStats DroppedStats; + DroppedVariableStatsIR DroppedStatsIR; bool VerifyEach; diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 7b47c0e6f75db..263d4a9ee94d2 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -50,6 +50,7 @@ add_llvm_component_library(LLVMCodeGen DeadMachineInstructionElim.cpp DetectDeadLanes.cpp DFAPacketizer.cpp + DroppedVariableStats.cpp DwarfEHPrepare.cpp EarlyIfConversion.cpp EdgeBundles.cpp diff --git a/llvm/lib/CodeGen/DroppedVariableStats.cpp b/llvm/lib/CodeGen/DroppedVariableStats.cpp new file mode 100644 index 0000000000000..122fcad1293f1 --- /dev/null +++ b/llvm/lib/CodeGen/DroppedVariableStats.cpp @@ -0,0 +1,194 @@ +///===- DroppedVariableStats.cpp ------------------------------------------===// +/// +/// Part of the LLVM Project, under the Apache License v2.0 with LLVM +/// Exceptions. See https://llvm.org/LICENSE.txt for license information. +/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +/// +///===---------------------------------------------------------------------===// +/// \file +/// Dropped Variable Statistics for Debug Information. Reports any number +/// of #dbg_value that get dropped due to an optimization pass. 
+/// +///===---------------------------------------------------------------------===// + +#include "llvm/CodeGen/DroppedVariableStats.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Module.h" + +using namespace llvm; + +bool DroppedVariableStats::isScopeChildOfOrEqualTo(const DIScope *Scope, + const DIScope *DbgValScope) { + while (Scope != nullptr) { + if (VisitedScope.find(Scope) == VisitedScope.end()) { + VisitedScope.insert(Scope); + if (Scope == DbgValScope) { + VisitedScope.clear(); + return true; + } + Scope = Scope->getScope(); + } else { + VisitedScope.clear(); + return false; + } + } + return false; +} + +bool DroppedVariableStats::isInlinedAtChildOfOrEqualTo( + const DILocation *InlinedAt, const DILocation *DbgValInlinedAt) { + if (DbgValInlinedAt == InlinedAt) + return true; + if (!DbgValInlinedAt) + return false; + auto *IA = InlinedAt; + while (IA) { + if (IA == DbgValInlinedAt) + return true; + IA = IA->getInlinedAt(); + } + return false; +} + +void DroppedVariableStats::calculateDroppedStatsAndPrint( + DebugVariables &DbgVariables, StringRef FuncName, StringRef PassID, + StringRef FuncOrModName, StringRef PassLevel, const Function *Func) { + unsigned DroppedCount = 0; + DenseSet &DebugVariablesBeforeSet = DbgVariables.DebugVariablesBefore; + DenseSet &DebugVariablesAfterSet = DbgVariables.DebugVariablesAfter; + DenseMap &InlinedAtsMap = InlinedAts.back()[FuncName]; + // Find an Instruction that shares the same scope as the dropped #dbg_value or + // has a scope that is the child of the scope of the #dbg_value, and has an + // inlinedAt equal to the inlinedAt of the #dbg_value or it's inlinedAt chain + // contains the inlinedAt of the #dbg_value, if such an Instruction is found, + // debug information is dropped. + for (VarID Var : DebugVariablesBeforeSet) { + if (DebugVariablesAfterSet.contains(Var)) + continue; + visitEveryInstruction(DroppedCount, InlinedAtsMap, Var); + removeVarFromAllSets(Var, Func); + } + if (DroppedCount > 0) { + llvm::outs() << PassLevel << ", " << PassID << ", " << DroppedCount << ", " + << FuncOrModName << "\n"; + PassDroppedVariables = true; + } else + PassDroppedVariables = false; +} + +bool DroppedVariableStats::updateDroppedCount( + DILocation *DbgLoc, const DIScope *Scope, const DIScope *DbgValScope, + DenseMap &InlinedAtsMap, VarID Var, + unsigned &DroppedCount) { + + // If the Scope is a child of, or equal to the DbgValScope and is inlined at + // the Var's InlinedAt location, return true to signify that the Var has been + // dropped. + if (isScopeChildOfOrEqualTo(Scope, DbgValScope)) + if (isInlinedAtChildOfOrEqualTo(DbgLoc->getInlinedAt(), + InlinedAtsMap[Var])) { + // Found another instruction in the variable's scope, so there exists a + // break point at which the variable could be observed. Count it as + // dropped. + DroppedCount++; + return true; + } + return false; +} + +void DroppedVariableStats::run(DebugVariables &DbgVariables, StringRef FuncName, + bool Before) { + auto &VarIDSet = (Before ? 
DbgVariables.DebugVariablesBefore + : DbgVariables.DebugVariablesAfter); + auto &InlinedAtsMap = InlinedAts.back(); + if (Before) + InlinedAtsMap.try_emplace(FuncName, DenseMap()); + VarIDSet = DenseSet(); + visitEveryDebugRecord(VarIDSet, InlinedAtsMap, FuncName, Before); +} + +void DroppedVariableStats::populateVarIDSetAndInlinedMap( + const DILocalVariable *DbgVar, DebugLoc DbgLoc, DenseSet &VarIDSet, + DenseMap> &InlinedAtsMap, + StringRef FuncName, bool Before) { + VarID Key{DbgVar->getScope(), DbgLoc->getInlinedAtScope(), DbgVar}; + VarIDSet.insert(Key); + if (Before) + InlinedAtsMap[FuncName].try_emplace(Key, DbgLoc.getInlinedAt()); +} + +void DroppedVariableStatsIR::runOnFunction(const Function *F, bool Before) { + auto &DebugVariables = DebugVariablesStack.back()[F]; + auto FuncName = F->getName(); + Func = F; + run(DebugVariables, FuncName, Before); +} + +void DroppedVariableStatsIR::calculateDroppedVarStatsOnFunction( + const Function *F, StringRef PassID, StringRef FuncOrModName, + StringRef PassLevel) { + Func = F; + StringRef FuncName = F->getName(); + DebugVariables &DbgVariables = DebugVariablesStack.back()[F]; + calculateDroppedStatsAndPrint(DbgVariables, FuncName, PassID, FuncOrModName, + PassLevel, Func); +} + +void DroppedVariableStatsIR::runOnModule(const Module *M, bool Before) { + for (auto &F : *M) + runOnFunction(&F, Before); +} + +void DroppedVariableStatsIR::calculateDroppedVarStatsOnModule( + const Module *M, StringRef PassID, StringRef FuncOrModName, + StringRef PassLevel) { + for (auto &F : *M) { + calculateDroppedVarStatsOnFunction(&F, PassID, FuncOrModName, PassLevel); + } +} + +void DroppedVariableStatsIR::registerCallbacks( + PassInstrumentationCallbacks &PIC) { + if (!DroppedVariableStatsEnabled) + return; + + PIC.registerBeforeNonSkippedPassCallback( + [this](StringRef P, Any IR) { return runBeforePass(IR); }); + PIC.registerAfterPassCallback( + [this](StringRef P, Any IR, const PreservedAnalyses &PA) { + return runAfterPass(P, IR); + }); + PIC.registerAfterPassInvalidatedCallback( + [this](StringRef P, const PreservedAnalyses &PA) { return cleanup(); }); +} + +void DroppedVariableStatsIR::visitEveryInstruction( + unsigned &DroppedCount, DenseMap &InlinedAtsMap, + VarID Var) { + const DIScope *DbgValScope = std::get<0>(Var); + for (const auto &I : instructions(Func)) { + auto *DbgLoc = I.getDebugLoc().get(); + if (!DbgLoc) + continue; + if (updateDroppedCount(DbgLoc, DbgLoc->getScope(), DbgValScope, + InlinedAtsMap, Var, DroppedCount)) + break; + } +} + +void DroppedVariableStatsIR::visitEveryDebugRecord( + DenseSet &VarIDSet, + DenseMap> &InlinedAtsMap, + StringRef FuncName, bool Before) { + for (const auto &I : instructions(Func)) { + for (DbgRecord &DR : I.getDbgRecordRange()) { + if (auto *Dbg = dyn_cast(&DR)) { + auto *DbgVar = Dbg->getVariable(); + auto DbgLoc = DR.getDebugLoc(); + populateVarIDSetAndInlinedMap(DbgVar, DbgLoc, VarIDSet, InlinedAtsMap, + FuncName, Before); + } + } + } +} diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index 6259f8f736c80..b766517e68eba 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -2462,7 +2462,7 @@ StandardInstrumentations::StandardInstrumentations( PrintChanged == ChangePrinter::ColourDiffVerbose || PrintChanged == ChangePrinter::ColourDiffQuiet), WebsiteChangeReporter(PrintChanged == ChangePrinter::DotCfgVerbose), - Verify(DebugLogging), DroppedStats(DroppedVarStats), + Verify(DebugLogging), 
DroppedStatsIR(DroppedVarStats), VerifyEach(VerifyEach) {} PrintCrashIRInstrumentation *PrintCrashIRInstrumentation::CrashReporter = @@ -2523,180 +2523,6 @@ void PrintCrashIRInstrumentation::registerCallbacks( }); } -void DroppedVariableStats::registerCallbacks( - PassInstrumentationCallbacks &PIC) { - if (!DroppedVarStats) - return; - - PIC.registerBeforeNonSkippedPassCallback( - [this](StringRef P, Any IR) { return this->runBeforePass(P, IR); }); - PIC.registerAfterPassCallback( - [this](StringRef P, Any IR, const PreservedAnalyses &PA) { - return this->runAfterPass(P, IR, PA); - }); - PIC.registerAfterPassInvalidatedCallback( - [this](StringRef P, const PreservedAnalyses &PA) { - return this->runAfterPassInvalidated(P, PA); - }); -} - -void DroppedVariableStats::runBeforePass(StringRef PassID, Any IR) { - DebugVariablesStack.push_back({DenseMap()}); - InlinedAts.push_back({DenseMap>()}); - if (auto *M = unwrapIR(IR)) - return this->runOnModule(M, true); - if (auto *F = unwrapIR(IR)) - return this->runOnFunction(F, true); -} - -void DroppedVariableStats::runOnFunction(const Function *F, bool Before) { - auto &DebugVariables = DebugVariablesStack.back()[F]; - auto &VarIDSet = (Before ? DebugVariables.DebugVariablesBefore - : DebugVariables.DebugVariablesAfter); - auto &InlinedAtsMap = InlinedAts.back(); - auto FuncName = F->getName(); - if (Before) - InlinedAtsMap.try_emplace(FuncName, DenseMap()); - VarIDSet = DenseSet(); - for (const auto &I : instructions(F)) { - for (DbgRecord &DR : I.getDbgRecordRange()) { - if (auto *Dbg = dyn_cast(&DR)) { - auto *DbgVar = Dbg->getVariable(); - auto DbgLoc = DR.getDebugLoc(); - VarID Key{DbgVar->getScope(), DbgLoc->getInlinedAtScope(), DbgVar}; - VarIDSet.insert(Key); - if (Before) - InlinedAtsMap[FuncName].try_emplace(Key, DbgLoc.getInlinedAt()); - } - } - } -} - -void DroppedVariableStats::runOnModule(const Module *M, bool Before) { - for (auto &F : *M) - runOnFunction(&F, Before); -} - -void DroppedVariableStats::removeVarFromAllSets(VarID Var, const Function *F) { - // Do not remove Var from the last element, it will be popped from the stack. - for (auto &DebugVariablesMap : llvm::drop_end(DebugVariablesStack)) - DebugVariablesMap[F].DebugVariablesBefore.erase(Var); -} - -void DroppedVariableStats::calculateDroppedVarStatsOnModule( - const Module *M, StringRef PassID, std::string FuncOrModName, - std::string PassLevel) { - for (auto &F : *M) { - calculateDroppedVarStatsOnFunction(&F, PassID, FuncOrModName, PassLevel); - } -} - -void DroppedVariableStats::calculateDroppedVarStatsOnFunction( - const Function *F, StringRef PassID, std::string FuncOrModName, - std::string PassLevel) { - unsigned DroppedCount = 0; - StringRef FuncName = F->getName(); - DebugVariables &DbgVariables = DebugVariablesStack.back()[F]; - DenseSet &DebugVariablesBeforeSet = DbgVariables.DebugVariablesBefore; - DenseSet &DebugVariablesAfterSet = DbgVariables.DebugVariablesAfter; - DenseMap &InlinedAtsMap = InlinedAts.back()[FuncName]; - // Find an Instruction that shares the same scope as the dropped #dbg_value or - // has a scope that is the child of the scope of the #dbg_value, and has an - // inlinedAt equal to the inlinedAt of the #dbg_value or it's inlinedAt chain - // contains the inlinedAt of the #dbg_value, if such an Instruction is found, - // debug information is dropped. 
- for (VarID Var : DebugVariablesBeforeSet) { - if (DebugVariablesAfterSet.contains(Var)) - continue; - const DIScope *DbgValScope = std::get<0>(Var); - for (const auto &I : instructions(F)) { - auto *DbgLoc = I.getDebugLoc().get(); - if (!DbgLoc) - continue; - - auto *Scope = DbgLoc->getScope(); - if (isScopeChildOfOrEqualTo(Scope, DbgValScope)) { - if (isInlinedAtChildOfOrEqualTo(DbgLoc->getInlinedAt(), - InlinedAtsMap[Var])) { - // Found another instruction in the variable's scope, so there exists - // a break point at which the variable could be observed. Count it as - // dropped. - DroppedCount++; - break; - } - } - } - removeVarFromAllSets(Var, F); - } - if (DroppedCount > 0) { - llvm::outs() << PassLevel << ", " << PassID << ", " << DroppedCount << ", " - << FuncOrModName << "\n"; - PassDroppedVariables = true; - } else - PassDroppedVariables = false; -} - -void DroppedVariableStats::runAfterPassInvalidated( - StringRef PassID, const PreservedAnalyses &PA) { - DebugVariablesStack.pop_back(); - InlinedAts.pop_back(); -} - -void DroppedVariableStats::runAfterPass(StringRef PassID, Any IR, - const PreservedAnalyses &PA) { - std::string PassLevel; - std::string FuncOrModName; - if (auto *M = unwrapIR(IR)) { - this->runOnModule(M, false); - PassLevel = "Module"; - FuncOrModName = M->getName(); - calculateDroppedVarStatsOnModule(M, PassID, FuncOrModName, PassLevel); - } else if (auto *F = unwrapIR(IR)) { - this->runOnFunction(F, false); - PassLevel = "Function"; - FuncOrModName = F->getName(); - calculateDroppedVarStatsOnFunction(F, PassID, FuncOrModName, PassLevel); - } - - DebugVariablesStack.pop_back(); - InlinedAts.pop_back(); -} - -bool DroppedVariableStats::isScopeChildOfOrEqualTo(DIScope *Scope, - const DIScope *DbgValScope) { - while (Scope != nullptr) { - if (VisitedScope.find(Scope) == VisitedScope.end()) { - VisitedScope.insert(Scope); - if (Scope == DbgValScope) { - VisitedScope.clear(); - return true; - } - Scope = Scope->getScope(); - } else { - VisitedScope.clear(); - return false; - } - } - return false; -} - -bool DroppedVariableStats::isInlinedAtChildOfOrEqualTo( - const DILocation *InlinedAt, const DILocation *DbgValInlinedAt) { - if (DbgValInlinedAt == InlinedAt) - return true; - if (!DbgValInlinedAt) - return false; - if (!InlinedAt) - return false; - auto *IA = InlinedAt; - while (IA) { - if (IA == DbgValInlinedAt) - return true; - IA = IA->getInlinedAt(); - } - return false; -} - void StandardInstrumentations::registerCallbacks( PassInstrumentationCallbacks &PIC, ModuleAnalysisManager *MAM) { PrintIR.registerCallbacks(PIC); @@ -2712,7 +2538,7 @@ void StandardInstrumentations::registerCallbacks( WebsiteChangeReporter.registerCallbacks(PIC); ChangeTester.registerCallbacks(PIC); PrintCrashIR.registerCallbacks(PIC); - DroppedStats.registerCallbacks(PIC); + DroppedStatsIR.registerCallbacks(PIC); if (MAM) PreservedCFGChecker.registerCallbacks(PIC, *MAM); diff --git a/llvm/unittests/CodeGen/CMakeLists.txt b/llvm/unittests/CodeGen/CMakeLists.txt index 963cdcc0275e1..807fd1a9b7b56 100644 --- a/llvm/unittests/CodeGen/CMakeLists.txt +++ b/llvm/unittests/CodeGen/CMakeLists.txt @@ -27,6 +27,7 @@ add_llvm_unittest(CodeGenTests CCStateTest.cpp DIEHashTest.cpp DIETest.cpp + DroppedVariableStatsIRTest.cpp DwarfStringPoolEntryRefTest.cpp InstrRefLDVTest.cpp LowLevelTypeTest.cpp diff --git a/llvm/unittests/IR/DroppedVariableStatsTest.cpp b/llvm/unittests/CodeGen/DroppedVariableStatsIRTest.cpp similarity index 91% rename from llvm/unittests/IR/DroppedVariableStatsTest.cpp rename to 
llvm/unittests/CodeGen/DroppedVariableStatsIRTest.cpp index 61f3a87bb355e..094ec7b657634 100644 --- a/llvm/unittests/IR/DroppedVariableStatsTest.cpp +++ b/llvm/unittests/CodeGen/DroppedVariableStatsIRTest.cpp @@ -1,5 +1,4 @@ -//===- unittests/IR/DroppedVariableStatsTest.cpp - TimePassesHandler tests -//----------===// +//===- unittests/IR/DroppedVariableStatsIRTest.cpp ------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -8,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/AsmParser/Parser.h" +#include "llvm/CodeGen/DroppedVariableStats.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/LegacyPassManager.h" @@ -44,7 +44,7 @@ namespace { // This test ensures that if a #dbg_value and an instruction that exists in the // same scope as that #dbg_value are both deleted as a result of an optimization // pass, debug information is considered not dropped. -TEST(DroppedVariableStats, BothDeleted) { +TEST(DroppedVariableStatsIR, BothDeleted) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -79,9 +79,8 @@ TEST(DroppedVariableStats, BothDeleted) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStats Stats(true); - Stats.runBeforePass("Test", - llvm::Any(const_cast(M.get()))); + DroppedVariableStatsIR Stats(true); + Stats.runBeforePass(llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -92,16 +91,15 @@ TEST(DroppedVariableStats, BothDeleted) { } break; } - PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get())), PA); + llvm::Any(const_cast(M.get()))); ASSERT_EQ(Stats.getPassDroppedVariables(), false); } // This test ensures that if a #dbg_value is dropped after an optimization pass, // but an instruction that shares the same scope as the #dbg_value still exists, // debug information is conisdered dropped. -TEST(DroppedVariableStats, DbgValLost) { +TEST(DroppedVariableStatsIR, DbgValLost) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -136,9 +134,8 @@ TEST(DroppedVariableStats, DbgValLost) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStats Stats(true); - Stats.runBeforePass("Test", - llvm::Any(const_cast(M.get()))); + DroppedVariableStatsIR Stats(true); + Stats.runBeforePass(llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -148,16 +145,15 @@ TEST(DroppedVariableStats, DbgValLost) { } break; } - PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get())), PA); + llvm::Any(const_cast(M.get()))); ASSERT_EQ(Stats.getPassDroppedVariables(), true); } // This test ensures that if a #dbg_value is dropped after an optimization pass, // but an instruction that has an unrelated scope as the #dbg_value still // exists, debug information is conisdered not dropped. 
-TEST(DroppedVariableStats, UnrelatedScopes) { +TEST(DroppedVariableStatsIR, UnrelatedScopes) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -193,9 +189,8 @@ TEST(DroppedVariableStats, UnrelatedScopes) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStats Stats(true); - Stats.runBeforePass("Test", - llvm::Any(const_cast(M.get()))); + DroppedVariableStatsIR Stats(true); + Stats.runBeforePass(llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -205,16 +200,15 @@ TEST(DroppedVariableStats, UnrelatedScopes) { } break; } - PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get())), PA); + llvm::Any(const_cast(M.get()))); ASSERT_EQ(Stats.getPassDroppedVariables(), false); } // This test ensures that if a #dbg_value is dropped after an optimization pass, // but an instruction that has a scope which is a child of the #dbg_value scope // still exists, debug information is conisdered dropped. -TEST(DroppedVariableStats, ChildScopes) { +TEST(DroppedVariableStatsIR, ChildScopes) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -250,9 +244,8 @@ TEST(DroppedVariableStats, ChildScopes) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStats Stats(true); - Stats.runBeforePass("Test", - llvm::Any(const_cast(M.get()))); + DroppedVariableStatsIR Stats(true); + Stats.runBeforePass(llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -262,9 +255,8 @@ TEST(DroppedVariableStats, ChildScopes) { } break; } - PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get())), PA); + llvm::Any(const_cast(M.get()))); ASSERT_EQ(Stats.getPassDroppedVariables(), true); } @@ -272,7 +264,7 @@ TEST(DroppedVariableStats, ChildScopes) { // but an instruction that has a scope which is a child of the #dbg_value scope // still exists, and the #dbg_value is inlined at another location, debug // information is conisdered not dropped. -TEST(DroppedVariableStats, InlinedAt) { +TEST(DroppedVariableStatsIR, InlinedAt) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -308,9 +300,8 @@ TEST(DroppedVariableStats, InlinedAt) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStats Stats(true); - Stats.runBeforePass("Test", - llvm::Any(const_cast(M.get()))); + DroppedVariableStatsIR Stats(true); + Stats.runBeforePass(llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -320,9 +311,8 @@ TEST(DroppedVariableStats, InlinedAt) { } break; } - PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get())), PA); + llvm::Any(const_cast(M.get()))); ASSERT_EQ(Stats.getPassDroppedVariables(), false); } @@ -330,7 +320,7 @@ TEST(DroppedVariableStats, InlinedAt) { // but an instruction that has a scope which is a child of the #dbg_value scope // still exists, and the #dbg_value and the instruction are inlined at another // location, debug information is conisdered dropped. 
-TEST(DroppedVariableStats, InlinedAtShared) { +TEST(DroppedVariableStatsIR, InlinedAtShared) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -366,9 +356,8 @@ TEST(DroppedVariableStats, InlinedAtShared) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStats Stats(true); - Stats.runBeforePass("Test", - llvm::Any(const_cast(M.get()))); + DroppedVariableStatsIR Stats(true); + Stats.runBeforePass(llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -378,9 +367,8 @@ TEST(DroppedVariableStats, InlinedAtShared) { } break; } - PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get())), PA); + llvm::Any(const_cast(M.get()))); ASSERT_EQ(Stats.getPassDroppedVariables(), true); } @@ -388,7 +376,7 @@ TEST(DroppedVariableStats, InlinedAtShared) { // but an instruction that has a scope which is a child of the #dbg_value scope // still exists, and the instruction is inlined at a location that is the // #dbg_value's inlined at location, debug information is conisdered dropped. -TEST(DroppedVariableStats, InlinedAtChild) { +TEST(DroppedVariableStatsIR, InlinedAtChild) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -425,9 +413,8 @@ TEST(DroppedVariableStats, InlinedAtChild) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStats Stats(true); - Stats.runBeforePass("Test", - llvm::Any(const_cast(M.get()))); + DroppedVariableStatsIR Stats(true); + Stats.runBeforePass(llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -437,9 +424,8 @@ TEST(DroppedVariableStats, InlinedAtChild) { } break; } - PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get())), PA); + llvm::Any(const_cast(M.get()))); ASSERT_EQ(Stats.getPassDroppedVariables(), true); } diff --git a/llvm/unittests/IR/CMakeLists.txt b/llvm/unittests/IR/CMakeLists.txt index ed93ee547d223..e5c8630f3eed7 100644 --- a/llvm/unittests/IR/CMakeLists.txt +++ b/llvm/unittests/IR/CMakeLists.txt @@ -43,7 +43,6 @@ add_llvm_unittest(IRTests ShuffleVectorInstTest.cpp StructuralHashTest.cpp TimePassesTest.cpp - DroppedVariableStatsTest.cpp TypesTest.cpp UseTest.cpp UserTest.cpp From 4dd5ac906f9efdba3034b70acdda406348dc6f53 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 3 Dec 2024 10:24:39 -0800 Subject: [PATCH 110/191] [RISCV] Improve coverage for spread(N) shuffles I'd already included a few cases for spread(N) in the decompress(N) variants, but rename for clarity and add a couple more edge cases. i.e. spread(N, 0) produces a, undef, b, undef, ... 
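For illustration only, spread(2, 0) places the source elements in every other
output lane and leaves the remaining lanes undef. A rough IR sketch of that
shape (the function name is invented for this note; it is not one of the tests
added below):

```
; illustrative spread(2, 0): a, undef, b, undef, ...
define <8 x i32> @spread2_index0_example(<8 x i32> %v) {
  %out = shufflevector <8 x i32> %v, <8 x i32> poison,
         <8 x i32> <i32 0, i32 undef, i32 1, i32 undef,
                    i32 2, i32 undef, i32 3, i32 undef>
  ret <8 x i32> %out
}
```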
--- .../RISCV/rvv/fixed-vectors-int-shuffles.ll | 68 +++++++++++++++---- 1 file changed, 55 insertions(+), 13 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll index 5b01eae1ba3c0..21417fe8deefb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -810,8 +810,8 @@ define <8 x i32> @shuffle_compress_singlesrc_gaps_e32(<8 x i32> %v) { ret <8 x i32> %out } -define <8 x i32> @shuffle_decompress2_singlesrc_e32(<8 x i32> %v) { -; CHECK-LABEL: shuffle_decompress2_singlesrc_e32: +define <8 x i32> @shuffle_spread2_singlesrc_e32(<8 x i32> %v) { +; CHECK-LABEL: shuffle_spread2_singlesrc_e32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vwaddu.vv v10, v8, v8 @@ -823,18 +823,46 @@ define <8 x i32> @shuffle_decompress2_singlesrc_e32(<8 x i32> %v) { ret <8 x i32> %out } -define <8 x i32> @shuffle_decompress3_singlesrc_e32(<8 x i32> %v) { -; RV32-LABEL: shuffle_decompress3_singlesrc_e32: +define <8 x i32> @shuffle_spread2_singlesrc_e32_index1(<8 x i32> %v) { +; CHECK-LABEL: shuffle_spread2_singlesrc_e32_index1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vwaddu.vv v10, v8, v8 +; CHECK-NEXT: li a0, -1 +; CHECK-NEXT: vwmaccu.vx v10, a0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> + ret <8 x i32> %out +} + +define <8 x i32> @shuffle_spread2_singlesrc_e32_index2(<8 x i32> %v) { +; CHECK-LABEL: shuffle_spread2_singlesrc_e32_index2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vsrl.vi v10, v10, 1 +; CHECK-NEXT: vadd.vi v12, v10, -1 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vrgatherei16.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> + ret <8 x i32> %out +} + +define <8 x i32> @shuffle_spread3_singlesrc_e32(<8 x i32> %v) { +; RV32-LABEL: shuffle_spread3_singlesrc_e32: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI55_0) -; RV32-NEXT: addi a0, a0, %lo(.LCPI55_0) +; RV32-NEXT: lui a0, %hi(.LCPI57_0) +; RV32-NEXT: addi a0, a0, %lo(.LCPI57_0) ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vle16.v v12, (a0) ; RV32-NEXT: vrgatherei16.vv v10, v8, v12 ; RV32-NEXT: vmv.v.v v8, v10 ; RV32-NEXT: ret ; -; RV64-LABEL: shuffle_decompress3_singlesrc_e32: +; RV64-LABEL: shuffle_spread3_singlesrc_e32: ; RV64: # %bb.0: ; RV64-NEXT: lui a0, 32769 ; RV64-NEXT: slli a0, a0, 21 @@ -849,8 +877,8 @@ define <8 x i32> @shuffle_decompress3_singlesrc_e32(<8 x i32> %v) { } ; TODO: This should be a single vslideup.vi -define <8 x i32> @shuffle_decompress4_singlesrc_e32(<8 x i32> %v) { -; CHECK-LABEL: shuffle_decompress4_singlesrc_e32: +define <8 x i32> @shuffle_spread4_singlesrc_e32(<8 x i32> %v) { +; CHECK-LABEL: shuffle_spread4_singlesrc_e32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vid.v v10 @@ -864,8 +892,8 @@ define <8 x i32> @shuffle_decompress4_singlesrc_e32(<8 x i32> %v) { } ; TODO: This should be either a single vslideup.vi or two widening interleaves. 
-define <8 x i8> @shuffle_decompress4_singlesrc_e8(<8 x i8> %v) { -; CHECK-LABEL: shuffle_decompress4_singlesrc_e8: +define <8 x i8> @shuffle_spread4_singlesrc_e8(<8 x i8> %v) { +; CHECK-LABEL: shuffle_spread4_singlesrc_e8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vid.v v9 @@ -877,11 +905,25 @@ define <8 x i8> @shuffle_decompress4_singlesrc_e8(<8 x i8> %v) { ret <8 x i8> %out } +define <32 x i8> @shuffle_spread8_singlesrc_e8(<32 x i8> %v) { +; CHECK-LABEL: shuffle_spread8_singlesrc_e8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vsrl.vi v12, v10, 3 +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %out = shufflevector <32 x i8> %v, <32 x i8> poison, <32 x i32> + ret <32 x i8> %out +} + define <8 x i32> @shuffle_decompress_singlesrc_e32(<8 x i32> %v) { ; CHECK-LABEL: shuffle_decompress_singlesrc_e32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI58_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI58_0) +; CHECK-NEXT: lui a0, %hi(.LCPI61_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI61_0) ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle16.v v12, (a0) ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12 From b8805c88ce09fe8cc7e469162cbef05722559566 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 3 Dec 2024 10:27:01 -0800 Subject: [PATCH 111/191] [RISCV] Clang-format a few lines to remove diff in a nearby patch --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index b2e96b63a8095..1e185956bd30e 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4440,10 +4440,10 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, // a zero extend and the simm5 check in isel would fail. // FIXME: Should we ignore the upper bits in isel instead? unsigned ExtOpc = - isa(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; + isa(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar); - return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, - DAG.getUNDEF(VT), Scalar, VL); + return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar, + VL); } // Is this a shuffle extracts either the even or odd elements of a vector? 
From 4b9bf660f3a3db3b22dd33aa5b23986748b09ec3 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 3 Dec 2024 18:45:30 +0000 Subject: [PATCH 112/191] [gn build] Port acf3b1aa932b --- llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn | 3 ++- llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn | 1 + llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn | 1 - 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn index 59ecb66f2bcb0..bcb46d919b6c1 100644 --- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn @@ -43,12 +43,14 @@ static_library("CodeGen") { "CodeGen.cpp", "CodeGenCommonISel.cpp", "CodeGenPrepare.cpp", + "CodeGenTargetMachineImpl.cpp", "CommandFlags.cpp", "ComplexDeinterleavingPass.cpp", "CriticalAntiDepBreaker.cpp", "DFAPacketizer.cpp", "DeadMachineInstructionElim.cpp", "DetectDeadLanes.cpp", + "DroppedVariableStats.cpp", "DwarfEHPrepare.cpp", "EHContGuardCatchret.cpp", "EarlyIfConversion.cpp", @@ -83,7 +85,6 @@ static_library("CodeGen") { "IntrinsicLowering.cpp", "JMCInstrumenter.cpp", "KCFI.cpp", - "CodeGenTargetMachineImpl.cpp", "LatencyPriorityQueue.cpp", "LazyMachineBlockFrequencyInfo.cpp", "LexicalScopes.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn index a3f89a5648cb5..dc01cc9a40a9c 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn @@ -26,6 +26,7 @@ unittest("CodeGenTests") { "CCStateTest.cpp", "DIEHashTest.cpp", "DIETest.cpp", + "DroppedVariableStatsIRTest.cpp", "DwarfStringPoolEntryRefTest.cpp", "InstrRefLDVTest.cpp", "LexicalScopesTest.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn index ccee5d79afdcc..b19d54d7ed92f 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn @@ -28,7 +28,6 @@ unittest("IRTests") { "DemandedBitsTest.cpp", "DominatorTreeBatchUpdatesTest.cpp", "DominatorTreeTest.cpp", - "DroppedVariableStatsTest.cpp", "FunctionTest.cpp", "IRBuilderTest.cpp", "InstructionsTest.cpp", From 25b1896686177673dcd548489e92c7880d576948 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Tue, 3 Dec 2024 13:56:15 -0500 Subject: [PATCH 113/191] Fix test for bots where -std=c99 is the default Amends 9791f258079a4334c61c64cb62d9746a3db2c25c This addresses the issue found by: https://lab.llvm.org/buildbot/#/builders/144/builds/13023 --- clang/test/Sema/enum.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clang/test/Sema/enum.c b/clang/test/Sema/enum.c index f12ce61ac13a6..3db301dab0a45 100644 --- a/clang/test/Sema/enum.c +++ b/clang/test/Sema/enum.c @@ -129,6 +129,10 @@ typedef struct Color NewColor; // expected-error {{use of 'Color' with tag type #else _Static_assert(__has_extension(c_fixed_enum), ""); _Static_assert(!__has_feature(c_fixed_enum), ""); +#if __STDC_VERSION__ < 201112L + // expected-warning@-3 {{'_Static_assert' is a C11 extension}} + // expected-warning@-3 {{'_Static_assert' is a C11 extension}} +#endif #endif typedef enum : unsigned char { Pink, Black, Cyan } Color; // pre-c23-warning {{enumeration types with a fixed underlying type are a C23 extension}} From f7151248579330df8ea68003f60b778149e0f262 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 3 Dec 2024 
11:02:03 -0800 Subject: [PATCH 114/191] [lldb-dap] Fix the vscode-uninstall command The command was using the wrong publisher name ("llvm" rather than "llvm-vs-code-extensions") resulting in the command complaining: ``` npm run vscode-uninstall > lldb-dap@0.2.6 vscode-uninstall > code --uninstall-extension llvm.lldb-dap Extension 'llvm.lldb-dap' is not installed. Make sure you use the full extension ID, including the publisher, e.g.: ms-dotnettools.csharp ``` --- lldb/tools/lldb-dap/package.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lldb/tools/lldb-dap/package.json b/lldb/tools/lldb-dap/package.json index 9155163c65ba5..5e9a7de9109ec 100644 --- a/lldb/tools/lldb-dap/package.json +++ b/lldb/tools/lldb-dap/package.json @@ -44,7 +44,7 @@ "format": "npx prettier './src-ts/' --write", "package": "vsce package --out ./out/lldb-dap.vsix", "publish": "vsce publish", - "vscode-uninstall": "code --uninstall-extension llvm.lldb-dap", + "vscode-uninstall": "code --uninstall-extension llvm-vs-code-extensions.lldb-dap", "vscode-install": "code --install-extension ./out/lldb-dap.vsix" }, "contributes": { @@ -513,4 +513,4 @@ } ] } -} \ No newline at end of file +} From 0ccd18ead138e9efc56b6c16ded6c3f4df86ae91 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Tue, 3 Dec 2024 14:06:26 -0500 Subject: [PATCH 115/191] Typo fix; NFC --- clang-tools-extra/clangd/index/BackgroundRebuild.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/index/BackgroundRebuild.cpp b/clang-tools-extra/clangd/index/BackgroundRebuild.cpp index 79383be012f83..4dc2d3b1d059b 100644 --- a/clang-tools-extra/clangd/index/BackgroundRebuild.cpp +++ b/clang-tools-extra/clangd/index/BackgroundRebuild.cpp @@ -1,4 +1,4 @@ -//===-- BackgroundRebuild.cpp - when to rebuild thei background index -----===// +//===-- BackgroundRebuild.cpp - when to rebuild the background index ------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. From 384562495bae44be053c1bbd40c359ef4b82d803 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Tue, 3 Dec 2024 11:11:32 -0800 Subject: [PATCH 116/191] [lldb] For a host socket, add a method to print the listening address. (#118330) This is most useful if you are listening on an address like 'localhost:0' and want to know the resolved ip + port of the socket listener. --- lldb/include/lldb/Host/Socket.h | 6 +++ lldb/include/lldb/Host/common/TCPSocket.h | 4 ++ lldb/include/lldb/Host/posix/DomainSocket.h | 4 ++ lldb/source/Host/common/TCPSocket.cpp | 8 ++++ lldb/source/Host/posix/DomainSocket.cpp | 14 +++++++ lldb/unittests/Host/SocketTest.cpp | 41 ++++++++++++++++++++- 6 files changed, 76 insertions(+), 1 deletion(-) diff --git a/lldb/include/lldb/Host/Socket.h b/lldb/include/lldb/Host/Socket.h index e98797b36c8a5..4585eac12efb9 100644 --- a/lldb/include/lldb/Host/Socket.h +++ b/lldb/include/lldb/Host/Socket.h @@ -11,6 +11,7 @@ #include #include +#include #include "lldb/Host/MainLoopBase.h" #include "lldb/Utility/Timeout.h" @@ -151,6 +152,11 @@ class Socket : public IOObject { // If this Socket is connected then return the URI used to connect. virtual std::string GetRemoteConnectionURI() const { return ""; }; + // If the Socket is listening then return the URI for clients to connect. 
+ virtual std::vector GetListeningConnectionURI() const { + return {}; + } + protected: Socket(SocketProtocol protocol, bool should_close); diff --git a/lldb/include/lldb/Host/common/TCPSocket.h b/lldb/include/lldb/Host/common/TCPSocket.h index ca36622691fe9..cb950c0015ea6 100644 --- a/lldb/include/lldb/Host/common/TCPSocket.h +++ b/lldb/include/lldb/Host/common/TCPSocket.h @@ -13,6 +13,8 @@ #include "lldb/Host/Socket.h" #include "lldb/Host/SocketAddress.h" #include +#include +#include namespace lldb_private { class TCPSocket : public Socket { @@ -52,6 +54,8 @@ class TCPSocket : public Socket { std::string GetRemoteConnectionURI() const override; + std::vector GetListeningConnectionURI() const override; + private: TCPSocket(NativeSocket socket, const TCPSocket &listen_socket); diff --git a/lldb/include/lldb/Host/posix/DomainSocket.h b/lldb/include/lldb/Host/posix/DomainSocket.h index d4e0d43ee169c..3dbe6206da2c5 100644 --- a/lldb/include/lldb/Host/posix/DomainSocket.h +++ b/lldb/include/lldb/Host/posix/DomainSocket.h @@ -10,6 +10,8 @@ #define LLDB_HOST_POSIX_DOMAINSOCKET_H #include "lldb/Host/Socket.h" +#include +#include namespace lldb_private { class DomainSocket : public Socket { @@ -27,6 +29,8 @@ class DomainSocket : public Socket { std::string GetRemoteConnectionURI() const override; + std::vector GetListeningConnectionURI() const override; + protected: DomainSocket(SocketProtocol protocol); diff --git a/lldb/source/Host/common/TCPSocket.cpp b/lldb/source/Host/common/TCPSocket.cpp index 5d863954ee886..d0055c3b6c44f 100644 --- a/lldb/source/Host/common/TCPSocket.cpp +++ b/lldb/source/Host/common/TCPSocket.cpp @@ -115,6 +115,14 @@ std::string TCPSocket::GetRemoteConnectionURI() const { return ""; } +std::vector TCPSocket::GetListeningConnectionURI() const { + std::vector URIs; + for (const auto &[fd, addr] : m_listen_sockets) + URIs.emplace_back(llvm::formatv("connection://[{0}]:{1}", + addr.GetIPAddress(), addr.GetPort())); + return URIs; +} + Status TCPSocket::CreateSocket(int domain) { Status error; if (IsValid()) diff --git a/lldb/source/Host/posix/DomainSocket.cpp b/lldb/source/Host/posix/DomainSocket.cpp index 0451834630d33..9a0b385d998bf 100644 --- a/lldb/source/Host/posix/DomainSocket.cpp +++ b/lldb/source/Host/posix/DomainSocket.cpp @@ -175,3 +175,17 @@ std::string DomainSocket::GetRemoteConnectionURI() const { "{0}://{1}", GetNameOffset() == 0 ? "unix-connect" : "unix-abstract-connect", name); } + +std::vector DomainSocket::GetListeningConnectionURI() const { + if (m_socket == kInvalidSocketValue) + return {}; + + struct sockaddr_un addr; + bzero(&addr, sizeof(struct sockaddr_un)); + addr.sun_family = AF_UNIX; + socklen_t addr_len = sizeof(struct sockaddr_un); + if (::getsockname(m_socket, (struct sockaddr *)&addr, &addr_len) != 0) + return {}; + + return {llvm::formatv("unix-connect://{0}", addr.sun_path)}; +} diff --git a/lldb/unittests/Host/SocketTest.cpp b/lldb/unittests/Host/SocketTest.cpp index b20cfe5464028..a74352c19725d 100644 --- a/lldb/unittests/Host/SocketTest.cpp +++ b/lldb/unittests/Host/SocketTest.cpp @@ -88,6 +88,28 @@ TEST_P(SocketTest, DomainListenConnectAccept) { CreateDomainConnectedSockets(Path, &socket_a_up, &socket_b_up); } +TEST_P(SocketTest, DomainListenGetListeningConnectionURI) { + llvm::SmallString<64> Path; + std::error_code EC = + llvm::sys::fs::createUniqueDirectory("DomainListenConnectAccept", Path); + ASSERT_FALSE(EC); + llvm::sys::path::append(Path, "test"); + + // Skip the test if the $TMPDIR is too long to hold a domain socket. 
+ if (Path.size() > 107u) + return; + + auto listen_socket_up = std::make_unique( + /*should_close=*/true); + Status error = listen_socket_up->Listen(Path, 5); + ASSERT_THAT_ERROR(error.ToError(), llvm::Succeeded()); + ASSERT_TRUE(listen_socket_up->IsValid()); + + ASSERT_THAT( + listen_socket_up->GetListeningConnectionURI(), + testing::ElementsAre(llvm::formatv("unix-connect://{0}", Path).str())); +} + TEST_P(SocketTest, DomainMainLoopAccept) { llvm::SmallString<64> Path; std::error_code EC = @@ -225,12 +247,29 @@ TEST_P(SocketTest, TCPListen0GetPort) { if (!HostSupportsIPv4()) return; llvm::Expected> sock = - Socket::TcpListen("10.10.12.3:0", false); + Socket::TcpListen("10.10.12.3:0", 5); ASSERT_THAT_EXPECTED(sock, llvm::Succeeded()); ASSERT_TRUE(sock.get()->IsValid()); EXPECT_NE(sock.get()->GetLocalPortNumber(), 0); } +TEST_P(SocketTest, TCPListen0GetListeningConnectionURI) { + if (!HostSupportsProtocol()) + return; + + std::string addr = llvm::formatv("[{0}]:0", GetParam().localhost_ip).str(); + llvm::Expected> sock = Socket::TcpListen(addr); + ASSERT_THAT_EXPECTED(sock, llvm::Succeeded()); + ASSERT_TRUE(sock.get()->IsValid()); + + EXPECT_THAT( + sock.get()->GetListeningConnectionURI(), + testing::ElementsAre(llvm::formatv("connection://[{0}]:{1}", + GetParam().localhost_ip, + sock->get()->GetLocalPortNumber()) + .str())); +} + TEST_P(SocketTest, TCPGetConnectURI) { std::unique_ptr socket_a_up; std::unique_ptr socket_b_up; From fc11b6790e7439edc5399d1c9626c538d5520e6c Mon Sep 17 00:00:00 2001 From: nawrinsu Date: Tue, 3 Dec 2024 11:57:30 -0800 Subject: [PATCH 117/191] [LLVM][Maintainers] Update LLVM maintainers for OpenMP runtime (#118521) --- llvm/Maintainers.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/llvm/Maintainers.md b/llvm/Maintainers.md index 7f310d3762429..e69fe62ac175b 100644 --- a/llvm/Maintainers.md +++ b/llvm/Maintainers.md @@ -450,8 +450,11 @@ tstellar@redhat.com (email), [tstellar](https://github.com/tstellar) (GitHub) #### OpenMP (runtime library) -Andrey Churbanov \ -andrey.churbanov@intel.com (email), [AndreyChurbanov](https://github.com/AndreyChurbanov) (GitHub) +Michael Klemm \ +michael.klemm@amd.com (email), [mjklemm](https://github.com/mjklemm) (GitHub) + +Terry Wilmarth \ +terry.l.wilmarth@intel.com (email), [TerryLWilmarth](https://github.com/TerryLWilmarth) (GitHub) #### Polly From cd04653c4e7d66e9d3cc8e442c4fd6ef124702a9 Mon Sep 17 00:00:00 2001 From: wldfngrs Date: Tue, 3 Dec 2024 21:08:46 +0100 Subject: [PATCH 118/191] [libc][math][c23] Add sinf16 C23 math function (#116674) Co-authored-by: OverMighty --- libc/config/linux/x86_64/entrypoints.txt | 1 + libc/docs/math/index.rst | 2 +- libc/newhdrgen/yaml/math.yaml | 7 ++ libc/src/math/CMakeLists.txt | 1 + libc/src/math/generic/CMakeLists.txt | 21 +++++ libc/src/math/generic/sincosf16_utils.h | 46 +++++++++- libc/src/math/generic/sinf16.cpp | 108 +++++++++++++++++++++++ libc/src/math/generic/tanpif16.cpp | 4 +- libc/src/math/sinf16.h | 21 +++++ libc/test/src/math/CMakeLists.txt | 11 +++ libc/test/src/math/sinf16_test.cpp | 40 +++++++++ libc/test/src/math/smoke/CMakeLists.txt | 11 +++ libc/test/src/math/smoke/sinf16_test.cpp | 33 +++++++ 13 files changed, 299 insertions(+), 7 deletions(-) create mode 100644 libc/src/math/generic/sinf16.cpp create mode 100644 libc/src/math/sinf16.h create mode 100644 libc/test/src/math/sinf16_test.cpp create mode 100644 libc/test/src/math/smoke/sinf16_test.cpp diff --git a/libc/config/linux/x86_64/entrypoints.txt 
b/libc/config/linux/x86_64/entrypoints.txt index af7f429561fe0..5e9cc71279ab1 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -700,6 +700,7 @@ if(LIBC_TYPES_HAS_FLOAT16) libc.src.math.scalbnf16 libc.src.math.setpayloadf16 libc.src.math.setpayloadsigf16 + libc.src.math.sinf16 libc.src.math.sinhf16 libc.src.math.sinpif16 libc.src.math.sqrtf16 diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst index 2b86f49a3619e..4934e93ccb164 100644 --- a/libc/docs/math/index.rst +++ b/libc/docs/math/index.rst @@ -336,7 +336,7 @@ Higher Math Functions +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | rsqrt | | | | | | 7.12.7.9 | F.10.4.9 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ -| sin | |check| | |check| | | | | 7.12.4.6 | F.10.1.6 | +| sin | |check| | |check| | | |check| | | 7.12.4.6 | F.10.1.6 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | sincos | |check| | |check| | | | | | | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ diff --git a/libc/newhdrgen/yaml/math.yaml b/libc/newhdrgen/yaml/math.yaml index e09f0929e45f8..00efc34789667 100644 --- a/libc/newhdrgen/yaml/math.yaml +++ b/libc/newhdrgen/yaml/math.yaml @@ -2339,6 +2339,13 @@ functions: return_type: float arguments: - type: float + - name: sinf16 + standards: + - stdc + return_type: _Float16 + arguments: + - type: _Float16 + guard: LIBC_TYPES_HAS_FLOAT16 - name: sinhf standards: - stdc diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index 76a5e491effa0..390a59d07a28b 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -484,6 +484,7 @@ add_math_entrypoint_object(sincosf) add_math_entrypoint_object(sin) add_math_entrypoint_object(sinf) +add_math_entrypoint_object(sinf16) add_math_entrypoint_object(sinpif) add_math_entrypoint_object(sinpif16) diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index a5d17ad023f52..aeb758d4a092d 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -498,6 +498,27 @@ add_entrypoint_object( ${libc_opt_high_flag} ) +add_entrypoint_object( + sinf16 + SRCS + sinf16.cpp + HDRS + ../sinf16.h + DEPENDS + .sincosf16_utils + libc.hdr.errno_macros + libc.hdr.fenv_macros + libc.src.__support.FPUtil.cast + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.except_value_utils + libc.src.__support.FPUtil.multiply_add + libc.src.__support.macros.optimization + libc.src.__support.macros.properties.types + COMPILE_OPTIONS + -O3 +) + add_entrypoint_object( sincos SRCS diff --git a/libc/src/math/generic/sincosf16_utils.h b/libc/src/math/generic/sincosf16_utils.h index 83511755a56c4..5e5edd4a8c85b 100644 --- a/libc/src/math/generic/sincosf16_utils.h +++ b/libc/src/math/generic/sincosf16_utils.h @@ -11,6 +11,7 @@ #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/cast.h" #include 
"src/__support/FPUtil/nearest_integer.h" #include "src/__support/common.h" #include "src/__support/macros/config.h" @@ -46,10 +47,31 @@ LIBC_INLINE int32_t range_reduction_sincospif16(float x, float &y) { return static_cast(kf); } -LIBC_INLINE void sincospif16_eval(float xf, float &sin_k, float &cos_k, - float &sin_y, float &cosm1_y) { - float y; - int32_t k = range_reduction_sincospif16(xf, y); +// Recall, range reduction: +// k = round(x * 32/pi) +// y = x * 32/pi - k +// +// The constant 0x1.45f306dc9c883p3 is 32/pi rounded to double-precision. +// 32/pi is generated by Sollya with the following commands: +// > display = hexadecimal; +// > round(32/pi, D, RN); +// +// The precision choice of 'double' is to minimize rounding errors +// in this initial scaling step, preserving enough bits so errors accumulated +// while computing the subtraction: y = x * 32/pi - round(x * 32/pi) +// are beyond the least-significant bit of single-precision used during +// further intermediate computation. +LIBC_INLINE int32_t range_reduction_sincosf16(float x, float &y) { + double prod = x * 0x1.45f306dc9c883p3; + double kf = fputil::nearest_integer(prod); + y = static_cast(prod - kf); + + return static_cast(kf); +} + +static LIBC_INLINE void sincosf16_poly_eval(int32_t k, float y, float &sin_k, + float &cos_k, float &sin_y, + float &cosm1_y) { sin_k = SIN_K_PI_OVER_32[k & 63]; cos_k = SIN_K_PI_OVER_32[(k + 16) & 63]; @@ -72,6 +94,22 @@ LIBC_INLINE void sincospif16_eval(float xf, float &sin_k, float &cos_k, 0x1.a6f7a2p-29f); } +LIBC_INLINE void sincosf16_eval(float xf, float &sin_k, float &cos_k, + float &sin_y, float &cosm1_y) { + float y; + int32_t k = range_reduction_sincosf16(xf, y); + + sincosf16_poly_eval(k, y, sin_k, cos_k, sin_y, cosm1_y); +} + +LIBC_INLINE void sincospif16_eval(float xf, float &sin_k, float &cos_k, + float &sin_y, float &cosm1_y) { + float y; + int32_t k = range_reduction_sincospif16(xf, y); + + sincosf16_poly_eval(k, y, sin_k, cos_k, sin_y, cosm1_y); +} + } // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF16_UTILS_H diff --git a/libc/src/math/generic/sinf16.cpp b/libc/src/math/generic/sinf16.cpp new file mode 100644 index 0000000000000..86546348ba739 --- /dev/null +++ b/libc/src/math/generic/sinf16.cpp @@ -0,0 +1,108 @@ +//===-- Half-precision sin(x) function ------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/sinf16.h" +#include "hdr/errno_macros.h" +#include "hdr/fenv_macros.h" +#include "sincosf16_utils.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/cast.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/macros/optimization.h" + +namespace LIBC_NAMESPACE_DECL { + +constexpr size_t N_EXCEPTS = 4; + +constexpr fputil::ExceptValues SINF16_EXCEPTS{{ + // (input, RZ output, RU offset, RD offset, RN offset) + {0x2b45, 0x2b43, 1, 0, 1}, + {0x585c, 0x3ba3, 1, 0, 1}, + {0x5cb0, 0xbbff, 0, 1, 0}, + {0x51f5, 0xb80f, 0, 1, 0}, +}}; + +LLVM_LIBC_FUNCTION(float16, sinf16, (float16 x)) { + using FPBits = fputil::FPBits; + FPBits xbits(x); + + uint16_t x_u = xbits.uintval(); + uint16_t x_abs = x_u & 0x7fff; + float xf = x; + + // Range reduction: + // For |x| > pi/32, we perform range reduction as follows: + // Find k and y such that: + // x = (k + y) * pi/32 + // k is an integer, |y| < 0.5 + // + // This is done by performing: + // k = round(x * 32/pi) + // y = x * 32/pi - k + // + // Once k and y are computed, we then deduce the answer by the sine of sum + // formula: + // sin(x) = sin((k + y) * pi/32) + // = sin(k * pi/32) * cos(y * pi/32) + + // sin(y * pi/32) * cos(k * pi/32) + + // Handle exceptional values + if (LIBC_UNLIKELY(x_abs == 0x585c || x_abs == 0x5cb0 || x_abs == 0x51f5 || + x_abs == 0x2b45)) { + bool x_sign = x_u >> 15; + if (auto r = SINF16_EXCEPTS.lookup_odd(x_abs, x_sign); + LIBC_UNLIKELY(r.has_value())) + return r.value(); + } + + int rounding = fputil::quick_get_round(); + + // Exhaustive tests show that for |x| <= 0x1.f4p-11, 1ULP rounding errors + // occur. To fix this, the following apply: + if (LIBC_UNLIKELY(x_abs <= 0x13d0)) { + // sin(+/-0) = +/-0 + if (LIBC_UNLIKELY(x_abs == 0U)) + return x; + + // When x > 0, and rounding upward, sin(x) == x. + // When x < 0, and rounding downward, sin(x) == x. 
+ if ((rounding == FE_UPWARD && xbits.is_pos()) || + (rounding == FE_DOWNWARD && xbits.is_neg())) + return x; + + // When x < 0, and rounding upward, sin(x) == (x - 1ULP) + if (rounding == FE_UPWARD && xbits.is_neg()) { + x_u--; + return FPBits(x_u).get_val(); + } + } + + if (xbits.is_inf_or_nan()) { + if (xbits.is_inf()) { + fputil::set_errno_if_required(EDOM); + fputil::raise_except_if_required(FE_INVALID); + } + + return x + FPBits::quiet_nan().get_val(); + } + + float sin_k, cos_k, sin_y, cosm1_y; + sincosf16_eval(xf, sin_k, cos_k, sin_y, cosm1_y); + + if (LIBC_UNLIKELY(sin_y == 0 && sin_k == 0)) + return FPBits::zero(xbits.sign()).get_val(); + + // Since, cosm1_y = cos_y - 1, therfore: + // sin(x) = cos_k * sin_y + sin_k + (cosm1_y * sin_k) + return fputil::cast(fputil::multiply_add( + sin_y, cos_k, fputil::multiply_add(cosm1_y, sin_k, sin_k))); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/tanpif16.cpp b/libc/src/math/generic/tanpif16.cpp index ab3c9cb2122ba..67635536ee319 100644 --- a/libc/src/math/generic/tanpif16.cpp +++ b/libc/src/math/generic/tanpif16.cpp @@ -21,7 +21,7 @@ namespace LIBC_NAMESPACE_DECL { constexpr size_t N_EXCEPTS = 21; -constexpr fputil::ExceptValues TANF16_EXCEPTS{{ +constexpr fputil::ExceptValues TANPIF16_EXCEPTS{{ // (input, RZ output, RU offset, RD offset, RN offset) {0x07f2, 0x0e3d, 1, 0, 0}, {0x086a, 0x0eee, 1, 0, 1}, {0x08db, 0x0fa0, 1, 0, 0}, {0x094c, 0x1029, 1, 0, 0}, @@ -49,7 +49,7 @@ LLVM_LIBC_FUNCTION(float16, tanpif16, (float16 x)) { return x; bool x_sign = x_u >> 15; - if (auto r = TANF16_EXCEPTS.lookup_odd(x_abs, x_sign); + if (auto r = TANPIF16_EXCEPTS.lookup_odd(x_abs, x_sign); LIBC_UNLIKELY(r.has_value())) return r.value(); } diff --git a/libc/src/math/sinf16.h b/libc/src/math/sinf16.h new file mode 100644 index 0000000000000..23f1aa99b6233 --- /dev/null +++ b/libc/src/math/sinf16.h @@ -0,0 +1,21 @@ +//===-- Implementation header for sinf16 ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_SINF16_H +#define LLVM_LIBC_SRC_MATH_SINF16_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +float16 sinf16(float16 x); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_SINF16_H diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt index 610f4d9fc1a3b..ea75720df4f43 100644 --- a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -85,6 +85,17 @@ add_fp_unittest( libc.src.__support.FPUtil.fp_bits ) +add_fp_unittest( + sinf16_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + sinf16_test.cpp + DEPENDS + libc.src.math.sinf16 +) + add_fp_unittest( sinpif_test NEED_MPFR diff --git a/libc/test/src/math/sinf16_test.cpp b/libc/test/src/math/sinf16_test.cpp new file mode 100644 index 0000000000000..b05501cb0f145 --- /dev/null +++ b/libc/test/src/math/sinf16_test.cpp @@ -0,0 +1,40 @@ +//===-- Exhaustive test for sinf16 ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/sinf16.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" +#include "utils/MPFRWrapper/MPFRUtils.h" + +using LlvmLibcSinf16Test = LIBC_NAMESPACE::testing::FPTest; + +namespace mpfr = LIBC_NAMESPACE::testing::mpfr; + +// Range: [0, Inf] +static constexpr uint16_t POS_START = 0x0000U; +static constexpr uint16_t POS_STOP = 0x7c00U; + +// Range: [-Inf, 0] +static constexpr uint16_t NEG_START = 0x8000U; +static constexpr uint16_t NEG_STOP = 0xfc00U; + +TEST_F(LlvmLibcSinf16Test, PositiveRange) { + for (uint16_t v = POS_START; v <= POS_STOP; ++v) { + float16 x = FPBits(v).get_val(); + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sin, x, + LIBC_NAMESPACE::sinf16(x), 0.5); + } +} + +TEST_F(LlvmLibcSinf16Test, NegativeRange) { + for (uint16_t v = NEG_START; v <= NEG_STOP; ++v) { + float16 x = FPBits(v).get_val(); + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sin, x, + LIBC_NAMESPACE::sinf16(x), 0.5); + } +} diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt index e9c785f7d9330..2c1c4dba73846 100644 --- a/libc/test/src/math/smoke/CMakeLists.txt +++ b/libc/test/src/math/smoke/CMakeLists.txt @@ -49,6 +49,17 @@ add_fp_unittest( libc.src.__support.FPUtil.fp_bits ) +add_fp_unittest( + sinf16_test + SUITE + libc-math-smoke-tests + SRCS + sinf16_test.cpp + DEPENDS + libc.src.errno.errno + libc.src.math.sinf16 +) + add_fp_unittest( sinpif_test SUITE diff --git a/libc/test/src/math/smoke/sinf16_test.cpp b/libc/test/src/math/smoke/sinf16_test.cpp new file mode 100644 index 0000000000000..2966c3c952fd2 --- /dev/null +++ b/libc/test/src/math/smoke/sinf16_test.cpp @@ -0,0 +1,33 @@ +//===-- Unittests for sinf16 ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/errno/libc_errno.h" +#include "src/math/sinf16.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" + +using LlvmLibcSinf16Test = LIBC_NAMESPACE::testing::FPTest; + +TEST_F(LlvmLibcSinf16Test, SpecialNumbers) { + LIBC_NAMESPACE::libc_errno = 0; + + EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::sinf16(aNaN)); + EXPECT_MATH_ERRNO(0); + + EXPECT_FP_EQ(zero, LIBC_NAMESPACE::sinf16(zero)); + EXPECT_MATH_ERRNO(0); + + EXPECT_FP_EQ(neg_zero, LIBC_NAMESPACE::sinf16(neg_zero)); + EXPECT_MATH_ERRNO(0); + + EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::sinf16(inf)); + EXPECT_MATH_ERRNO(EDOM); + + EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::sinf16(neg_inf)); + EXPECT_MATH_ERRNO(EDOM); +} From 2d57333da432921762323718351a21532867588c Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Tue, 3 Dec 2024 12:11:47 -0800 Subject: [PATCH 119/191] [flang] Fix crash in HLFIR generation (#118399) Structure constructors with multiple components would crash when components were from parent types. The search for the right parent component must be done anew for each component. Fixes https://github.com/llvm/llvm-project/issues/118270, https://github.com/llvm/llvm-project/issues/96994, and https://github.com/llvm/llvm-project/issues/105848. 
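For reference, a minimal sketch of the affected pattern (type and component
names are invented for this note, not taken from the linked issues): a
structure constructor that supplies values for more than one component
inherited from a parent type.

```
! illustrative only: several constructor components come from the parent type
program demo
  implicit none
  type :: base
    integer :: i = 0
    integer :: k = 0
  end type base
  type, extends(base) :: child
    integer :: j = 0
  end type child
  type(child) :: c
  c = child(i=1, k=3, j=2)   ! i and k are inherited from base
  print *, c%i, c%k, c%j
end program demo
```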
--- flang/lib/Lower/ConvertExprToHLFIR.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/flang/lib/Lower/ConvertExprToHLFIR.cpp b/flang/lib/Lower/ConvertExprToHLFIR.cpp index e93fbc562f9b1..4ab319b016caf 100644 --- a/flang/lib/Lower/ConvertExprToHLFIR.cpp +++ b/flang/lib/Lower/ConvertExprToHLFIR.cpp @@ -1696,18 +1696,17 @@ class HlfirBuilder { // required chains of hlfir.designate to address the parent components so // that the StructureConstructor can later be lowered by addressing these // parent components if needed. Note: the front-end orders the components in - // structure constructors. The code below relies on the component to appear - // in order. + // structure constructors. using ValueAndParent = std::tuple; llvm::SmallVector valuesAndParents; - Fortran::lower::ComponentReverseIterator compIterator( - ctor.result().derivedTypeSpec()); - hlfir::EntityWithAttributes currentParent = varOp; for (const auto &value : llvm::reverse(ctor.values())) { const Fortran::semantics::Symbol &compSym = *value.first; - while (!compIterator.lookup(compSym.name())) { + hlfir::EntityWithAttributes currentParent = varOp; + for (Fortran::lower::ComponentReverseIterator compIterator( + ctor.result().derivedTypeSpec()); + !compIterator.lookup(compSym.name());) { const auto &parentType = compIterator.advanceToParentType(); llvm::StringRef parentName = toStringRef(parentType.name()); auto baseRecTy = mlir::cast( From d6cd214dd6ae35ea50be4fdc296ef9091f762375 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Tue, 3 Dec 2024 12:14:16 -0800 Subject: [PATCH 120/191] [ThinLTO][LowerTypeTests] Don't compute address taken set unless CFI (NFC) (#118508) The AddressTaken set used for CFI with regular LTO was being computed on the ExportSummary regardless of whether any CFI metadata existed. In the case of ThinLTO, the ExportSummary is the global summary index for the target, and the lack of guard in this code meant this was being computed on the ThinLTO index even when there was an empty regular LTO module, since the backend is called on the combined module to generate the expected output file (normally this is trivial as there is no IR). Move the computation of the AddressTaken set into the condition checking for CFI to avoid this overhead. This change resulted in a 20% speedup in the thin link of a large target. It looks like the outer loop has existed here for several years, but likely became a larger overhead after the inner loop was added very recently in PR113987. I will send a separate patch to refactor the ThinLTO backend handling to avoid invoking the opt pipeline if the module is empty, in case there are other summary-based analyses in some of the passes now or in the future. This change is still desireable as by default regular LTO modules contain summaries, or we can have split thin and regular LTO modules, and if they don't involve CFI these would still unnecessarily compute the AddressTaken set. --- llvm/lib/Transforms/IPO/LowerTypeTests.cpp | 23 +++++++++++----------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp index 87d2432803062..e3caefe70311b 100644 --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -2090,20 +2090,19 @@ bool LowerTypeTestsModule::lower() { }; MapVector ExportedFunctions; if (ExportSummary) { - // A set of all functions that are address taken by a live global object. 
- DenseSet AddressTaken; - for (auto &I : *ExportSummary) - for (auto &GVS : I.second.SummaryList) - if (GVS->isLive()) - for (const auto &Ref : GVS->refs()) { - AddressTaken.insert(Ref.getGUID()); - for (auto &RefGVS : Ref.getSummaryList()) - if (auto Alias = dyn_cast(RefGVS.get())) - AddressTaken.insert(Alias->getAliaseeGUID()); - } - NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions"); if (CfiFunctionsMD) { + // A set of all functions that are address taken by a live global object. + DenseSet AddressTaken; + for (auto &I : *ExportSummary) + for (auto &GVS : I.second.SummaryList) + if (GVS->isLive()) + for (const auto &Ref : GVS->refs()) { + AddressTaken.insert(Ref.getGUID()); + for (auto &RefGVS : Ref.getSummaryList()) + if (auto Alias = dyn_cast(RefGVS.get())) + AddressTaken.insert(Alias->getAliaseeGUID()); + } for (auto *FuncMD : CfiFunctionsMD->operands()) { assert(FuncMD->getNumOperands() >= 2); StringRef FunctionName = From fdd09e9de579e6915d467c1b72efb8c653739602 Mon Sep 17 00:00:00 2001 From: Rashmi Mudduluru Date: Tue, 3 Dec 2024 12:26:20 -0800 Subject: [PATCH 121/191] [ASTMatchers] AST matcher support for ObjC pointers (#117021) Add `ObjCInterfaceDecl` to the list of types supported by the `hasType` and `hasDeclaration` matchers, `ObjCObjectPointerType` to the list of types supported by `pointee`. These AST matcher improvements will help the new WebKit checker for unsafe casts ([https://github.com/llvm/llvm-project/pull/114606](https://github.com/llvm/llvm-project/pull/114606)) match on unsafe Objective-C pointer casts. --- clang/docs/ReleaseNotes.rst | 4 ++++ clang/include/clang/ASTMatchers/ASTMatchers.h | 5 +++-- clang/include/clang/ASTMatchers/ASTMatchersInternal.h | 10 +++++++++- clang/lib/ASTMatchers/ASTMatchersInternal.cpp | 3 ++- .../unittests/ASTMatchers/ASTMatchersTraversalTest.cpp | 6 ++++++ 5 files changed, 24 insertions(+), 4 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 0b945d73f0d74..5026d4475b38a 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -975,6 +975,10 @@ AST Matchers - Add ``exportDecl`` matcher to match export declaration. +- Ensure ``hasType`` and ``hasDeclaration`` match Objective-C interface declarations. + +- Ensure ``pointee`` matches Objective-C pointer types. + clang-format ------------ diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h index efad600a3c58c..897aa25dc95cc 100644 --- a/clang/include/clang/ASTMatchers/ASTMatchers.h +++ b/clang/include/clang/ASTMatchers/ASTMatchers.h @@ -4044,7 +4044,7 @@ AST_POLYMORPHIC_MATCHER_P_OVERLOAD( AST_POLYMORPHIC_MATCHER_P_OVERLOAD( hasType, AST_POLYMORPHIC_SUPPORTED_TYPES(Expr, FriendDecl, ValueDecl, - CXXBaseSpecifier), + CXXBaseSpecifier, ObjCInterfaceDecl), internal::Matcher, InnerMatcher, 1) { QualType QT = internal::getUnderlyingType(Node); if (!QT.isNull()) @@ -7445,7 +7445,8 @@ extern const AstTypeMatcher rValueReferenceType; AST_TYPELOC_TRAVERSE_MATCHER_DECL( pointee, getPointee, AST_POLYMORPHIC_SUPPORTED_TYPES(BlockPointerType, MemberPointerType, - PointerType, ReferenceType)); + PointerType, ReferenceType, + ObjCObjectPointerType)); /// Matches typedef types. 
/// diff --git a/clang/include/clang/ASTMatchers/ASTMatchersInternal.h b/clang/include/clang/ASTMatchers/ASTMatchersInternal.h index ab8b146453e76..04804d5def046 100644 --- a/clang/include/clang/ASTMatchers/ASTMatchersInternal.h +++ b/clang/include/clang/ASTMatchers/ASTMatchersInternal.h @@ -161,6 +161,9 @@ inline QualType getUnderlyingType(const FriendDecl &Node) { inline QualType getUnderlyingType(const CXXBaseSpecifier &Node) { return Node.getType(); } +inline QualType getUnderlyingType(const ObjCInterfaceDecl &Node) { + return Node.getTypeForDecl()->getPointeeType(); +} /// Unifies obtaining a `TypeSourceInfo` from different node types. template { return matchesDecl(Node.getDecl(), Finder, Builder); } + bool matchesSpecialized(const ObjCInterfaceDecl &Node, ASTMatchFinder *Finder, + BoundNodesTreeBuilder *Builder) const { + return matchesDecl(Node.getCanonicalDecl(), Finder, Builder); + } + /// Extracts the operator new of the new call and returns whether the /// inner matcher matches on it. bool matchesSpecialized(const CXXNewExpr &Node, @@ -1213,7 +1221,7 @@ using HasDeclarationSupportedTypes = ElaboratedType, InjectedClassNameType, LabelStmt, AddrLabelExpr, MemberExpr, QualType, RecordType, TagType, TemplateSpecializationType, TemplateTypeParmType, TypedefType, - UnresolvedUsingType, ObjCIvarRefExpr>; + UnresolvedUsingType, ObjCIvarRefExpr, ObjCInterfaceDecl>; /// A Matcher that allows binding the node it matches to an id. /// diff --git a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp index 84a7fa4d36b48..bf9dc5f2373f9 100644 --- a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp +++ b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp @@ -1114,7 +1114,8 @@ AST_TYPELOC_TRAVERSE_MATCHER_DEF(hasValueType, AST_TYPELOC_TRAVERSE_MATCHER_DEF( pointee, AST_POLYMORPHIC_SUPPORTED_TYPES(BlockPointerType, MemberPointerType, - PointerType, ReferenceType)); + PointerType, ReferenceType, + ObjCObjectPointerType)); const internal::VariadicDynCastAllOfMatcher ompExecutableDirective; diff --git a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp index 1d18869a6b8af..75d6ca5ba17f8 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp +++ b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp @@ -283,6 +283,12 @@ TEST(HasDeclaration, HasDeclarationOfTypeAlias) { hasDeclaration(typeAliasTemplateDecl())))))))); } +TEST(HasDeclaration, HasDeclarationOfObjCInterface) { + EXPECT_TRUE(matchesObjC("@interface BaseClass @end void f() {BaseClass* b;}", + varDecl(hasType(objcObjectPointerType( + pointee(hasDeclaration(objcInterfaceDecl()))))))); +} + TEST(HasUnqualifiedDesugaredType, DesugarsUsing) { EXPECT_TRUE( matches("struct A {}; using B = A; B b;", From e0ae7793fca0c78919bc41ffd407acb62146ad47 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 3 Dec 2024 12:34:26 -0800 Subject: [PATCH 122/191] [libc] delete hdrgen (#117220) Thanks to the effort of @RoseZhang03 and @aaryanshukla under the guidance of @michaelrj-google and @amykhuang, we now have newhdrgen and no longer have a dependency on TableGen and thus LLVM in order to start bootstrapping a full build. This PR removes: - LIBC_HDRGEN_EXE; the in tree newhdrgen is the only hdrgen that can be used. - LIBC_USE_NEW_HEADER_GEN; newhdrgen is the default and only option. - LIBC_HDRGEN_ONLY; there is no need to have a distinct build step for old hdrgen. - libc-api-test and libc-api-test-tidy build targets. 
- Deletes all .td files. It does not rename newhdrgen to just hdrgen. Will follow up with a distinct PR for that. Link: #117209 Link: #117254 Fixes: #117208 --- libc/CMakeLists.txt | 57 +- libc/cmake/modules/LLVMLibCHeaderRules.cmake | 159 +- libc/config/baremetal/api.td | 59 - libc/config/gpu/api.td | 50 - libc/config/linux/api.td | 276 --- libc/config/public_api.td | 26 - libc/docs/dev/header_generation.rst | 3 - libc/docs/gpu/building.rst | 19 +- libc/include/CMakeLists.txt | 28 +- libc/spec/bsd_ext.td | 87 - libc/spec/gnu_ext.td | 316 --- libc/spec/gpu_ext.td | 18 - libc/spec/linux.td | 334 --- libc/spec/llvm_libc_ext.td | 116 - libc/spec/llvm_libc_stdfix_ext.td | 27 - libc/spec/posix.td | 1867 ----------------- libc/spec/spec.td | 253 --- libc/spec/stdc.td | 1827 ---------------- libc/spec/stdc_ext.td | 82 - libc/test/src/CMakeLists.txt | 78 - libc/utils/HdrGen/CMakeLists.txt | 22 - libc/utils/HdrGen/Command.cpp | 15 - libc/utils/HdrGen/Command.h | 54 - libc/utils/HdrGen/Generator.cpp | 203 -- libc/utils/HdrGen/Generator.h | 60 - libc/utils/HdrGen/IncludeFileCommand.cpp | 50 - libc/utils/HdrGen/IncludeFileCommand.h | 32 - libc/utils/HdrGen/Main.cpp | 62 - .../HdrGen/PrototypeTestGen/CMakeLists.txt | 5 - .../PrototypeTestGen/PrototypeTestGen.cpp | 106 - libc/utils/HdrGen/PublicAPICommand.cpp | 331 --- libc/utils/HdrGen/PublicAPICommand.h | 48 - libc/utils/HdrGen/README.md | 5 - libc/utils/LibcTableGenUtil/APIIndexer.cpp | 173 -- libc/utils/LibcTableGenUtil/APIIndexer.h | 86 - libc/utils/LibcTableGenUtil/CMakeLists.txt | 13 - llvm/CMakeLists.txt | 27 +- llvm/runtimes/CMakeLists.txt | 18 - 38 files changed, 50 insertions(+), 6942 deletions(-) delete mode 100644 libc/config/baremetal/api.td delete mode 100644 libc/config/gpu/api.td delete mode 100644 libc/config/linux/api.td delete mode 100644 libc/config/public_api.td delete mode 100644 libc/spec/bsd_ext.td delete mode 100644 libc/spec/gnu_ext.td delete mode 100644 libc/spec/gpu_ext.td delete mode 100644 libc/spec/linux.td delete mode 100644 libc/spec/llvm_libc_ext.td delete mode 100644 libc/spec/llvm_libc_stdfix_ext.td delete mode 100644 libc/spec/posix.td delete mode 100644 libc/spec/spec.td delete mode 100644 libc/spec/stdc.td delete mode 100644 libc/spec/stdc_ext.td delete mode 100644 libc/utils/HdrGen/CMakeLists.txt delete mode 100644 libc/utils/HdrGen/Command.cpp delete mode 100644 libc/utils/HdrGen/Command.h delete mode 100644 libc/utils/HdrGen/Generator.cpp delete mode 100644 libc/utils/HdrGen/Generator.h delete mode 100644 libc/utils/HdrGen/IncludeFileCommand.cpp delete mode 100644 libc/utils/HdrGen/IncludeFileCommand.h delete mode 100644 libc/utils/HdrGen/Main.cpp delete mode 100644 libc/utils/HdrGen/PrototypeTestGen/CMakeLists.txt delete mode 100644 libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp delete mode 100644 libc/utils/HdrGen/PublicAPICommand.cpp delete mode 100644 libc/utils/HdrGen/PublicAPICommand.h delete mode 100644 libc/utils/HdrGen/README.md delete mode 100644 libc/utils/LibcTableGenUtil/APIIndexer.cpp delete mode 100644 libc/utils/LibcTableGenUtil/APIIndexer.h delete mode 100644 libc/utils/LibcTableGenUtil/CMakeLists.txt diff --git a/libc/CMakeLists.txt b/libc/CMakeLists.txt index eee5b63bab513..fd82359022cff 100644 --- a/libc/CMakeLists.txt +++ b/libc/CMakeLists.txt @@ -50,52 +50,27 @@ set(LIBC_NAMESPACE ${default_namespace} CACHE STRING "The namespace to use to enclose internal implementations. Must start with '__llvm_libc'." 
) - -add_subdirectory(newhdrgen) - - -if(LLVM_LIBC_FULL_BUILD OR LLVM_LIBC_GPU_BUILD) - if(NOT LIBC_HDRGEN_EXE) - # We need to set up hdrgen first since other targets depend on it. - add_subdirectory(utils/LibcTableGenUtil) - add_subdirectory(utils/HdrGen) - # Calling add_tablegen sets variables like LIBC_TABLEGEN_EXE in - # PARENT_SCOPE which get lost until saved in the cache. - set(LIBC_TABLEGEN_EXE "${LIBC_TABLEGEN_EXE}" CACHE INTERNAL "") - set(LIBC_TABLEGEN_TARGET "${LIBC_TABLEGEN_TARGET}" CACHE INTERNAL "") - else() - message(STATUS "Will use ${LIBC_HDRGEN_EXE} for libc header generation.") - endif() -endif() # We will build the GPU utilities if we are not doing a runtimes build. option(LIBC_BUILD_GPU_LOADER "Always build the GPU loader utilities" OFF) -if(LIBC_BUILD_GPU_LOADER OR (LLVM_LIBC_GPU_BUILD AND NOT LLVM_RUNTIMES_BUILD)) - add_subdirectory(utils/gpu) -endif() - -option(LIBC_USE_NEW_HEADER_GEN "Generate header files using new headergen instead of the old one" ON) - -set(NEED_LIBC_HDRGEN FALSE) -if(NOT LLVM_RUNTIMES_BUILD) - if("libc" IN_LIST LLVM_ENABLE_RUNTIMES) - set(NEED_LIBC_HDRGEN TRUE) - else() - foreach(_name ${LLVM_RUNTIME_TARGETS}) - if("libc" IN_LIST RUNTIMES_${_name}_LLVM_ENABLE_RUNTIMES) - set(NEED_LIBC_HDRGEN TRUE) - break() +if(LIBC_BUILD_GPU_LOADER OR NOT LLVM_RUNTIMES_BUILD) + foreach(_name ${LLVM_RUNTIME_TARGETS}) + if("libc" IN_LIST RUNTIMES_${_name}_LLVM_ENABLE_RUNTIMES) + if("${_name}" STREQUAL "amdgcn-amd-amdhsa" OR "${_name}" STREQUAL "nvptx64-nvidia-cuda") + set(LIBC_NEED_LOADER_UTILS TRUE) endif() - endforeach() + endif() + endforeach() + if("${LIBC_TARGET_TRIPLE}" STREQUAL "amdgcn-amd-amdhsa" OR + "${LIBC_TARGET_TRIPLE}" STREQUAL "nvptx64-nvidia-cuda") + set(LIBC_NEED_LOADER_UTILS TRUE) + endif() + if(LIBC_NEED_LOADER_UTILS) + add_subdirectory(utils/gpu) + return() endif() endif() -option(LIBC_HDRGEN_ONLY "Only build the 'libc-hdrgen' executable" OFF) -if(LIBC_HDRGEN_ONLY OR NEED_LIBC_HDRGEN) - # When libc is build as part of the runtimes/bootstrap build's CMake run, we - # only need to build the host tools to build the libc. So, we just do enough - # to build libc-hdrgen and return. - return() -endif() -unset(NEED_LIBC_HDRGEN) + +add_subdirectory(newhdrgen) option(LIBC_CMAKE_VERBOSE_LOGGING "Log details warnings and notifications during CMake configuration." 
OFF) diff --git a/libc/cmake/modules/LLVMLibCHeaderRules.cmake b/libc/cmake/modules/LLVMLibCHeaderRules.cmake index 76c4e1f2d3244..8f24cd4b3023b 100644 --- a/libc/cmake/modules/LLVMLibCHeaderRules.cmake +++ b/libc/cmake/modules/LLVMLibCHeaderRules.cmake @@ -71,9 +71,9 @@ function(add_header target_name) ) endfunction(add_header) -function(add_gen_header2 target_name) +function(add_gen_header target_name) cmake_parse_arguments( - "ADD_GEN_HDR2" + "ADD_GEN_HDR" "PUBLIC" # No optional arguments "YAML_FILE;DEF_FILE;GEN_HDR" # Single value arguments "DEPENDS" # Multi value arguments @@ -84,25 +84,25 @@ function(add_gen_header2 target_name) add_library(${fq_target_name} INTERFACE) return() endif() - if(NOT ADD_GEN_HDR2_DEF_FILE) - message(FATAL_ERROR "`add_gen_hdr2` rule requires DEF_FILE to be specified.") + if(NOT ADD_GEN_HDR_DEF_FILE) + message(FATAL_ERROR "`add_gen_hdr` rule requires DEF_FILE to be specified.") endif() - if(NOT ADD_GEN_HDR2_GEN_HDR) - message(FATAL_ERROR "`add_gen_hdr2` rule requires GEN_HDR to be specified.") + if(NOT ADD_GEN_HDR_GEN_HDR) + message(FATAL_ERROR "`add_gen_hdr` rule requires GEN_HDR to be specified.") endif() - if(NOT ADD_GEN_HDR2_YAML_FILE) - message(FATAL_ERROR "`add_gen_hdr2` rule requires YAML_FILE to be specified.") + if(NOT ADD_GEN_HDR_YAML_FILE) + message(FATAL_ERROR "`add_gen_hdr` rule requires YAML_FILE to be specified.") endif() - set(absolute_path ${CMAKE_CURRENT_SOURCE_DIR}/${ADD_GEN_HDR2_GEN_HDR}) + set(absolute_path ${CMAKE_CURRENT_SOURCE_DIR}/${ADD_GEN_HDR_GEN_HDR}) file(RELATIVE_PATH relative_path ${LIBC_INCLUDE_SOURCE_DIR} ${absolute_path}) set(out_file ${LIBC_INCLUDE_DIR}/${relative_path}) - set(yaml_file ${CMAKE_SOURCE_DIR}/${ADD_GEN_HDR2_YAML_FILE}) - set(def_file ${CMAKE_CURRENT_SOURCE_DIR}/${ADD_GEN_HDR2_DEF_FILE}) + set(yaml_file ${CMAKE_SOURCE_DIR}/${ADD_GEN_HDR_YAML_FILE}) + set(def_file ${CMAKE_CURRENT_SOURCE_DIR}/${ADD_GEN_HDR_DEF_FILE}) set(fq_data_files "") - if(ADD_GEN_HDR2_DATA_FILES) - foreach(data_file IN LISTS ADD_GEN_HDR2_DATA_FILES) + if(ADD_GEN_HDR_DATA_FILES) + foreach(data_file IN LISTS ADD_GEN_HDR_DATA_FILES) list(APPEND fq_data_files "${CMAKE_CURRENT_SOURCE_DIR}/${data_file}") endforeach(data_file) endif() @@ -118,7 +118,7 @@ function(add_gen_header2 target_name) ${entry_points} --output_dir ${out_file} DEPENDS ${yaml_file} ${def_file} ${fq_data_files} - COMMENT "Generating header ${ADD_GEN_HDR2_GEN_HDR} from ${yaml_file} and ${def_file}" + COMMENT "Generating header ${ADD_GEN_HDR_GEN_HDR} from ${yaml_file} and ${def_file}" ) if(LIBC_TARGET_OS_IS_GPU) file(MAKE_DIRECTORY ${LIBC_INCLUDE_DIR}/llvm-libc-decls) @@ -136,132 +136,6 @@ function(add_gen_header2 target_name) ) endif() - if(ADD_GEN_HDR2_DEPENDS) - get_fq_deps_list(fq_deps_list ${ADD_GEN_HDR2_DEPENDS}) - # Dependencies of a add_header target can only be another add_gen_header target - # or an add_header target. 
- foreach(dep IN LISTS fq_deps_list) - get_target_property(header_file ${dep} HEADER_FILE_PATH) - if(NOT header_file) - message(FATAL_ERROR "Invalid dependency '${dep}' for '${fq_target_name}'.") - endif() - endforeach() - endif() - set(generated_hdr_target ${fq_target_name}.__generated_hdr__) - add_custom_target( - ${generated_hdr_target} - DEPENDS ${out_file} ${fq_deps_list} ${decl_out_file} - ) - - add_header_library( - ${target_name} - HDRS - ${out_file} - ) - - add_dependencies(${fq_target_name} ${generated_hdr_target}) - - set_target_properties( - ${fq_target_name} - PROPERTIES - HEADER_FILE_PATH ${out_file} - DECLS_FILE_PATH "${decl_out_file}" - DEPS "${fq_deps_list}" - ) - - -endfunction(add_gen_header2) - -# Usage: -# add_gen_header( -# -# DEF_FILE <.h.def file> -# GEN_HDR -# PARAMS -# DATA_FILES -# ) -function(add_gen_header target_name) - cmake_parse_arguments( - "ADD_GEN_HDR" - "PUBLIC" # No optional arguments - "DEF_FILE;GEN_HDR" # Single value arguments - "PARAMS;DATA_FILES;DEPENDS" # Multi value arguments - ${ARGN} - ) - get_fq_target_name(${target_name} fq_target_name) - if(NOT LLVM_LIBC_FULL_BUILD) - # We don't want to use generated headers if we are doing a non-full-build. - add_library(${fq_target_name} INTERFACE) - return() - endif() - if(NOT ADD_GEN_HDR_DEF_FILE) - message(FATAL_ERROR "`add_gen_hdr` rule requires DEF_FILE to be specified.") - endif() - if(NOT ADD_GEN_HDR_GEN_HDR) - message(FATAL_ERROR "`add_gen_hdr` rule requires GEN_HDR to be specified.") - endif() - - set(absolute_path ${CMAKE_CURRENT_SOURCE_DIR}/${ADD_GEN_HDR_GEN_HDR}) - file(RELATIVE_PATH relative_path ${LIBC_INCLUDE_SOURCE_DIR} ${absolute_path}) - set(out_file ${LIBC_INCLUDE_DIR}/${relative_path}) - set(in_file ${CMAKE_CURRENT_SOURCE_DIR}/${ADD_GEN_HDR_DEF_FILE}) - - set(fq_data_files "") - if(ADD_GEN_HDR_DATA_FILES) - foreach(data_file IN LISTS ADD_GEN_HDR_DATA_FILES) - list(APPEND fq_data_files "${CMAKE_CURRENT_SOURCE_DIR}/${data_file}") - endforeach(data_file) - endif() - - set(replacement_params "") - if(ADD_GEN_HDR_PARAMS) - list(APPEND replacement_params "--args" ${ADD_GEN_HDR_PARAMS}) - endif() - - set(gen_hdr_script "${LIBC_BUILD_SCRIPTS_DIR}/gen_hdr.py") - - file(GLOB td_includes ${LIBC_SOURCE_DIR}/spec/*.td) - - set(ENTRYPOINT_NAME_LIST_ARG ${TARGET_ENTRYPOINT_NAME_LIST}) - list(TRANSFORM ENTRYPOINT_NAME_LIST_ARG PREPEND "--e=") - - if(LIBC_HDRGEN_EXE) - set(hdrgen_exe ${LIBC_HDRGEN_EXE}) - else() - set(hdrgen_exe ${LIBC_TABLEGEN_EXE}) - set(hdrgen_deps "${LIBC_TABLEGEN_EXE};${LIBC_TABLEGEN_TARGET}") - endif() - add_custom_command( - OUTPUT ${out_file} - COMMAND ${hdrgen_exe} -o ${out_file} --header ${ADD_GEN_HDR_GEN_HDR} - --def ${in_file} ${replacement_params} -I ${LIBC_SOURCE_DIR} - ${ENTRYPOINT_NAME_LIST_ARG} - ${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/api.td - - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - DEPENDS ${in_file} ${fq_data_files} ${td_includes} - ${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/api.td - ${hdrgen_deps} - ) - - if(LIBC_TARGET_OS_IS_GPU) - file(MAKE_DIRECTORY ${LIBC_INCLUDE_DIR}/llvm-libc-decls) - file(MAKE_DIRECTORY ${LIBC_INCLUDE_DIR}/llvm-libc-decls/gpu) - set(decl_out_file ${LIBC_INCLUDE_DIR}/llvm-libc-decls/${relative_path}) - add_custom_command( - OUTPUT ${decl_out_file} - COMMAND ${hdrgen_exe} -o ${decl_out_file} - --header ${ADD_GEN_HDR_GEN_HDR} --def ${in_file} --export-decls - ${replacement_params} -I ${LIBC_SOURCE_DIR} ${ENTRYPOINT_NAME_LIST_ARG} - ${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/api.td - - WORKING_DIRECTORY 
${CMAKE_CURRENT_SOURCE_DIR} - DEPENDS ${in_file} ${fq_data_files} ${td_includes} - ${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/api.td - ${hdrgen_deps} - ) - endif() - if(ADD_GEN_HDR_DEPENDS) get_fq_deps_list(fq_deps_list ${ADD_GEN_HDR_DEPENDS}) # Dependencies of a add_header target can only be another add_gen_header target @@ -285,9 +159,6 @@ function(add_gen_header target_name) ${out_file} ) - # We add the dependencies separately and not list under add_header_library's - # DEPENDS option above. This is because, deps of add_header_library are - # used with target_link_libraries. add_dependencies(${fq_target_name} ${generated_hdr_target}) set_target_properties( @@ -297,4 +168,6 @@ function(add_gen_header target_name) DECLS_FILE_PATH "${decl_out_file}" DEPS "${fq_deps_list}" ) + + endfunction(add_gen_header) diff --git a/libc/config/baremetal/api.td b/libc/config/baremetal/api.td deleted file mode 100644 index 7421d86fabeb0..0000000000000 --- a/libc/config/baremetal/api.td +++ /dev/null @@ -1,59 +0,0 @@ -include "config/public_api.td" - -include "spec/stdc.td" -include "spec/stdc_ext.td" -include "spec/bsd_ext.td" -include "spec/llvm_libc_stdfix_ext.td" - - -def CTypeAPI : PublicAPI<"ctype.h"> { -} - -def FEnvAPI : PublicAPI<"fenv.h"> { - let Types = ["fenv_t", "fexcept_t"]; -} - -def IntTypesAPI : PublicAPI<"inttypes.h"> { - let Types = ["imaxdiv_t"]; -} - -def MathAPI : PublicAPI<"math.h"> { - let Types = ["double_t", "float_t"]; -} - -def StdIOAPI : PublicAPI<"stdio.h"> { - let Types = ["size_t"]; -} - -def StdlibAPI : PublicAPI<"stdlib.h"> { - let Types = [ - "div_t", - "ldiv_t", - "lldiv_t", - "size_t", - "__bsearchcompare_t", - "__qsortcompare_t", - ]; -} - -def StringAPI : PublicAPI<"string.h"> { - let Types = ["size_t"]; -} - -def TimeAPI : PublicAPI<"time.h"> { - let Types = [ - "clock_t", - "time_t", - "struct tm", - "struct timespec", - ]; -} - -def UCharAPI : PublicAPI<"uchar.h"> { - let Types = [ - "mbstate_t", - "char8_t", - "char16_t", - "char32_t", - ]; -} diff --git a/libc/config/gpu/api.td b/libc/config/gpu/api.td deleted file mode 100644 index 995ff31c4ac9e..0000000000000 --- a/libc/config/gpu/api.td +++ /dev/null @@ -1,50 +0,0 @@ -include "config/public_api.td" - -include "spec/stdc.td" -include "spec/posix.td" -include "spec/gpu_ext.td" -include "spec/gnu_ext.td" -include "spec/stdc_ext.td" -include "spec/llvm_libc_ext.td" - - -def StringAPI : PublicAPI<"string.h"> { - let Types = ["size_t"]; -} - -def StdlibAPI : PublicAPI<"stdlib.h"> { - let Types = [ - "div_t", - "ldiv_t", - "lldiv_t", - "size_t", - "__bsearchcompare_t", - "__qsortcompare_t", - "__qsortrcompare_t", - "__atexithandler_t", - ]; -} - -def FenvAPI: PublicAPI<"fenv.h"> { - let Types = ["fenv_t"]; -} - -def StdIOAPI : PublicAPI<"stdio.h"> { - let Types = [ - "FILE", - "off_t", - "size_t", - ]; -} - -def IntTypesAPI : PublicAPI<"inttypes.h"> { - let Types = ["imaxdiv_t"]; -} - -def TimeAPI : PublicAPI<"time.h"> { - let Types = [ - "clock_t", - "time_t", - "struct timespec", - ]; -} diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td deleted file mode 100644 index a00e0f61b90df..0000000000000 --- a/libc/config/linux/api.td +++ /dev/null @@ -1,276 +0,0 @@ -include "config/public_api.td" - -include "spec/stdc.td" -include "spec/posix.td" -include "spec/linux.td" -include "spec/gnu_ext.td" -include "spec/bsd_ext.td" -include "spec/stdc_ext.td" -include "spec/llvm_libc_ext.td" -include "spec/llvm_libc_stdfix_ext.td" - -def CTypeAPI : PublicAPI<"ctype.h"> { -} - -def FCntlAPI : PublicAPI<"fcntl.h"> { 
- let Types = [ - "mode_t", - "off_t", - ]; -} - -def IntTypesAPI : PublicAPI<"inttypes.h"> { - let Types = ["imaxdiv_t"]; -} - -def MathAPI : PublicAPI<"math.h"> { - let Types = ["double_t", "float_t", "float128"]; -} - -def FenvAPI: PublicAPI<"fenv.h"> { - let Types = ["fenv_t", "fexcept_t"]; -} - -def StringAPI : PublicAPI<"string.h"> { - let Types = ["size_t"]; -} - -def StdIOAPI : PublicAPI<"stdio.h"> { - let Macros = [ - SimpleMacroDef<"stderr", "stderr">, - SimpleMacroDef<"stdin", "stdin">, - SimpleMacroDef<"stdout", "stdout">, - ]; - let Types = [ - "FILE", - "cookie_io_functions_t", - "off_t", - "size_t", - ]; -} - -def StdlibAPI : PublicAPI<"stdlib.h"> { - let Types = [ - "div_t", - "ldiv_t", - "lldiv_t", - "size_t", - "__bsearchcompare_t", - "__qsortcompare_t", - "__qsortrcompare_t", - "__atexithandler_t", - ]; -} - -def TimeAPI : PublicAPI<"time.h"> { - let Types = [ - "clock_t", - "time_t", - "struct tm", - "struct timespec", - "struct timeval", - "clockid_t", - ]; -} - -def SchedAPI : PublicAPI<"sched.h"> { - let Types = [ - "pid_t", - "size_t", - "cpu_set_t", - "struct sched_param", - // Needed according to posix standard - "time_t", - "struct timespec", - ]; -} - -def SysMManAPI : PublicAPI<"sys/mman.h"> { - let Types = ["off_t", "size_t", "mode_t"]; -} - -def SignalAPI : PublicAPI<"signal.h"> { - let Types = [ - "sig_atomic_t", - "sigset_t", - "struct sigaction", - "union sigval", - "siginfo_t", - "stack_t", - "pid_t", - ]; -} - -def ThreadsAPI : PublicAPI<"threads.h"> { - let Macros = [ - SimpleMacroDef<"ONCE_FLAG_INIT", "{0}">, - ]; - - let Types = [ - "__call_once_func_t", - "once_flag", - "cnd_t", - "mtx_t", - "thrd_t", - "thrd_start_t", - "tss_t", - "tss_dtor_t", - ]; - - let Enumerations = [ - "mtx_plain", - "mtx_recursive", - "mtx_timed", - "thrd_timedout", - "thrd_success", - "thrd_busy", - "thrd_error", - "thrd_nomem", - ]; -} - -def PThreadAPI : PublicAPI<"pthread.h"> { - let Types = [ - "__atfork_callback_t", - "__pthread_once_func_t", - "__pthread_start_t", - "__pthread_tss_dtor_t", - "pthread_attr_t", - "pthread_condattr_t", - "pthread_key_t", - "pthread_mutex_t", - "pthread_mutexattr_t", - "pthread_once_t", - "pthread_rwlockattr_t", - "pthread_rwlock_t", - "pthread_spinlock_t", - "pthread_t", - ]; -} - -def DirentAPI : PublicAPI<"dirent.h"> { - let Types = [ - "ino_t", - "DIR", - "struct dirent", - ]; -} - -def UniStdAPI : PublicAPI<"unistd.h"> { - let Types = ["__exec_argv_t", "__exec_envp_t", "off_t", "pid_t", "size_t", - "ssize_t", "uid_t", "__getoptargv_t"]; -} - -def WCharAPI : PublicAPI<"wchar.h"> { - let Types = [ - "wchar_t", - "wint_t", - "size_t", - ]; -} - -def UCharAPI : PublicAPI<"uchar.h"> { - let Types = [ - "mbstate_t", - "char8_t", - "char16_t", - "char32_t", - ]; -} - -def SysRandomAPI : PublicAPI<"sys/random.h"> { - let Types = ["size_t", "ssize_t"]; -} - -def SysSelectAPI : PublicAPI<"sys/select.h"> { - let Types = ["fd_set", "sigset_t", "suseconds_t", "time_t", "struct timespec", - "struct timeval"]; -} - -def SysSocketAPI : PublicAPI<"sys/socket.h"> { - let Types = [ - "sa_family_t", - "socklen_t", - "struct sockaddr", - "struct sockaddr_un", - "struct msghdr", - "struct iovec", - "size_t", - "ssize_t", - ]; -} - -def SysResourceAPI : PublicAPI<"sys/resource.h"> { - let Types = ["rlim_t", "struct rlimit"]; -} - -def SysStatAPI : PublicAPI<"sys/stat.h"> { - let Types = ["mode_t", "dev_t", "ino_t", "nlink_t", "uid_t", "gid_t", "off_t", - "struct timespec", "struct timeval", "blksize_t", "blkcnt_t", - "struct stat"]; -} - -def SysWaitAPI : 
PublicAPI<"sys/wait.h"> { - let Types = ["pid_t", "struct rusage", "siginfo_t"]; -} - -def SysSendfileAPI : PublicAPI<"sys/sendfile.h"> { - let Types = ["off_t", "size_t", "ssize_t"]; -} - -def SysTypesAPI : PublicAPI<"sys/types.h"> { - let Types = [ - "blkcnt_t", - "blksize_t", - "clockid_t", - "dev_t", - "gid_t", - "ino_t", - "mode_t", - "nlink_t", - "off_t", - "pid_t", - "pthread_attr_t", - "pthread_condattr_t", - "pthread_key_t", - "pthread_mutex_t", - "pthread_mutexattr_t", - "pthread_once_t", - "pthread_rwlockattr_t", - "pthread_rwlock_t", - "pthread_t", - "size_t", - "ssize_t", - "suseconds_t", - "time_t", - "uid_t" - ]; -} - -def SysUtsNameAPI : PublicAPI<"sys/utsname.h"> { - let Types = ["struct utsname"]; -} - -def SysEpollAPI : PublicAPI<"sys/epoll.h"> { - let Types = ["struct epoll_event", "struct epoll_data", "sigset_t", "struct timespec"]; -} - -def SpawnAPI : PublicAPI<"spawn.h"> { - let Types = ["mode_t", "pid_t", "posix_spawnattr_t", "posix_spawn_file_actions_t"]; -} - -def TermiosAPI : PublicAPI<"termios.h"> { - let Types = ["cc_t", "pid_t", "speed_t", "struct termios", "tcflag_t"]; -} - -def SetJmpAPI : PublicAPI<"setjmp.h"> { - let Types = ["jmp_buf"]; -} - -def SearchAPI : PublicAPI<"search.h"> { - let Types = ["ACTION", "ENTRY", "struct hsearch_data", "__lsearchcompare_t"]; -} - -def SysStatvfsAPI : PublicAPI<"sys/statvfs.h"> { - let Types = ["struct statvfs"]; -} diff --git a/libc/config/public_api.td b/libc/config/public_api.td deleted file mode 100644 index 1b34506c643c3..0000000000000 --- a/libc/config/public_api.td +++ /dev/null @@ -1,26 +0,0 @@ -include "spec/spec.td" - -class MacroDef { - string Name = name; - string Defn = ""; -} - -class SimpleMacroDef : MacroDef { - let Defn = !strconcat("#define ", name, " ", value); -} - -class MacroDefineIfNot : MacroDef { - let Defn = !strconcat("#ifndef ", name, "\n", - "#define " , name, " ", value, "\n", - "#endif // ", name); -} - -class PublicAPI { - string HeaderName = name; - list Macros = []; - list Types = []; - list Enumerations = []; - list Structs = []; - list Functions = []; - list Objects = []; -} diff --git a/libc/docs/dev/header_generation.rst b/libc/docs/dev/header_generation.rst index ec4206217ca77..0730b9a40c26a 100644 --- a/libc/docs/dev/header_generation.rst +++ b/libc/docs/dev/header_generation.rst @@ -38,9 +38,6 @@ Required Versions: ``build/projects/libc/include/sys``. -New Headergen is turned on by default, but if you want to use old Headergen, -you can include this statement when building: ``-DLIBC_USE_NEW_HEADER_GEN=OFF`` - To add a function to the YAML files, you can either manually enter it in the YAML file corresponding to the header it belongs to or add it through the command line. diff --git a/libc/docs/gpu/building.rst b/libc/docs/gpu/building.rst index 37dccdab6dc34..88643575ae4d9 100644 --- a/libc/docs/gpu/building.rst +++ b/libc/docs/gpu/building.rst @@ -63,10 +63,6 @@ targeting the default host environment as well. Runtimes cross build -------------------- -.. note:: - These instructions need to be updated for new headergen. They may be - inaccurate. - For users wanting more direct control over the build process, the build steps can be done manually instead. This build closely follows the instructions in the :ref:`main documentation` but is specialized for the GPU @@ -82,20 +78,17 @@ compiler. These tools must all be up-to-date with the libc source. 
$> HOST_CXX_COMPILER= # For example "clang++" $> cmake ../llvm \ -G Ninja \ - -DLLVM_ENABLE_PROJECTS="clang;libc" \ + -DLLVM_ENABLE_PROJECTS="clang" \ -DCMAKE_C_COMPILER=$HOST_C_COMPILER \ -DCMAKE_CXX_COMPILER=$HOST_CXX_COMPILER \ -DLLVM_LIBC_FULL_BUILD=ON \ - -DLIBC_HDRGEN_ONLY=ON \ # Only build the 'libc-hdrgen' tool -DCMAKE_BUILD_TYPE=Release # Release suggested to make "clang" fast $> ninja # Build the 'clang' compiler - $> ninja libc-hdrgen # Build the 'libc-hdrgen' tool -Once this has finished the build directory should contain the ``clang`` compiler -and the ``libc-hdrgen`` executable. We will use the ``clang`` compiler to build -the GPU code and the ``libc-hdrgen`` tool to create the necessary headers. We -use these tools to bootstrap the build out of the runtimes directory targeting a -GPU architecture. +Once this has finished the build directory should contain the ``clang`` +compiler executable. We will use the ``clang`` compiler to build the GPU code. +We use these tools to bootstrap the build out of the runtimes directory +targeting a GPU architecture. .. code-block:: sh @@ -105,7 +98,6 @@ GPU architecture. $> TARGET_TRIPLE= $> TARGET_C_COMPILER= $> TARGET_CXX_COMPILER= - $> HDRGEN= $> cmake ../runtimes \ # Point to the runtimes build -G Ninja \ -DLLVM_ENABLE_RUNTIMES=libc \ @@ -113,7 +105,6 @@ GPU architecture. -DCMAKE_CXX_COMPILER=$TARGET_CXX_COMPILER \ -DLLVM_LIBC_FULL_BUILD=ON \ -DLLVM_RUNTIMES_TARGET=$TARGET_TRIPLE \ - -DLIBC_HDRGEN_EXE=$HDRGEN \ -DCMAKE_BUILD_TYPE=Release $> ninja install diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt index 899a93ad72d4c..7fc67141996ec 100644 --- a/libc/include/CMakeLists.txt +++ b/libc/include/CMakeLists.txt @@ -17,25 +17,17 @@ add_header( __llvm-libc-common.h ) +# TODO: Can we simplify this macro expansion? 
+# https://github.com/llvm/llvm-project/issues/117254 macro(add_header_macro TARGET_NAME YAML_FILE DEF_FILE GEN_HDR DEPENDS) - if (LIBC_USE_NEW_HEADER_GEN) - add_gen_header2( - ${TARGET_NAME} - YAML_FILE ${YAML_FILE} - DEF_FILE ${DEF_FILE} - GEN_HDR ${GEN_HDR} - ${DEPENDS} - ${ARGN} - ) - else() - add_gen_header( - ${TARGET_NAME} - DEF_FILE ${DEF_FILE} - GEN_HDR ${GEN_HDR} - ${DEPENDS} - ${ARGN} - ) - endif() + add_gen_header( + ${TARGET_NAME} + YAML_FILE ${YAML_FILE} + DEF_FILE ${DEF_FILE} + GEN_HDR ${GEN_HDR} + ${DEPENDS} + ${ARGN} + ) endmacro() add_header_macro( diff --git a/libc/spec/bsd_ext.td b/libc/spec/bsd_ext.td deleted file mode 100644 index 2b91324e36db9..0000000000000 --- a/libc/spec/bsd_ext.td +++ /dev/null @@ -1,87 +0,0 @@ -def BsdExtensions : StandardSpec<"BSDExtensions"> { - HeaderSpec Math = HeaderSpec< - "math.h", - [], // Macros - [], // Types - [], // Enumerations - [ - FunctionSpec<"isnan", RetValSpec, [ArgSpec]>, - FunctionSpec<"isnanf", RetValSpec, [ArgSpec]>, - FunctionSpec<"isnanl", RetValSpec, [ArgSpec]>, - ] - >; - - HeaderSpec String = HeaderSpec< - "string.h", - [], // Macros - [], // Types - [], // Enumerations - [ - FunctionSpec< - "strlcat", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "strlcpy", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "strsep", - RetValSpec, - [ArgSpec, ArgSpec] - >, - ] - >; - - HeaderSpec Strings = HeaderSpec< - "strings.h", - [], // Macros - [], // Types - [], // Enumerations - [ - FunctionSpec< - "strcasecmp", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "strncasecmp", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "index", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "rindex", - RetValSpec, - [ArgSpec, ArgSpec] - >, - ] - >; - - HeaderSpec SysWait = HeaderSpec< - "sys/wait.h", - [], // Macros - [StructRUsage], // Types - [], // Enumerations - [ - FunctionSpec< - "wait4", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec, ArgSpec] - > - ] - >; - - let Headers = [ - Math, - String, - Strings, - SysWait, - ]; -} diff --git a/libc/spec/gnu_ext.td b/libc/spec/gnu_ext.td deleted file mode 100644 index 64121aed9574f..0000000000000 --- a/libc/spec/gnu_ext.td +++ /dev/null @@ -1,316 +0,0 @@ -def CpuSetT : NamedType<"cpu_set_t">; -def CpuSetPtr : PtrType; -def ConstCpuSetPtr : ConstType; - -def QSortRCompareT : NamedType<"__qsortrcompare_t">; -def StructHsearchData : NamedType<"struct hsearch_data">; -def StructHsearchDataPtr : PtrType; - -def GnuExtensions : StandardSpec<"GNUExtensions"> { - NamedType CookieIOFunctionsT = NamedType<"cookie_io_functions_t">; - HeaderSpec CType = HeaderSpec< - "ctype.h", - [], // Macros - [], // Types - [], // Enumerations - [ - FunctionSpec< - "toascii", - RetValSpec, - [ArgSpec] - >, - ] - >; - - HeaderSpec Malloc = HeaderSpec< - "malloc.h", - [], // Macros - [], // Types - [], // Enumerations - [ - FunctionSpec<"mallopt", RetValSpec, [ArgSpec, ArgSpec]>, - ] - >; - - HeaderSpec Math = HeaderSpec< - "math.h", - [], // Macros - [], // Types - [], // Enumerations - [ - FunctionSpec< - "sincosf", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "lgamma_r", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "lgammaf_r", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "lgammal_r", - RetValSpec, - [ArgSpec, ArgSpec] - >, - ] - >; - - HeaderSpec Sched = HeaderSpec< - "sched.h", - [], // Macros - [PidT, SizeTType, CpuSetT], // Types - [], // Enumerations - [ - FunctionSpec< - "sched_getaffinity", 
- RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "sched_setaffinity", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - ] - >; - HeaderSpec String = HeaderSpec< - "string.h", - [], // Macros - [], // Types - [], // Enumerations - [ - FunctionSpec< - "memmem", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "memrchr", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "strerror_r", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "strcasestr", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "strchrnul", - RetValSpec, - [ArgSpec, ArgSpec] - >, - ] - >; - - HeaderSpec Search = HeaderSpec< - "search.h", - [], // Macros - [ - StructHsearchData - ], - [], // Enumerations - [ - FunctionSpec< - "hcreate_r", - RetValSpec, - [ - ArgSpec, - ArgSpec - ] - >, - FunctionSpec< - "hdestroy_r", - RetValSpec, - [ - ArgSpec - ] - >, - FunctionSpec< - "hsearch_r", - RetValSpec, - [ - ArgSpec, - ArgSpec, - ArgSpec, - ArgSpec - ] - >, - ] - >; - - HeaderSpec FEnv = HeaderSpec< - "fenv.h", - [], // Macros - [], // Types - [], // Enumerations - [ - FunctionSpec< - "fedisableexcept", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "feenableexcept", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "fegetexcept", - RetValSpec, - [] - > - ] - >; - - HeaderSpec StdIO = HeaderSpec< - "stdio.h", - [], // Macros - [CookieIOFunctionsT], // Types - [], // Enumerations - [ - FunctionSpec< - "clearerr_unlocked", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "feof_unlocked", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "ferror_unlocked", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "fopencookie", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "fread_unlocked", - RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec, - ArgSpec] - >, - FunctionSpec< - "fwrite_unlocked", - RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec, - ArgSpec] - >, - FunctionSpec< - "fgetc_unlocked", - RetValSpec, - [ArgSpec] - >, - ] - >; - - HeaderSpec StdLib = HeaderSpec< - "stdlib.h", - [], // Macros - [QSortRCompareT], // Types - [], // Enumerations - [ - FunctionSpec< - "qsort_r", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec, ArgSpec, ArgSpec] - >, - ] - >; - - HeaderSpec PThread = HeaderSpec< - "pthread.h", - [], // Macros - [], // Types - [], // Enumerations - [ - FunctionSpec< - "pthread_setname_np", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_getname_np", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - ] - >; - - HeaderSpec SysAuxv = HeaderSpec< - "sys/auxv.h", - [], // Macros - [], // Types - [], // Enumerations - [ - FunctionSpec< - "getauxval", - RetValSpec, - [ArgSpec] - >, - ] // Functions - >; - - HeaderSpec SendFile = HeaderSpec< - "sys/sendfile.h", - [], // Macros - [OffTType, SizeTType, SSizeTType,], // Types - [], // Enumerations - [ - FunctionSpec< - "sendfile", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec, ArgSpec] - >, - ] - >; - - HeaderSpec UniStd = HeaderSpec< - "unistd.h", - [], // Macros - [], // Types - [], // Enumerations - [ - FunctionSpec< - "dup2", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - ] - >; - - let Headers = [ - CType, - FEnv, - Malloc, - Math, - PThread, - Sched, - SendFile, - SysAuxv, - StdIO, - StdLib, - String, - Search, - UniStd, - ]; -} diff --git a/libc/spec/gpu_ext.td b/libc/spec/gpu_ext.td deleted file mode 100644 index d99531dc06bcd..0000000000000 --- a/libc/spec/gpu_ext.td +++ /dev/null @@ -1,18 +0,0 @@ -def GPUExtensions : StandardSpec<"GPUExtensions"> { - HeaderSpec 
RPC = HeaderSpec< - "gpu/rpc.h", - [], // Macros - [], // Types - [], // Enumerations - [ - FunctionSpec< - "rpc_host_call", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - ] - >; - let Headers = [ - RPC, - ]; -} diff --git a/libc/spec/linux.td b/libc/spec/linux.td deleted file mode 100644 index 99e0949a592df..0000000000000 --- a/libc/spec/linux.td +++ /dev/null @@ -1,334 +0,0 @@ -def StructEpollEvent : NamedType<"struct epoll_event">; -def StructEpollEventPtr : PtrType; - -def StructEpollData : NamedType<"struct epoll_data">; - -def Linux : StandardSpec<"Linux"> { - HeaderSpec Errno = HeaderSpec< - "errno.h", - [ - Macro<"ENOMEDIUM">, - Macro<"ENOTBLK">, - Macro<"EMEDIUMTYPE">, - Macro<"EBADSLT">, - Macro<"ECHRNG">, - Macro<"ERFKILL">, - Macro<"EUSERS">, - Macro<"EBADR">, - Macro<"EL3HLT">, - Macro<"ENOTUNIQ">, - Macro<"EXFULL">, - Macro<"EHOSTDOWN">, - Macro<"EL3RST">, - Macro<"ENOPKG">, - Macro<"ENOCSI">, - Macro<"EUNATCH">, - Macro<"EREMCHG">, - Macro<"ETOOMANYREFS">, - Macro<"EL2HLT">, - Macro<"EBADFD">, - Macro<"EREMOTEIO">, - Macro<"ENAVAIL">, - Macro<"ELIBEXEC">, - Macro<"ESHUTDOWN">, - Macro<"ENOKEY">, - Macro<"ESTRPIPE">, - Macro<"EKEYREJECTED">, - Macro<"ESRMNT">, - Macro<"EKEYREVOKED">, - Macro<"EBADE">, - Macro<"ELIBBAD">, - Macro<"EISNAM">, - Macro<"EBFONT">, - Macro<"EPFNOSUPPORT">, - Macro<"EREMOTE">, - Macro<"EDEADLOCK">, - Macro<"ENONET">, - Macro<"EDOTDOT">, - Macro<"EKEYEXPIRED">, - Macro<"ELIBSCN">, - Macro<"ERESTART">, - Macro<"EBADRQC">, - Macro<"EUCLEAN">, - Macro<"ENOANO">, - Macro<"ELIBACC">, - Macro<"EHWPOISON">, - Macro<"ELIBMAX">, - Macro<"ESOCKTNOSUPPORT">, - Macro<"ENOTNAM">, - Macro<"ELNRNG">, - Macro<"EL2NSYNC">, - Macro<"EADV">, - Macro<"ECOMM">, - ] - >; - - HeaderSpec Sched = HeaderSpec< - "sched.h", - [ - Macro<"SCHED_OTHER">, - Macro<"SCHED_FIFO">, - Macro<"SCHED_RR">, - Macro<"SCHED_BATCH">, - Macro<"SCHED_ISO">, - Macro<"SCHED_IDLE">, - Macro<"SCHED_DEADLINE">, - ], - [], // Types - [], // Enumerations - [] // Functions - >; - - HeaderSpec SysMMan = HeaderSpec< - "sys/mman.h", - [Macro<"MAP_ANONYMOUS">], - [], // Types - [], // Enumerations - [ - FunctionSpec< - "mincore", - RetValSpec, - [ - ArgSpec, - ArgSpec, - ArgSpec, - ] - >, - FunctionSpec< - "mlock2", - RetValSpec, - [ - ArgSpec, - ArgSpec, - ArgSpec, - ] - >, - FunctionSpec< - "remap_file_pages", - RetValSpec, - [ - ArgSpec, - ArgSpec, - ArgSpec, - ArgSpec, - ArgSpec, - FunctionSpec< - "process_mrelease", - RetValSpec, - [ - ArgSpec, - ArgSpec - ] - >, - FunctionSpec< - "mremap", - RetValSpec, - [ - ArgSpec, - ArgSpec, - ArgSpec, - ArgSpec, - ArgSpec, - ] - >, - ] // Functions - >; - - - HeaderSpec SysPrctl = HeaderSpec< - "sys/prctl.h", - [], // Macros - [], // Types - [], // Enumerations - [ - FunctionSpec< - "prctl", - RetValSpec, - [ - ArgSpec, - ArgSpec, - ArgSpec, - ArgSpec, - ArgSpec, - ] - >, - ] // Functions - >; - - HeaderSpec SysRandom = HeaderSpec< - "sys/random.h", - [ - Macro<"GRND_RANDOM">, - Macro<"GRND_NONBLOCK">, - Macro<"GRND_INSECURE">, - ], - [SizeTType, SSizeTType], // Types - [], // Enumerations - [ - FunctionSpec< - "getrandom", - RetValSpec, - [ - ArgSpec, - ArgSpec, - ArgSpec - ] - >, - ] - >; - - HeaderSpec SysTime = HeaderSpec< - "sys/time.h", - [ - Macro<"timeradd">, - Macro<"timersub">, - Macro<"timerclear">, - Macro<"timerisset">, - Macro<"timercmp">, - ], - [StructTimevalType], // Types - [], // Enumerations - [] // Functions - >; - - - HeaderSpec SysEpoll = HeaderSpec< - "sys/epoll.h", - [], // Macros - [ - StructEpollEvent, - StructEpollData, - 
SigSetType, - StructTimeSpec, - ], // Types - [], // Enumerations - [ - FunctionSpec< - "epoll_create", - RetValSpec, - [ - ArgSpec - ] - >, - FunctionSpec< - "epoll_create1", - RetValSpec, - [ - ArgSpec - ] - >, - FunctionSpec< - "epoll_ctl", - RetValSpec, - [ - ArgSpec, - ArgSpec, - ArgSpec, - ArgSpec - ] - >, - FunctionSpec< - "epoll_wait", - RetValSpec, - [ - ArgSpec, - ArgSpec, - ArgSpec, - ArgSpec - ] - >, - FunctionSpec< - "epoll_pwait", - RetValSpec, - [ - ArgSpec, - ArgSpec, - ArgSpec, - ArgSpec, - ArgSpec - ] - >, - FunctionSpec< - "epoll_pwait2", - RetValSpec, - [ - ArgSpec, - ArgSpec, - ArgSpec, - ArgSpec, - ArgSpec - ] - >, - ] // Functions - >; - - HeaderSpec Signal = HeaderSpec< - "signal.h", - [ - Macro<"NSIG">, - - Macro<"SIGHUP">, - Macro<"SIGINT">, - Macro<"SIGQUIT">, - Macro<"SIGILL">, - Macro<"SIGTRAP">, - Macro<"SIGABRT">, - Macro<"SIGIOT">, - Macro<"SIGBUS">, - Macro<"SIGFPE">, - Macro<"SIGKILL">, - Macro<"SIGUSR1">, - Macro<"SIGSEGV">, - Macro<"SIGUSR2">, - Macro<"SIGPIPE">, - Macro<"SIGALRM">, - Macro<"SIGTERM">, - Macro<"SIGSTKFLT">, - Macro<"SIGCHLD">, - Macro<"SIGCONT">, - Macro<"SIGSTOP">, - Macro<"SIGTSTP">, - Macro<"SIGTTIN">, - Macro<"SIGTTOU">, - Macro<"SIGURG">, - Macro<"SIGXCPU">, - Macro<"SIGXFSZ">, - Macro<"SIGVTALRM">, - Macro<"SIGPROF">, - Macro<"SIGWINCH">, - Macro<"SIGIO">, - Macro<"SIGPOLL">, - Macro<"SIGPWR">, - Macro<"SIGSYS">, - Macro<"SIGUNUSED">, - ] - >; - - - HeaderSpec UniStd = HeaderSpec< - "unistd.h", - [], // Macros - [], - [], // Enumerations - [ - FunctionSpec< - "pipe2", - RetValSpec, - [ArgSpec, ArgSpec] //TODO: make this int[2] - >, - ], - [] - >; - - - let Headers = [ - Errno, - SysEpoll, - SysMMan, - SysPrctl, - SysRandom, - SysTime, - Signal, - UniStd, - ]; -} diff --git a/libc/spec/llvm_libc_ext.td b/libc/spec/llvm_libc_ext.td deleted file mode 100644 index cd63e34a44ef0..0000000000000 --- a/libc/spec/llvm_libc_ext.td +++ /dev/null @@ -1,116 +0,0 @@ -def LLVMLibcExt : StandardSpec<"llvm_libc_ext"> { - HeaderSpec Strings = HeaderSpec< - "strings.h", - [], // Macros - [], // Types - [], // Enumerations - [ - FunctionSpec< - "bcopy", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "bzero", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "bcmp", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - ] - >; - - HeaderSpec Assert = HeaderSpec< - "assert.h", - [], // Macros - [], // Types - [], // Enumerations - [ - FunctionSpec< - "__assert_fail", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec, ArgSpec] - >, - ] - >; - - HeaderSpec Sched = HeaderSpec< - "sched.h", - [], // Macros - [PidT, SizeTType, CpuSetT], // Types - [], // Enumerations - [ - FunctionSpec< - "__sched_getcpucount", - RetValSpec, - [ArgSpec, ArgSpec] - >, - ] - >; - - HeaderSpec Math = HeaderSpec< - "math.h", - [], // Macros - [], // Types - [], // Enumerations - [ - GuardedFunctionSpec<"daddf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - GuardedFunctionSpec<"ddivf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - GuardedFunctionSpec<"dfmaf128", RetValSpec, [ArgSpec, ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - GuardedFunctionSpec<"dsqrtf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - GuardedFunctionSpec<"dsubf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - - GuardedFunctionSpec<"f16add", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"f16addf", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"f16addl", 
RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - - GuardedFunctionSpec<"f16sub", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"f16subf", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"f16subl", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - - GuardedFunctionSpec<"faddf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - GuardedFunctionSpec<"fdivf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - GuardedFunctionSpec<"ffmaf128", RetValSpec, [ArgSpec, ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - GuardedFunctionSpec<"fmulf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - GuardedFunctionSpec<"dmulf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - GuardedFunctionSpec<"f16mul", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"f16mulf", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"f16mull", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - - GuardedFunctionSpec<"f16div", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"f16divf", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"f16divl", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - - GuardedFunctionSpec<"f16fma", RetValSpec, [ArgSpec, ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"f16fmaf", RetValSpec, [ArgSpec, ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"f16fmal", RetValSpec, [ArgSpec, ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - - GuardedFunctionSpec<"f16sqrt", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"f16sqrtf", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"f16sqrtl", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - - GuardedFunctionSpec<"fsqrtf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - GuardedFunctionSpec<"fsubf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"powi", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"powif", RetValSpec, [ArgSpec, ArgSpec]>, - ] - >; - - let Headers = [ - Assert, - Math, - Sched, - Strings, - ]; -} diff --git a/libc/spec/llvm_libc_stdfix_ext.td b/libc/spec/llvm_libc_stdfix_ext.td deleted file mode 100644 index 7bc7ec5464081..0000000000000 --- a/libc/spec/llvm_libc_stdfix_ext.td +++ /dev/null @@ -1,27 +0,0 @@ -def LLVMLibcStdfixExt : StandardSpec<"llvm_libc_stdfix_ext"> { - HeaderSpec StdFix = HeaderSpec< - "stdfix.h", - [], // macros - [], // types - [], // enums - [ // functions - GuardedFunctionSpec<"exphk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"expk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - - GuardedFunctionSpec<"sqrtuhr", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"sqrtur", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"sqrtulr", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - - GuardedFunctionSpec<"sqrtuhk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"sqrtuk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"sqrtulk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - - GuardedFunctionSpec<"uhksqrtus", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"uksqrtui", 
RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - ] - >; - - let Headers = [ - StdFix, - ]; -} diff --git a/libc/spec/posix.td b/libc/spec/posix.td deleted file mode 100644 index e354deef340f1..0000000000000 --- a/libc/spec/posix.td +++ /dev/null @@ -1,1867 +0,0 @@ -def SigSetType : NamedType<"sigset_t">; -def SigSetPtrType : PtrType; -def ConstSigSetPtrType : ConstType; -def RestrictedSigSetType : RestrictedPtrType; -def ConstRestrictedSigSetType : ConstType; - -def SigInfoType : NamedType<"siginfo_t">; -def UnionSigVal : NamedType<"union sigval">; - -def StructSigaction : NamedType<"struct sigaction">; -def StructSigactionPtr : PtrType; -def ConstStructSigactionPtr : ConstType; -def RestrictedStructSigactionPtr : RestrictedPtrType; -def ConstRestrictedStructSigactionPtr : ConstType; - -def PThreadStartT : NamedType<"__pthread_start_t">; -def PThreadTSSDtorT : NamedType<"__pthread_tss_dtor_t">; -def PThreadKeyT : NamedType<"pthread_key_t">; -def PThreadKeyTPtr : PtrType; -def PThreadOnceT : NamedType<"pthread_once_t">; -def PThreadOnceTPtr : PtrType; -def PThreadOnceCallback : NamedType<"__pthread_once_func_t">; - -def InoT : NamedType<"ino_t">; -def UidT : NamedType<"uid_t">; -def GidT : NamedType<"gid_t">; -def DevT : NamedType<"dev_t">; -def ClockIdT : NamedType<"clockid_t">; -def RestrictedClockIdTPtr : RestrictedPtrType; -def BlkSizeT : NamedType<"blksize_t">; -def BlkCntT : NamedType<"blkcnt_t">; -def NLinkT : NamedType<"nlink_t">; - -def StatType : NamedType<"struct stat">; -def StatTypePtr : PtrType; -def RestrictedStatTypePtr : RestrictedPtrType; - -def DIR : NamedType<"DIR">; -def DIRPtr : PtrType
; -def DIRRestrictedPtr : RestrictedPtrType; -def StructDirent : NamedType<"struct dirent">; -def StructDirentPtr : PtrType; -def StructDirentPtrPtr : PtrType; -def ConstStructDirentPtrPtr : ConstType; - -def StructSchedParam : NamedType<"struct sched_param">; -def StructSchedParamPtr : PtrType; -def ConstStructSchedParamPtr : ConstType; - -def ExecArgvT : NamedType<"__exec_argv_t">; -def ExecEnvpT : NamedType<"__exec_envp_t">; - -def AtForkCallbackT : NamedType<"__atfork_callback_t">; - -def PosixSpawnFileActionsT : NamedType<"posix_spawn_file_actions_t">; -def PosixSpawnFileActionsTPtr : PtrType; -def ConstPosixSpawnFileActionsTPtr : ConstType; -def PosixSpawnFileActionsTRestrictedPtr : RestrictedPtrType; - -def PosixSpawnAttrT : NamedType<"posix_spawnattr_t">; -def RestrictedPosixSpawnAttrTPtrType : RestrictedPtrType; - -def CcT : NamedType<"cc_t">; -def SpeedT : NamedType<"speed_t">; -def StructTermios : NamedType<"struct termios">; -def StructTermiosPtr : PtrType; -def ConstStructTermiosPtr : ConstType; -def TcFlagT : NamedType<"tcflag_t">; - -def StackT : NamedType<"stack_t">; -def StackTPtr : PtrType; -def RestrictedStackTPtr : RestrictedPtrType; -def ConstRestrictedStackTPtr : ConstType; - -def FdSet : NamedType<"fd_set">; -def FdSetPtr : PtrType; -def RestrictedFdSetPtr : RestrictedPtrType; - -def GetoptArgvT : NamedType<"__getoptargv_t">; - -def SAFamilyType : NamedType<"sa_family_t">; -def SocklenType : NamedType<"socklen_t">; -def SocklenPtr : PtrType; - -def StructSockAddr : NamedType<"struct sockaddr">; -def StructSockAddrPtr : PtrType; -def ConstStructSockAddrPtr : ConstType; - -def StructMsghdr : NamedType<"struct msghdr">; -def StructMsghdrPtr : PtrType; -def ConstStructMsghdrPtr : ConstType; - -def StructIovec : NamedType<"struct iovec">; -def StructIovecPtr : PtrType; -def ConstStructIovecPtr : ConstType; - -def StructSockAddrUn : NamedType<"struct sockaddr_un">; - -def StructStatvfs : NamedType<"struct statvfs">; -def StructStatvfsPtr : PtrType; -def RestrictedStructStatvfsPtr : RestrictedPtrType; - -// The function pointer type for the predicate for lsearch, lfind -def LSearchCompareT : NamedType<"__lsearchcompare_t">; - -def POSIX : StandardSpec<"POSIX"> { - PtrType CharPtr = PtrType; - RestrictedPtrType RestrictedCharPtr = RestrictedPtrType; - RestrictedPtrType CharRestrictedDoublePtr = RestrictedPtrType; - ConstType ConstCharPtr = ConstType; - ConstType ConstRestrictedCharPtr = ConstType; - - NamedType ModeTType = NamedType<"mode_t">; - - NamedType PThreadAttrTType = NamedType<"pthread_attr_t">; - PtrType PThreadAttrTPtr = PtrType; - RestrictedPtrType RestrictedPThreadAttrTPtr = RestrictedPtrType; - ConstType ConstPThreadAttrTPtr = ConstType; - ConstType ConstRestrictedPThreadAttrTPtr = ConstType; - - NamedType PThreadCondAttrTType = NamedType<"pthread_condattr_t">; - PtrType PThreadCondAttrTPtr = PtrType; - ConstType ConstRestrictedPThreadCondAttrTPtr = ConstType>; - - NamedType PThreadRWLockAttrTType = NamedType<"pthread_rwlockattr_t">; - PtrType PThreadRWLockAttrTPtr = PtrType; - ConstType ConstPThreadRWLockAttrTPtr = ConstType; - RestrictedPtrType RestrictedPThreadRWLockAttrTPtr = RestrictedPtrType; - ConstType ConstRestrictedPThreadRWLockAttrTPtr = ConstType; - - NamedType PThreadMutexAttrTType = NamedType<"pthread_mutexattr_t">; - PtrType PThreadMutexAttrTPtr = PtrType; - RestrictedPtrType RestrictedPThreadMutexAttrTPtr = RestrictedPtrType; - ConstType ConstPThreadMutexAttrTPtr = ConstType; - ConstType ConstRestrictedPThreadMutexAttrTPtr = ConstType; - - 
NamedType PThreadMutexTType = NamedType<"pthread_mutex_t">; - PtrType PThreadMutexTPtr = PtrType; - RestrictedPtrType RestrictedPThreadMutexTPtr = RestrictedPtrType; - ConstType ConstPThreadMutexTPtr = ConstType; - ConstType ConstRestrictedPThreadMutexTPtr = ConstType; - - NamedType PThreadRWLockTType = NamedType<"pthread_rwlock_t">; - PtrType PThreadRWLockTPtr = PtrType; - RestrictedPtrType RestrictedPThreadRWLockTPtr = RestrictedPtrType; - - NamedType PThreadSpinLockTType = NamedType<"pthread_spinlock_t">; - PtrType PThreadSpinLockTPtr = PtrType; - - PtrType PThreadTPtr = PtrType; - RestrictedPtrType RestrictedPThreadTPtr = RestrictedPtrType; - - HeaderSpec Errno = HeaderSpec< - "errno.h", - [ - Macro<"E2BIG">, - Macro<"EACCES">, - Macro<"EADDRINUSE">, - Macro<"EADDRNOTAVAIL">, - Macro<"EAFNOSUPPORT">, - Macro<"EAGAIN">, - Macro<"EALREADY">, - Macro<"EBADF">, - Macro<"EBADMSG">, - Macro<"EBUSY">, - Macro<"ECANCELED">, - Macro<"ECHILD">, - Macro<"ECONNABORTED">, - Macro<"ECONNREFUSED">, - Macro<"ECONNRESET">, - Macro<"EDEADLK">, - Macro<"EDESTADDRREQ">, - Macro<"EDQUOT">, - Macro<"EEXIST">, - Macro<"EFAULT">, - Macro<"EFBIG">, - Macro<"EHOSTUNREACH">, - Macro<"EIDRM">, - Macro<"EINPROGRESS">, - Macro<"EINTR">, - Macro<"EINVAL">, - Macro<"EIO">, - Macro<"EISCONN">, - Macro<"EISDIR">, - Macro<"ELOOP">, - Macro<"EMFILE">, - Macro<"EMLINK">, - Macro<"EMSGSIZE">, - Macro<"EMULTIHOP">, - Macro<"ENAMETOOLONG">, - Macro<"ENETDOWN">, - Macro<"ENETRESET">, - Macro<"ENETUNREACH">, - Macro<"ENFILE">, - Macro<"ENOBUFS">, - Macro<"ENODATA">, - Macro<"ENODEV">, - Macro<"ENOENT">, - Macro<"ENOEXEC">, - Macro<"ENOLCK">, - Macro<"ENOLINK">, - Macro<"ENOMEM">, - Macro<"ENOMSG">, - Macro<"ENOPROTOOPT">, - Macro<"ENOSPC">, - Macro<"ENOSR">, - Macro<"ENOSTR">, - Macro<"ENOSYS">, - Macro<"ENOTCONN">, - Macro<"ENOTDIR">, - Macro<"ENOTEMPTY">, - Macro<"ENOTRECOVERABLE">, - Macro<"ENOTSOCK">, - Macro<"ENOTSUP">, - Macro<"ENOTTY">, - Macro<"ENXIO">, - Macro<"EOPNOTSUPP">, - Macro<"EOVERFLOW">, - Macro<"EOWNERDEAD">, - Macro<"EPERM">, - Macro<"EPIPE">, - Macro<"EPROTO">, - Macro<"EPROTONOSUPPORT">, - Macro<"EPROTOTYPE">, - Macro<"EROFS">, - Macro<"ESPIPE">, - Macro<"ESRCH">, - Macro<"ESTALE">, - Macro<"ETIME">, - Macro<"ETIMEDOUT">, - Macro<"ETXTBSY">, - Macro<"EWOULDBLOCK">, - Macro<"EXDEV">, - ], - [], // Types - [], // Enumerations - [] // Functions - >; - - HeaderSpec DlFcn = HeaderSpec< - "dlfcn.h", - [ - Macro<"RTLD_LAZY">, - Macro<"RTLD_NOW">, - Macro<"RTLD_GLOBAL">, - Macro<"RTLD_LOCAL">, - ], - [], // Types - [], // Enumerations - [ - FunctionSpec< - "dlclose", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "dlerror", - RetValSpec, - [] - >, - FunctionSpec< - "dlopen", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "dlsym", - RetValSpec, - [ArgSpec, ArgSpec] - >, - ] - >; - - HeaderSpec FCntl = HeaderSpec< - "fcntl.h", - [], // Macros - [ - ModeTType, - OffTType, - ], - [], // Enumerations - [ - FunctionSpec< - "creat", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "fcntl", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "open", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "openat", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec, ArgSpec] - >, - ] - >; - - HeaderSpec SysMMan = HeaderSpec< - "sys/mman.h", - [ - // TODO: Add a facility to bunch macros into bitwise-or-able groups. - // POSIX requires it, so such thing should be captured in this spec. 
- Macro<"PROT_EXEC">, - Macro<"PROT_NONE">, - Macro<"PROT_READ">, - Macro<"PROT_WRITE">, - - Macro<"MAP_FIXED">, - Macro<"MAP_PRIVATE">, - Macro<"MAP_SHARED">, - - Macro<"MAP_FAILED">, - ], - [ - SizeTType, - OffTType, - ModeTType, - ], - [], // Enumerations - [ - FunctionSpec< - "madvise", - RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec] - >, - FunctionSpec< - "mmap", - RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec, - ArgSpec, - ArgSpec, - ArgSpec] - >, - FunctionSpec< - "mprotect", - RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec] - >, - FunctionSpec< - "munmap", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "posix_madvise", - RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec] - >, - FunctionSpec< - "mlock", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "munlock", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "mlockall", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "munlockall", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "msync", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "shm_open", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "shm_unlink", - RetValSpec, - [ArgSpec] - >, - ] - >; - - HeaderSpec Signal = HeaderSpec< - "signal.h", - [], // Macros - [ - SigInfoType, - SigSetType, - StackT, - StructSigaction, - UnionSigVal, - PidT, - ], - [], // Enumerations - [ - FunctionSpec< - "kill", - RetValSpec, - [ArgSpec, - ArgSpec] - >, - FunctionSpec< - "sigaction", - RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec] - >, - FunctionSpec< - "sigaltstack", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "sigdelset", - RetValSpec, - [ArgSpec, - ArgSpec] - >, - FunctionSpec< - "sigprocmask", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "sigemptyset", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "sigaddset", - RetValSpec, - [ArgSpec, - ArgSpec] - >, - FunctionSpec< - "sigfillset", - RetValSpec, - [ArgSpec] - >, - ] - >; - - HeaderSpec UniStd = HeaderSpec< - "unistd.h", - [], // Macros - [ - ExecArgvT, - ExecEnvpT, - OffTType, - SSizeTType, - SizeTType, - PidT, - UidT, - GetoptArgvT, - ], - [], // Enumerations - [ - FunctionSpec< - "_exit", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "access", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "chdir", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "dup", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "dup2", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "dup3", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "fchdir", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "getcwd", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "close", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "execv", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "execve", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "fork", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "fsync", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "ftruncate", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "geteuid", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "getpid", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "getppid", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "gettid", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "getuid", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "isatty", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "link", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "linkat", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec, ArgSpec, 
ArgSpec] - >, - FunctionSpec< - "lseek", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "pread", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "pwrite", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "read", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "readlink", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "readlinkat", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "rmdir", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "getpid", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "getppid", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "link", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "linkat", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "lseek", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "pipe", - RetValSpec, - [ArgSpec] //TODO: make this int[2] - >, - FunctionSpec< - "pread", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "pwrite", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "read", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "readlink", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "readlinkat", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "rmdir", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "swab", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "symlink", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "symlinkat", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "sysconf", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "__llvm_libc_syscall", - RetValSpec, - [ArgSpec,ArgSpec,ArgSpec,ArgSpec,ArgSpec,ArgSpec,ArgSpec] - >, - FunctionSpec< - "truncate", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "unlink", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "unlinkat", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "write", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "getopt", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - ], - [ - ObjectSpec<"environ", "char **">, - ObjectSpec< - "optarg", - "char *" - >, - ObjectSpec< - "optind", - "int" - >, - ObjectSpec< - "opterr", - "int" - >, - ObjectSpec< - "optopt", - "int" - >, - ] - >; - - HeaderSpec StdLib = HeaderSpec< - "stdlib.h", - [], // Macros - [], // Types - [], // Enumerations - [ - FunctionSpec< - "getenv", - RetValSpec, - [ArgSpec] - >, - ] - >; - - HeaderSpec Sched = HeaderSpec< - "sched.h", - [], // Macros - [PidT, TimeTType, StructTimeSpec, StructSchedParam], // Types - [], // Enumerations - [ - FunctionSpec< - "sched_yield", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "sched_setparam", - RetValSpec, - [ArgSpec, ArgSpec] - >, - - FunctionSpec< - "sched_getparam", - RetValSpec, - [ArgSpec, ArgSpec] - >, - - FunctionSpec< - "sched_setscheduler", - RetValSpec, - [ArgSpec] - >, - - FunctionSpec< - "sched_getscheduler", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - - FunctionSpec< - "sched_get_priority_min", - RetValSpec, - [ArgSpec] - >, - - FunctionSpec< - "sched_get_priority_max", - RetValSpec, - [ArgSpec] - >, - - FunctionSpec< - "sched_rr_get_interval", - RetValSpec, - [ArgSpec, ArgSpec] - >, - ] - >; - - HeaderSpec String = HeaderSpec< - "string.h", - [ - Macro<"NULL">, - ], - [ - SizeTType, - ], - [], // Enumerations - [ - FunctionSpec< - "memccpy", 
- RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec, - ArgSpec] - >, - FunctionSpec< - "mempcpy", - RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec] - >, - FunctionSpec< - "stpcpy", - RetValSpec, - [ArgSpec, - ArgSpec] - >, - FunctionSpec< - "stpncpy", - RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec] - >, - FunctionSpec< - "strnlen", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "strtok_r", - RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec] - >, - FunctionSpec< - "strsignal", - RetValSpec, - [ArgSpec] - >, - ] - >; - - HeaderSpec CType = HeaderSpec< - "ctype.h", - [], // Macros - [], // Types - [], // Enumerations - [ - FunctionSpec< - "isascii", - RetValSpec, - [ArgSpec] - >, - ] - >; - - NamedType RLimTType = NamedType<"rlim_t">; - NamedType StructRLimitType = NamedType<"struct rlimit">; - PtrType StructRLimitPtr = PtrType; - ConstType ConstStructRLimitPtr = ConstType; - HeaderSpec SysResource = HeaderSpec< - "sys/resource.h", - [], // Macros - [RLimTType, StructRLimitType], // Types - [], // Enumerations - [ - FunctionSpec< - "getrlimit", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "setrlimit", - RetValSpec, - [ArgSpec] - >, - ] - >; - - HeaderSpec SysStat = HeaderSpec< - "sys/stat.h", - [], // Macros - [ - ModeTType, - DevT, - InoT, - UidT, - GidT, - StructTimeSpec, - StructTimevalType, - BlkSizeT, - BlkCntT, - OffTType, - NLinkT, - StatType, - ], // Types - [], // Enumerations - [ - FunctionSpec< - "chmod", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "fchmod", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "fchmodat", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "fstat", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "lstat", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "mkdir", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "mkdirat", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "stat", - RetValSpec, - [ArgSpec, ArgSpec] - >, - ] - >; - - HeaderSpec SysStatvfs = HeaderSpec< - "sys/statvfs.h", - [], // Macros - [StructStatvfs], // Types - [], // Enumerations - [ - FunctionSpec< - "statvfs", - RetValSpec, - [ - ArgSpec, - ArgSpec - ] - >, - FunctionSpec< - "fstatvfs", - RetValSpec, - [ - ArgSpec, - ArgSpec - ] - >, - ] // Functions - >; - - NamedType StructUtsName = NamedType<"struct utsname">; - PtrType StructUtsNamePtr = PtrType; - HeaderSpec SysUtsName = HeaderSpec< - "sys/utsname.h", - [], // Macros - [StructUtsName], // Types - [], // Enumerations - [ - FunctionSpec< - "uname", - RetValSpec, - [ArgSpec] - >, - ] - >; - - HeaderSpec ArpaInet = HeaderSpec< - "arpa/inet.h", - [], // Macros - [], // Types - [], // Enumerations - [ - FunctionSpec< - "htonl", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "htons", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "ntohl", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "ntohs", - RetValSpec, - [ArgSpec] - >, - ] - >; - - HeaderSpec PThread = HeaderSpec< - "pthread.h", - [], // Macros - [ - AtForkCallbackT, - ClockIdT, - PThreadAttrTType, - PThreadCondAttrTType, - PThreadKeyT, - PThreadMutexAttrTType, - PThreadMutexTType, - PThreadOnceCallback, - PThreadOnceT, - PThreadRWLockAttrTType, - PThreadRWLockTType, - PThreadSpinLockTType, - PThreadStartT, - PThreadTSSDtorT, - PThreadTType, - ], // Types - [], // Enumerations - [ - FunctionSpec< - "pthread_atfork", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_attr_init", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "pthread_attr_destroy", - 
RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "pthread_attr_getdetachstate", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_attr_setdetachstate", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_attr_getguardsize", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_attr_setguardsize", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_attr_getstacksize", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_attr_setstacksize", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_attr_getstack", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_attr_setstack", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_condattr_destroy", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "pthread_condattr_getclock", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_condattr_getpshared", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_condattr_init", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "pthread_condattr_setclock", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_condattr_setpshared", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_create", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_join", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_detach", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "pthread_exit", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "pthread_self", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "pthread_equal", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_mutexattr_init", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "pthread_mutexattr_destroy", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "pthread_mutexattr_gettype", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_mutexattr_settype", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_mutexattr_getrobust", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_mutexattr_setrobust", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_mutexattr_getpshared", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_mutexattr_setpshared", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_mutexattr_getprotocol", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_mutexattr_setprotocol", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_mutexattr_getprioceiling", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_mutexattr_setprioceiling", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_mutex_init", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_mutex_destroy", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "pthread_mutex_lock", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "pthread_mutex_unlock", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "pthread_key_create", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_key_delete", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "pthread_getspecific", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "pthread_setspecific", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_once", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_rwlockattr_destroy", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "pthread_rwlockattr_getkind_np", - 
RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_rwlockattr_getpshared", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_rwlockattr_init", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "pthread_rwlockattr_setkind_np", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_rwlockattr_setpshared", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_rwlock_init", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_rwlock_tryrdlock", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "pthread_rwlock_trywrlock", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "pthread_rwlock_timedrdlock", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_rwlock_timedwrlock", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_rwlock_clockrdlock", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_rwlock_clockwrlock", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_rwlock_rdlock", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "pthread_rwlock_wrlock", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "pthread_rwlock_unlock", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "pthread_rwlock_destroy", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "pthread_spin_init", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "pthread_spin_destroy", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "pthread_spin_lock", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "pthread_spin_trylock", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "pthread_spin_unlock", - RetValSpec, - [ArgSpec] - > - ] - >; - - HeaderSpec StdIO = HeaderSpec< - "stdio.h", - [], // Macros - [OffTType], // Types - [], // Enumerations - [ - FunctionSpec< - "flockfile", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "funlockfile", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "getc_unlocked", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "getchar_unlocked", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "fileno", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "fdopen", - RetValSpec, - [ArgSpec, ArgSpec] - >, - ] - >; - - HeaderSpec Dirent = HeaderSpec< - "dirent.h", - [], // Macros - [InoT, StructDirent, DIR], // Types - [], // Enumerations - [ - FunctionSpec< - "alphasort", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "closedir", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "dirfd", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "fdopendir", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "opendir", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "readdir", - RetValSpec, - [ArgSpec] - >, - ] - >; - - HeaderSpec Time = HeaderSpec< - "time.h", - [], // Macros - [ClockIdT, StructTimeSpec, StructTimevalType], // Types - [], // Enumerations - [ - FunctionSpec< - "clock_gettime", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "gettimeofday", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "nanosleep", - RetValSpec, - [ArgSpec, ArgSpec] - >, - ] - >; - - HeaderSpec SysWait = HeaderSpec< - "sys/wait.h", - [], // Macros - [PidT, StructRUsage, SigInfoType], - [], // Enumerations - [ - FunctionSpec< - "wait", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "waitpid", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - > - ] - >; - - HeaderSpec SysIOctl = HeaderSpec< - "sys/ioctl.h", - [ - Macro<"TIOCGETD">, - ], // Macros - [], // Types - [], // Enumerations - [] // Functions - >; - - HeaderSpec Spawn = HeaderSpec< - "spawn.h", - [], // Macros 
- [ModeTType, PosixSpawnAttrT, PidT, PosixSpawnFileActionsT], - [], // Enumerations - [ - FunctionSpec< - "posix_spawn_file_actions_addclose", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "posix_spawn_file_actions_adddup2", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "posix_spawn_file_actions_addopen", - RetValSpec, - [ArgSpec, ArgSpec, - ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "posix_spawn_file_actions_destroy", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "posix_spawn_file_actions_init", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "posix_spawn", - RetValSpec, - [ArgSpec, ArgSpec, - ArgSpec, ArgSpec, - ArgSpec, ArgSpec] - >, - ] - >; - - HeaderSpec Search = HeaderSpec< - "search.h", - [], // Macros - [ - ActionType, - EntryType, - LSearchCompareT, - ], // Types - [], // Enumerations - [ - FunctionSpec< - "hcreate", - RetValSpec, - [ - ArgSpec - ] - >, - FunctionSpec< - "hdestroy", - RetValSpec, - [] // Args - >, - FunctionSpec< - "hsearch", - RetValSpec, - [ - ArgSpec, - ArgSpec - ] - >, - FunctionSpec< - "insque", - RetValSpec, - [ - ArgSpec, - ArgSpec - ] - >, - FunctionSpec< - "remque", - RetValSpec, - [ - ArgSpec - ] - >, - FunctionSpec< - "lfind", - RetValSpec, - [ - ArgSpec, - ArgSpec, - ArgSpec, - ArgSpec, - ArgSpec - ] - > - ] - >; - - HeaderSpec Termios = HeaderSpec< - "termios.h", - [ - Macro<"NCCS">, - ], - [CcT, PidT, SpeedT, StructTermios, TcFlagT], // Types - [], // Enumerations - [ - FunctionSpec< - "cfgetispeed", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "cfgetospeed", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "cfsetispeed", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "cfsetospeed", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "tcdrain", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "tcflow", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "tcflush", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "tcgetattr", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "tcgetsid", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "tcsendbreak", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "tcsetattr", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - ] - >; - - HeaderSpec SysSelect = HeaderSpec< - "sys/select.h", - [], // Macros - [FdSet, SigSetType, StructTimevalType, StructTimeSpec, SuSecondsT, TimeTType], - [], // Enumerations - [ - FunctionSpec< - "select", - RetValSpec, - [ - ArgSpec, ArgSpec, ArgSpec, - ArgSpec, ArgSpec - ] - > - ] - >; - - HeaderSpec SysSocket = HeaderSpec< - "sys/socket.h", - [ - Macro<"AF_UNSPEC">, - Macro<"AF_UNIX">, - Macro<"AF_LOCAL">, - Macro<"AF_INET">, - Macro<"AF_INET6">, - Macro<"SOCK_STREAM">, - Macro<"SOCK_DGRAM">, - Macro<"SOCK_RAW">, - Macro<"SOCK_RDM">, - Macro<"SOCK_SEQPACKET">, - Macro<"SOCK_PACKET">, - ], // Macros - [ - SizeTType, - SSizeTType, - SAFamilyType, - StructSockAddr, - StructSockAddrUn, - SocklenType, - StructIovec, - StructMsghdr, - ], // Types - [], // Enumerations - [ - FunctionSpec< - "socket", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "socketpair", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "bind", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "send", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "sendto", - RetValSpec, - [ - ArgSpec, ArgSpec, ArgSpec, - ArgSpec, ArgSpec, - ArgSpec - ] - >, - FunctionSpec< - "sendmsg", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - 
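For orientation, each HeaderSpec record in these spec files is parameterized positionally by the header name followed by lists of macros, types, enumerations, functions, and (optionally) objects, as declared in spec.td further below. A minimal sketch with hypothetical contents, not taken from the deleted files, would be:

    HeaderSpec Example = HeaderSpec<
      "example.h",
      [Macro<"EXAMPLE_MAX">],        // Macros
      [SizeTType],                   // Types
      [],                            // Enumerations
      [FunctionSpec<"example", RetValSpec<IntType>, [ArgSpec<SizeTType>]>], // Functions
      [ObjectSpec<"example_obj", "int">]                                    // Objects
    >;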
FunctionSpec< - "recv", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "recvfrom", - RetValSpec, - [ - ArgSpec, ArgSpec, ArgSpec, - ArgSpec, ArgSpec, - ArgSpec - ] - >, - FunctionSpec< - "recvmsg", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - ] // Functions - >; - - HeaderSpec SysTypes = HeaderSpec< - "sys/types.h", - [], // Macros - [ - BlkCntT, - BlkSizeT, - ClockIdT, - DevT, - GidT, - InoT, - ModeTType, - NLinkT, - OffTType, - PThreadAttrTType, - PThreadCondAttrTType, - PThreadKeyT, - PThreadMutexAttrTType, - PThreadMutexTType, - PThreadOnceT, - PThreadRWLockAttrTType, - PThreadRWLockTType, - PThreadTType, - PidT, - SSizeTType, - SizeTType, - SuSecondsT, - TimeTType, - UidT - ], // Types - [], // Enumerations - [] // Functions - >; - - let Headers = [ - ArpaInet, - CType, - Dirent, - DlFcn, - Errno, - FCntl, - PThread, - Sched, - Signal, - Spawn, - StdIO, - StdLib, - SysIOctl, - SysMMan, - SysResource, - SysSelect, - SysSocket, - SysStat, - SysStatvfs, - SysTypes, - SysUtsName, - SysWait, - Time, - Termios, - UniStd, - String, - Search, - ]; -} diff --git a/libc/spec/spec.td b/libc/spec/spec.td deleted file mode 100644 index ad9ca76846c28..0000000000000 --- a/libc/spec/spec.td +++ /dev/null @@ -1,253 +0,0 @@ -class Type {} - -class NamedType : Type { - string Name = name; -} - -class Field { - string Name = name; - Type FieldType = type; -} - -// Class to describe concrete structs specified by a standard. -class Struct : NamedType { - list Fields; -} - -class EnumNameValue { - string Name = name; - string Value = value; -} - -class Enum enumerations> : NamedType { - list Enumerations = enumerations; -} - -class PtrType : Type { - Type PointeeType = type; -} - -class ConstType : Type { - Type UnqualifiedType = type; -} - -class RestrictedPtrType : Type { - Type PointeeType = type; -} - -// Builtin types. -def VarArgType : NamedType<"...">; -def VaListType : NamedType<"va_list">; -def VoidType : NamedType<"void">; -def IntType : NamedType<"int">; -def UnsignedIntType : NamedType<"unsigned int">; -def LongType : NamedType<"long">; -def UnsignedLongType : NamedType<"unsigned long">; -def LongLongType : NamedType<"long long">; -def UnsignedLongLongType : NamedType<"unsigned long long">; -def FloatType : NamedType<"float">; -def DoubleType : NamedType<"double">; -def LongDoubleType : NamedType<"long double">; -def CharType : NamedType<"char">; -def UnsignedCharType : NamedType<"unsigned char">; -def UnsignedShortType : NamedType<"unsigned short">; -def BoolType : NamedType<"bool">; - -def Float16Type : NamedType<"_Float16">; -def Float128Type : NamedType<"float128">; - -// Common types -def VoidPtr : PtrType; -def VoidPtrPtr : PtrType; -def RestrictedVoidPtrPtr : RestrictedPtrType; -def ConstVoidPtr : ConstType; - -def SizeTType : NamedType<"size_t">; -def SizeTPtr : PtrType; -def RestrictedSizeTPtr : RestrictedPtrType; - -def Char8TType : NamedType<"char8_t">; -def Char16TType : NamedType<"char16_t">; -def Char32TType : NamedType<"char32_t">; -def WCharType : NamedType<"wchar_t">; -def WIntType : NamedType<"wint_t">; - -def LongDoublePtr : PtrType; - -def IntMaxTType : NamedType<"intmax_t">; -def UIntMaxTType : NamedType<"uintmax_t">; - -def UInt16Type : NamedType<"uint16_t">; -def UInt32Type : NamedType<"uint32_t">; - -def OffTType : NamedType<"off_t">; -def OffTPtr : PtrType; -def SSizeTType : NamedType<"ssize_t">; - -// _Noreturn is really not a type, but it is convenient to treat it as a type. 
-def NoReturn : NamedType<"_Noreturn void">; - -//types moved from stdc.td -def VoidRestrictedPtr : RestrictedPtrType; -def ConstVoidRestrictedPtr : ConstType; - -def CharPtr : PtrType; -def ConstCharPtr : ConstType; -def CharRestrictedPtr : RestrictedPtrType; -def CharRestrictedPtrPtr : RestrictedPtrType; -def ConstCharRestrictedPtr : ConstType; -def ConstCharRestrictedPtrPtr : PtrType; - -def OnceFlagType : NamedType<"once_flag">; -def OnceFlagTypePtr : PtrType; -// TODO(sivachandra): Remove this non-standard type when a formal -// way to describe callable types is available. -def CallOnceFuncType : NamedType<"__call_once_func_t">; -def MtxTType : NamedType<"mtx_t">; -def MtxTTypePtr : PtrType; -def CndTType : NamedType<"cnd_t">; -def CndTTypePtr : PtrType; -def ThrdStartTType : NamedType<"thrd_start_t">; -def ThrdTType : NamedType<"thrd_t">; -def ThrdTTypePtr : PtrType; - -def IntPtr : PtrType; -def RestrictedIntPtr : RestrictedPtrType; -def FloatPtr : PtrType; -def DoublePtr : PtrType; -def Float16Ptr : PtrType; -def Float128Ptr : PtrType; -def UnsignedCharPtr : PtrType; - -def ConstDoublePtr : ConstType; -def ConstFloatPtr : ConstType; -def ConstLongDoublePtr : ConstType; -def ConstFloat16Ptr : ConstType; -def ConstFloat128Ptr : ConstType; - -def SigHandlerT : NamedType<"__sighandler_t">; - -def TimeTType : NamedType<"time_t">; - -def StructTimeSpec : NamedType<"struct timespec">; -def StructTimeSpecPtr : PtrType; -def ConstStructTimeSpecPtr : ConstType; -def RestrictStructTimeSpecPtr : RestrictedPtrType; -def ConstRestrictStructTimeSpecPtr : ConstType; - -def BSearchCompareT : NamedType<"__bsearchcompare_t">; -def QSortCompareT : NamedType<"__qsortcompare_t">; - -def AtexitHandlerT : NamedType<"__atexithandler_t">; - -def FILE : NamedType<"FILE">; -def FILEPtr : PtrType; -def FILERestrictedPtr : RestrictedPtrType; - -def PThreadTType : NamedType<"pthread_t">; - -def PidT : NamedType<"pid_t">; -def RestrictedPidTPtr : RestrictedPtrType; - -def StructRUsage : NamedType<"struct rusage">; -def StructRUsagePtr : PtrType; - -def StructTimevalType : NamedType<"struct timeval">; -def StructTimevalPtr : PtrType; -def RestrictedStructTimevalPtr : RestrictedPtrType; - -def SuSecondsT : NamedType<"suseconds_t">; - -//added because __assert_fail needs it. -def UnsignedType : NamedType<"unsigned">; - -def ActionType : NamedType<"ACTION">; -def EntryType : NamedType<"ENTRY">; -def EntryTypePtr : PtrType; -def EntryTypePtrPtr : PtrType; - -def MBStateTType : NamedType<"mbstate_t">; - -class Macro { - string Name = name; -} - -class EnumeratedNameValue { - string Name = name; - string Value = value; -} - -class Annotation {} - -class RetValSpec annotations = []> { - Type ReturnType = type; - list Annotations = annotations; -} - -class ArgSpec annotations = [], string name = ""> { - Type ArgType = type; - list Annotations = annotations; - string Name = name; -} - -// The following classes are used to describe function attributes. -// In the future, we may consider supporting parameter attributes as well. -// https://clang.llvm.org/docs/AttributeReference.html -class FunctionAttr { - string Attr = attr; - // The style of the attribute, e.g. "gnu", "cxx11", "declspec". - // - "gnu" is for GNU-style attributes: __attribute__((...)) - // - "cxx11" is for C++11-style attributes: [[...]] - // - "declspec" is for Microsoft-style attributes: __declspec(...) - string Style = style; - - // For the time being, we are only interested in identifer-like attributes. 
- // We can extend this to support function-like attributes if needed. - // For example, in the future, we can #define __LIBC_ATTRIBUTE_NODISCARD(...) [[nodiscard(__VA_ARGS__)]] - // int FunctionLike = 0; -} -class GnuFunctionAttr : FunctionAttr<"gnu", attr> {} -class Cxx11FunctionAttr : FunctionAttr<"cxx11", attr> { - // The namespace of the attribute, e.g. "gnu" or "clang". Empty string means there is no namespace. - string Namespace = namespace; -} -class DeclspecFunctionAttr : FunctionAttr<"declspec", attr> {} -class FunctionAttrSpec instances> { - list Instances = instances; - string Macro = macro; -} - -class FunctionSpec args, list attrs = []> { - string Name = name; - RetValSpec Return = return; - list Args = args; - list Attributes = attrs; -} - -class GuardedFunctionSpec args, string guard_macro> : FunctionSpec { - string Guard = guard_macro; -} - -class ObjectSpec { - string Name = name; - string Type = type; -} - -class HeaderSpec macros = [], - list types = [], - list enumerations = [], - list functions = [], - list objects = []> { - string Name = name; - list Functions = functions; - list Types = types; - list Macros = macros; - list Enumerations = enumerations; - list Objects = objects; -} - -class StandardSpec { - string Name = name; - list Headers; -} diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td deleted file mode 100644 index 493ca1a6440df..0000000000000 --- a/libc/spec/stdc.td +++ /dev/null @@ -1,1827 +0,0 @@ -def StdC : StandardSpec<"stdc"> { - - NamedType StructTmType = NamedType<"struct tm">; - PtrType StructTmPtr = PtrType; - PtrType TimeTTypePtr = PtrType; - NamedType ClockT = NamedType<"clock_t">; - NamedType LocaleT = NamedType<"locale_t">; - - NamedType DivTType = NamedType<"div_t">; - NamedType LDivTType = NamedType<"ldiv_t">; - NamedType LLDivTType = NamedType<"lldiv_t">; - - NamedType JmpBuf = NamedType<"jmp_buf">; - - NamedType TssTType = NamedType<"tss_t">; - PtrType TssTPtr = PtrType; - NamedType TssDtorTType = NamedType<"tss_dtor_t">; - - HeaderSpec Assert = HeaderSpec< - "assert.h", - [ - Macro<"static_assert">, - Macro<"assert">, - ], - [], // Types - [], // Enumerations - [] - >; - - FunctionAttrSpec ConstAttr = FunctionAttrSpec<"__LIBC_CONST_ATTR", [ - Cxx11FunctionAttr<"const", "gnu">, - GnuFunctionAttr<"const">, - ]>; - - HeaderSpec CType = HeaderSpec< - "ctype.h", - [], // Macros - [ - LocaleT - ], // Types - [], // Enumerations - [ - FunctionSpec< - "isalnum", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "isalpha", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "isblank", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "iscntrl", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "isdigit", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "isgraph", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "islower", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "isprint", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "ispunct", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "isspace", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "isupper", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "isxdigit", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "tolower", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "toupper", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "isalnum_l", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "isalpha_l", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "isblank_l", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "iscntrl_l", - RetValSpec, - [ArgSpec, ArgSpec] - >, - 
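Each FunctionSpec entry pairs a function name with a RetValSpec and a list of ArgSpec, and both wrappers take one of the NamedType definitions from spec.td as their template argument. As an illustrative reconstruction (the angle-bracket type arguments do not survive in the flattened text above), the first ctype entry would read roughly:

    FunctionSpec<
      "isalnum",
      RetValSpec<IntType>,
      [ArgSpec<IntType>]
    >,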
FunctionSpec< - "isdigit_l", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "isgraph_l", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "islower_l", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "isprint_l", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "ispunct_l", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "isspace_l", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "isupper_l", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "isxdigit_l", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "tolower_l", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "toupper_l", - RetValSpec, - [ArgSpec, ArgSpec] - >, - ] - >; - - NamedType FEnvT = NamedType<"fenv_t">; - PtrType FEnvTPtr = PtrType; - ConstType ConstFEnvTPtr = ConstType; - NamedType FExceptT = NamedType<"fexcept_t">; - PtrType FExceptTPtr = PtrType; - ConstType ConstFExceptTPtr = ConstType; - HeaderSpec Fenv = HeaderSpec< - "fenv.h", - [ - Macro<"FE_DIVBYZERO">, - Macro<"FE_INEXACT">, - Macro<"FE_INVALID">, - Macro<"FE_OVERFLOW">, - Macro<"FE_UNDERFLOW">, - Macro<"FE_ALL_EXCEPT">, - - Macro<"FE_DOWNWARD">, - Macro<"FE_TONEAREST">, - Macro<"FE_TOWARDZERO">, - Macro<"FE_UPWARD">, - - Macro<"FE_DFL_ENV"> - ], - [ - FEnvT, - FExceptT, - ], // Types - [], // Enumerations - [ - FunctionSpec< - "feclearexcept", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "fetestexcept", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "fetestexceptflag", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "feraiseexcept", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "fesetround", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "fegetround", - RetValSpec, - [] - >, - FunctionSpec< - "fegetenv", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "fesetenv", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "fegetexceptflag", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "fesetexcept", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "fesetexceptflag", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "feholdexcept", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "feupdateenv", - RetValSpec, - [ArgSpec] - >, - ] - >; - - HeaderSpec String = HeaderSpec< - "string.h", - [ - Macro<"NULL">, - ], - [ - SizeTType, - ], - [], // Enumerations - [ - FunctionSpec< - "memcpy", - RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec] - >, - FunctionSpec< - "memmove", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "memcmp", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "memchr", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "memset", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "memset_explicit", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "strcpy", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "strncpy", - RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec] - >, - FunctionSpec< - "strcat", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "strncat", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "strcmp", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "strcoll", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "strcoll_l", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "strncmp", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "strxfrm", - RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec] - >, - FunctionSpec< - "strxfrm_l", - RetValSpec, - [ArgSpec, - ArgSpec, - 
ArgSpec, - ArgSpec] - >, - FunctionSpec< - "strchr", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "strcspn", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "strdup", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "strndup", - RetValSpec, - [ArgSpec,ArgSpec] - >, - FunctionSpec< - "strpbrk", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "strrchr", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "strspn", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "strstr", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "strtok", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "strerror", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "strlen", - RetValSpec, - [ArgSpec] - >, - ] - >; - - HeaderSpec Math = HeaderSpec< - "math.h", - [ - Macro<"MATH_ERRNO">, - Macro<"MATH_ERREXCEPT">, - Macro<"math_errhandling">, - - Macro<"HUGE_VAL">, - Macro<"INFINITY">, - Macro<"NAN">, - - Macro<"FP_INT_UPWARD">, - Macro<"FP_INT_DOWNWARD">, - Macro<"FP_INT_TOWARDZERO">, - Macro<"FP_INT_TONEARESTFROMZERO">, - Macro<"FP_INT_TONEAREST">, - - Macro<"FP_ILOGB0">, - Macro<"FP_ILOGBNAN">, - - Macro<"isfinite">, - Macro<"isinf">, - Macro<"isnan">, - ], - [ - NamedType<"float_t">, - NamedType<"double_t">, - NamedType<"float128">, - ], - [], // Enumerations - [ - FunctionSpec<"cbrt", RetValSpec, [ArgSpec]>, - FunctionSpec<"cbrtf", RetValSpec, [ArgSpec]>, - - FunctionSpec<"copysign", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"copysignf", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"copysignl", RetValSpec, [ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"copysignf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"copysignf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"ceil", RetValSpec, [ArgSpec]>, - FunctionSpec<"ceilf", RetValSpec, [ArgSpec]>, - FunctionSpec<"ceill", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"ceilf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"ceilf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"daddl", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"ddivl", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"dfmal", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - FunctionSpec<"dsubl", RetValSpec, [ArgSpec, ArgSpec]>, - - FunctionSpec<"fabs", RetValSpec, [ArgSpec], [ConstAttr]>, - FunctionSpec<"fabsf", RetValSpec, [ArgSpec]>, - FunctionSpec<"fabsl", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"fabsf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"fabsf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"fadd", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"faddl", RetValSpec, [ArgSpec, ArgSpec]>, - - FunctionSpec<"fdim", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fdimf", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fdiml", RetValSpec, [ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"fdimf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"fdimf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"fdiv", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fdivl", RetValSpec, [ArgSpec, ArgSpec]>, - - FunctionSpec<"ffma", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - FunctionSpec<"ffmal", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - - FunctionSpec<"floor", RetValSpec, [ArgSpec]>, - FunctionSpec<"floorf", RetValSpec, [ArgSpec]>, - FunctionSpec<"floorl", RetValSpec, [ArgSpec]>, - 
GuardedFunctionSpec<"floorf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"floorf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"fmin", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fminf", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fminl", RetValSpec, [ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"fminf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - GuardedFunctionSpec<"fminf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - - FunctionSpec<"fmax", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fmaxf", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fmaxl", RetValSpec, [ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"fmaxf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - GuardedFunctionSpec<"fmaxf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - - FunctionSpec<"fmaximum", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fmaximumf", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fmaximuml", RetValSpec, [ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"fmaximumf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"fmaximumf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"fmaximum_num", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fmaximum_numf", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fmaximum_numl", RetValSpec, [ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"fmaximum_numf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"fmaximum_numf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"fmaximum_mag", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fmaximum_magf", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fmaximum_magl", RetValSpec, [ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"fmaximum_magf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"fmaximum_magf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"fmaximum_mag_num", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fmaximum_mag_numf", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fmaximum_mag_numl", RetValSpec, [ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"fmaximum_mag_numf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"fmaximum_mag_numf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"fminimum", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fminimumf", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fminimuml", RetValSpec, [ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"fminimumf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"fminimumf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"fminimum_num", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fminimum_numf", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fmaximum_numl", RetValSpec, [ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"fminimum_numf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"fminimum_numf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"fminimum_mag", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fminimum_magf", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fminimum_magl", RetValSpec, [ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"fminimum_magf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - 
GuardedFunctionSpec<"fminimum_magf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"fminimum_mag_num", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fminimum_mag_numf", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fminimum_mag_numl", RetValSpec, [ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"fminimum_mag_numf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"fminimum_mag_numf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"fma", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - FunctionSpec<"fmaf", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - - GuardedFunctionSpec<"f16fmaf128", RetValSpec, [ArgSpec, ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16_AND_FLOAT128">, - - FunctionSpec<"fmod", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fmodf", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fmodl", RetValSpec, [ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"fmodf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"fmodf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"frexp", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"frexpf", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"frexpl", RetValSpec, [ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"frexpf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"frexpf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"fromfp", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - FunctionSpec<"fromfpf", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - FunctionSpec<"fromfpl", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"fromfpf16", RetValSpec, [ArgSpec, ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"fromfpf128", RetValSpec, [ArgSpec, ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"fromfpx", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - FunctionSpec<"fromfpxf", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - FunctionSpec<"fromfpxl", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"fromfpxf16", RetValSpec, [ArgSpec, ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"fromfpxf128", RetValSpec, [ArgSpec, ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"fsub", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fsubl", RetValSpec, [ArgSpec, ArgSpec]>, - - FunctionSpec<"ufromfp", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - FunctionSpec<"ufromfpf", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - FunctionSpec<"ufromfpl", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"ufromfpf16", RetValSpec, [ArgSpec, ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"ufromfpf128", RetValSpec, [ArgSpec, ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"ufromfpx", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - FunctionSpec<"ufromfpxf", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - FunctionSpec<"ufromfpxl", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"ufromfpxf16", RetValSpec, [ArgSpec, ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"ufromfpxf128", RetValSpec, [ArgSpec, ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"hypot", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"hypotf", RetValSpec, [ArgSpec, ArgSpec]>, - - FunctionSpec<"ilogb", RetValSpec, [ArgSpec]>, - FunctionSpec<"ilogbf", RetValSpec, [ArgSpec]>, - FunctionSpec<"ilogbl", RetValSpec, [ArgSpec]>, - 
GuardedFunctionSpec<"ilogbf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"ilogbf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"llogb", RetValSpec, [ArgSpec]>, - FunctionSpec<"llogbf", RetValSpec, [ArgSpec]>, - FunctionSpec<"llogbl", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"llogbf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"llogbf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"ldexp", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"ldexpf", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"ldexpl", RetValSpec, [ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"ldexpf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"ldexpf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"log10", RetValSpec, [ArgSpec]>, - FunctionSpec<"log10f", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"log10f16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - - FunctionSpec<"log1p", RetValSpec, [ArgSpec]>, - FunctionSpec<"log1pf", RetValSpec, [ArgSpec]>, - - FunctionSpec<"log2", RetValSpec, [ArgSpec]>, - FunctionSpec<"log2f", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"log2f16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - - FunctionSpec<"log", RetValSpec, [ArgSpec]>, - FunctionSpec<"logf", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"logf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - - FunctionSpec<"logb", RetValSpec, [ArgSpec]>, - FunctionSpec<"logbf", RetValSpec, [ArgSpec]>, - FunctionSpec<"logbl", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"logbf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"logbf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"modf", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"modff", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"modfl", RetValSpec, [ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"modff16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"modff128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"cos", RetValSpec, [ArgSpec]>, - FunctionSpec<"cosf", RetValSpec, [ArgSpec]>, - FunctionSpec<"sin", RetValSpec, [ArgSpec]>, - FunctionSpec<"sinf", RetValSpec, [ArgSpec]>, - FunctionSpec<"tan", RetValSpec, [ArgSpec]>, - FunctionSpec<"tanf", RetValSpec, [ArgSpec]>, - - FunctionSpec<"erff", RetValSpec, [ArgSpec]>, - - FunctionSpec<"exp", RetValSpec, [ArgSpec]>, - FunctionSpec<"expf", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"expf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - - FunctionSpec<"exp2", RetValSpec, [ArgSpec]>, - FunctionSpec<"exp2f", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"exp2f16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - - FunctionSpec<"exp2m1f", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"exp2m1f16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - - FunctionSpec<"expm1", RetValSpec, [ArgSpec]>, - FunctionSpec<"expm1f", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"expm1f16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - - FunctionSpec<"exp10", RetValSpec, [ArgSpec]>, - FunctionSpec<"exp10f", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"exp10f16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - - FunctionSpec<"exp10m1f", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"exp10m1f16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - - FunctionSpec<"remainder", RetValSpec, [ArgSpec, ArgSpec]>, - 
FunctionSpec<"remainderf", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"remainderl", RetValSpec, [ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"remainderf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"remainderf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"remquo", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - FunctionSpec<"remquof", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - FunctionSpec<"remquol", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"remquof16", RetValSpec, [ArgSpec, ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"remquof128", RetValSpec, [ArgSpec, ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"round", RetValSpec, [ArgSpec]>, - FunctionSpec<"roundf", RetValSpec, [ArgSpec]>, - FunctionSpec<"roundl", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"roundf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"roundf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"roundeven", RetValSpec, [ArgSpec]>, - FunctionSpec<"roundevenf", RetValSpec, [ArgSpec]>, - FunctionSpec<"roundevenl", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"roundevenf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"roundevenf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"lround", RetValSpec, [ArgSpec]>, - FunctionSpec<"lroundf", RetValSpec, [ArgSpec]>, - FunctionSpec<"lroundl", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"lroundf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"lroundf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"llround", RetValSpec, [ArgSpec]>, - FunctionSpec<"llroundf", RetValSpec, [ArgSpec]>, - FunctionSpec<"llroundl", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"llroundf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"llroundf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"rint", RetValSpec, [ArgSpec]>, - FunctionSpec<"rintf", RetValSpec, [ArgSpec]>, - FunctionSpec<"rintl", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"rintf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"rintf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"lrint", RetValSpec, [ArgSpec]>, - FunctionSpec<"lrintf", RetValSpec, [ArgSpec]>, - FunctionSpec<"lrintl", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"lrintf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"lrintf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"llrint", RetValSpec, [ArgSpec]>, - FunctionSpec<"llrintf", RetValSpec, [ArgSpec]>, - FunctionSpec<"llrintl", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"llrintf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"llrintf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"sqrt", RetValSpec, [ArgSpec]>, - FunctionSpec<"sqrtf", RetValSpec, [ArgSpec]>, - FunctionSpec<"sqrtl", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"sqrtf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"sqrtf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"trunc", RetValSpec, [ArgSpec]>, - FunctionSpec<"truncf", RetValSpec, [ArgSpec]>, - FunctionSpec<"truncl", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"truncf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - 
GuardedFunctionSpec<"truncf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"nearbyint", RetValSpec, [ArgSpec]>, - FunctionSpec<"nearbyintf", RetValSpec, [ArgSpec]>, - FunctionSpec<"nearbyintl", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"nearbyintf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"nearbyintf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"nextafterf", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"nextafter", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"nextafterl", RetValSpec, [ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"nextafterf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"nextafterf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"nexttowardf", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"nexttoward", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"nexttowardl", RetValSpec, [ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"nexttowardf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - - FunctionSpec<"nextdown", RetValSpec, [ArgSpec]>, - FunctionSpec<"nextdownf", RetValSpec, [ArgSpec]>, - FunctionSpec<"nextdownl", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"nextdownf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"nextdownf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"nextup", RetValSpec, [ArgSpec]>, - FunctionSpec<"nextupf", RetValSpec, [ArgSpec]>, - FunctionSpec<"nextupl", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"nextupf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"nextupf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"powf", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"pow", RetValSpec, [ArgSpec, ArgSpec]>, - - FunctionSpec<"coshf", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"coshf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - - FunctionSpec<"sinhf", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"sinhf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - - FunctionSpec<"tanhf", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"tanhf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - - FunctionSpec<"acosf", RetValSpec, [ArgSpec]>, - - FunctionSpec<"asinf", RetValSpec, [ArgSpec]>, - FunctionSpec<"asin", RetValSpec, [ArgSpec]>, - - FunctionSpec<"atanf", RetValSpec, [ArgSpec]>, - - FunctionSpec<"atan2", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"atan2f", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"atan2l", RetValSpec, [ArgSpec, ArgSpec]>, - - FunctionSpec<"acoshf", RetValSpec, [ArgSpec]>, - FunctionSpec<"asinhf", RetValSpec, [ArgSpec]>, - FunctionSpec<"atanhf", RetValSpec, [ArgSpec]>, - - FunctionSpec<"scalbln", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"scalblnf", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"scalblnl", RetValSpec, [ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"scalblnf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"scalblnf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"scalbn", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"scalbnf", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"scalbnl", RetValSpec, [ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"scalbnf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"scalbnf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"nanf", 
RetValSpec, [ArgSpec]>, - FunctionSpec<"nan", RetValSpec, [ArgSpec]>, - FunctionSpec<"nanl", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"nanf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"nanf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"issignaling", RetValSpec, [ArgSpec]>, - FunctionSpec<"issignalingf", RetValSpec, [ArgSpec]>, - FunctionSpec<"issignalingl", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"issignalingf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"issignalingf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"canonicalize", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"canonicalizef", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"canonicalizel", RetValSpec, [ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"canonicalizef16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"canonicalizef128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"iscanonical", RetValSpec, [ArgSpec]>, - FunctionSpec<"iscanonicalf", RetValSpec, [ArgSpec]>, - FunctionSpec<"iscanonicall", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"iscanonicalf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"iscanonicalf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"dsqrtl", RetValSpec, [ArgSpec]>, - - FunctionSpec<"totalorder", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"totalorderf", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"totalorderl", RetValSpec, [ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"totalorderf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"totalorderf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"totalordermag", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"totalordermagf", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"totalordermagl", RetValSpec, [ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"totalordermagf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"totalordermagf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"getpayload", RetValSpec, [ArgSpec]>, - FunctionSpec<"getpayloadf", RetValSpec, [ArgSpec]>, - FunctionSpec<"getpayloadl", RetValSpec, [ArgSpec]>, - GuardedFunctionSpec<"getpayloadf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"getpayloadf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"setpayload", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"setpayloadf", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"setpayloadl", RetValSpec, [ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"setpayloadf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"setpayloadf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - FunctionSpec<"setpayloadsig", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"setpayloadsigf", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"setpayloadsigl", RetValSpec, [ArgSpec, ArgSpec]>, - GuardedFunctionSpec<"setpayloadsigf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, - GuardedFunctionSpec<"setpayloadsigf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, - - GuardedFunctionSpec<"f16addf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16_AND_FLOAT128">, - - GuardedFunctionSpec<"f16subf128", RetValSpec, [ArgSpec, ArgSpec], 
"LIBC_TYPES_HAS_FLOAT16_AND_FLOAT128">, - - FunctionSpec<"fmul", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fmull", RetValSpec, [ArgSpec, ArgSpec]>, - - FunctionSpec<"dmull", RetValSpec, [ArgSpec, ArgSpec]>, - - GuardedFunctionSpec<"f16mulf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16_AND_FLOAT128">, - - FunctionSpec<"fsqrt", RetValSpec, [ArgSpec]>, - FunctionSpec<"fsqrtl", RetValSpec, [ArgSpec]>, - - GuardedFunctionSpec<"f16divf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16_AND_FLOAT128">, - - GuardedFunctionSpec<"f16sqrtf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16_AND_FLOAT128">, - - FunctionSpec<"lgamma", RetValSpec, [ArgSpec]>, - FunctionSpec<"lgammaf", RetValSpec, [ArgSpec]>, - FunctionSpec<"lgammal", RetValSpec, [ArgSpec]>, - ] - >; - - HeaderSpec StdIO = HeaderSpec< - "stdio.h", - [ - Macro<"stdin">, - Macro<"stderr">, - Macro<"stdout">, - Macro<"_IOFBF">, - Macro<"_IOLBF">, - Macro<"_IONBF">, - Macro<"EOF">, - ], // Macros - [ // Types - SizeTType, - FILE, - ], - [], // Enumerations - [ - FunctionSpec< - "clearerr", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "fclose", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "feof", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "ferror", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "fgetc", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "fgets", - RetValSpec, - [ - ArgSpec, - ArgSpec, - ArgSpec, - ] - >, - FunctionSpec< - "fflush", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "fopen", - RetValSpec, - [ArgSpec, - ArgSpec] - >, - FunctionSpec< - "fputc", - RetValSpec, - [ArgSpec, - ArgSpec] - >, - FunctionSpec< - "ftell", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "getc", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "getchar", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "putc", - RetValSpec, - [ArgSpec, - ArgSpec] - >, - FunctionSpec< - "putchar", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "fputs", - RetValSpec, - [ArgSpec, - ArgSpec] - >, - FunctionSpec< - "puts", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "fread", - RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec, - ArgSpec] - >, - FunctionSpec< - "fseek", - RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec] - >, - FunctionSpec< - "fwrite", - RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec, - ArgSpec] - >, - FunctionSpec< - "remove", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "rename", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "setbuf", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "setvbuf", - RetValSpec, - [ArgSpec, ArgSpec, ArgSpec, ArgSpec] - >, - FunctionSpec< - "sscanf", - RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec] - >, - FunctionSpec< - "vsscanf", - RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec] - >, - FunctionSpec< - "scanf", - RetValSpec, - [ArgSpec, - ArgSpec] - >, - FunctionSpec< - "vscanf", - RetValSpec, - [ArgSpec, - ArgSpec] - >, - FunctionSpec< - "fscanf", - RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec] - >, - FunctionSpec< - "vfscanf", - RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec] - >, - FunctionSpec< - "sprintf", - RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec] - >, - FunctionSpec< - "snprintf", - RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec, - ArgSpec] - >, - FunctionSpec< - "printf", - RetValSpec, - [ArgSpec, - ArgSpec] - >, - FunctionSpec< - "fprintf", - RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec] - >, - FunctionSpec< - "asprintf", - RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec] - >, - FunctionSpec< - "vsprintf", - RetValSpec, - [ArgSpec, - ArgSpec, - 
ArgSpec] - >, - FunctionSpec< - "vsnprintf", - RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec, - ArgSpec] - >, - FunctionSpec< - "vprintf", - RetValSpec, - [ArgSpec, - ArgSpec] - >, - FunctionSpec< - "vfprintf", - RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec] - >, - FunctionSpec< - "ungetc", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "vasprintf", - RetValSpec, - [ArgSpec, - ArgSpec, - ArgSpec] - >, - ], - [ - ObjectSpec< - "stdin", - "FILE *" - >, - ObjectSpec< - "stdout", - "FILE *" - >, - ObjectSpec< - "stderr", - "FILE *" - >, - ] - >; - - HeaderSpec StdBit = HeaderSpec< - "stdbit.h", - [ - Macro<"__STDC_VERSION_STDBIT_H__">, - Macro<"__STDC_ENDIAN_LITTLE__">, - Macro<"__STDC_ENDIAN_BIG__">, - Macro<"__STDC_ENDIAN_NATIVE__">, - Macro<"stdc_leading_zeros">, - Macro<"stdc_leading_ones">, - Macro<"stdc_trailing_zeros">, - Macro<"stdc_trailing_ones">, - Macro<"stdc_first_leading_zero">, - Macro<"stdc_first_leading_one">, - Macro<"stdc_first_trailing_zero">, - Macro<"stdc_first_trailing_one">, - Macro<"stdc_count_zeros">, - Macro<"stdc_count_ones">, - Macro<"stdc_has_single_bit">, - Macro<"stdc_bit_width">, - Macro<"stdc_bit_floor">, - Macro<"stdc_bit_ceil"> - ], // Macros - [], // Types - [], // Enumerations - [ - FunctionSpec<"stdc_leading_zeros_uc", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_leading_zeros_us", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_leading_zeros_ui", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_leading_zeros_ul", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_leading_zeros_ull", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_leading_ones_uc", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_leading_ones_us", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_leading_ones_ui", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_leading_ones_ul", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_leading_ones_ull", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_trailing_zeros_uc", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_trailing_zeros_us", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_trailing_zeros_ui", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_trailing_zeros_ul", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_trailing_zeros_ull", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_trailing_ones_uc", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_trailing_ones_us", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_trailing_ones_ui", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_trailing_ones_ul", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_trailing_ones_ull", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_first_leading_zero_uc", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_first_leading_zero_us", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_first_leading_zero_ui", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_first_leading_zero_ul", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_first_leading_zero_ull", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_first_leading_one_uc", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_first_leading_one_us", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_first_leading_one_ui", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_first_leading_one_ul", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_first_leading_one_ull", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_first_trailing_one_uc", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_first_trailing_one_us", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_first_trailing_one_ui", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_first_trailing_one_ul", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_first_trailing_one_ull", RetValSpec, 
[ArgSpec]>, - FunctionSpec<"stdc_count_zeros_uc", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_count_zeros_us", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_count_zeros_ui", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_count_zeros_ul", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_count_zeros_ull", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_count_ones_uc", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_count_ones_us", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_count_ones_ui", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_count_ones_ul", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_count_ones_ull", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_has_single_bit_uc", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_has_single_bit_us", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_has_single_bit_ui", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_has_single_bit_ul", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_has_single_bit_ull", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_bit_width_uc", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_bit_width_us", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_bit_width_ui", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_bit_width_ul", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_bit_width_ull", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_bit_floor_uc", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_bit_floor_us", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_bit_floor_ui", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_bit_floor_ul", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_bit_floor_ull", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_bit_ceil_uc", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_bit_ceil_us", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_bit_ceil_ui", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_bit_ceil_ul", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_bit_ceil_ull", RetValSpec, [ArgSpec]> - ] // Functions - >; - - HeaderSpec StdCkdInt = HeaderSpec< - "stdckdint.h", - [ - Macro<"__STDC_VERSION_STDCKDINT_H__">, - Macro<"ckd_add">, - Macro<"ckd_sub">, - Macro<"ckd_mul"> - ], // Macros - [], // Types - [], // Enumerations - [] // Functions - >; - - HeaderSpec StdLib = HeaderSpec< - "stdlib.h", - [], // Macros - [ - DivTType, - LDivTType, - LLDivTType, - SizeTType, - BSearchCompareT, - QSortCompareT, - AtexitHandlerT, - ], // Types - [], // Enumerations - [ - FunctionSpec<"abort", RetValSpec, [ArgSpec]>, - - FunctionSpec<"bsearch", RetValSpec, [ArgSpec, ArgSpec, ArgSpec, ArgSpec, ArgSpec]>, - - FunctionSpec<"abs", RetValSpec, [ArgSpec]>, - FunctionSpec<"labs", RetValSpec, [ArgSpec]>, - FunctionSpec<"llabs", RetValSpec, [ArgSpec]>, - - FunctionSpec<"atof", RetValSpec, [ArgSpec]>, - FunctionSpec<"atoi", RetValSpec, [ArgSpec]>, - FunctionSpec<"atol", RetValSpec, [ArgSpec]>, - FunctionSpec<"atoll", RetValSpec, [ArgSpec]>, - - FunctionSpec<"div", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"ldiv", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"lldiv", RetValSpec, [ArgSpec, ArgSpec]>, - - FunctionSpec<"qsort", RetValSpec, [ArgSpec, ArgSpec, ArgSpec, ArgSpec]>, - - FunctionSpec<"rand", RetValSpec, [ArgSpec]>, - FunctionSpec<"srand", RetValSpec, [ArgSpec]>, - - FunctionSpec<"strfromf", RetValSpec, [ArgSpec, ArgSpec, ArgSpec, ArgSpec]>, - FunctionSpec<"strfromd", RetValSpec, [ArgSpec, ArgSpec, ArgSpec, ArgSpec]>, - FunctionSpec<"strfroml", RetValSpec, [ArgSpec, ArgSpec, ArgSpec, ArgSpec]>, - - FunctionSpec<"strtof", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"strtod", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"strtold", RetValSpec, [ArgSpec, ArgSpec]>, - 
FunctionSpec<"strtol", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - FunctionSpec<"strtoll", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - FunctionSpec<"strtoul", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - FunctionSpec<"strtoull", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - - FunctionSpec<"strtof_l", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - FunctionSpec<"strtod_l", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - FunctionSpec<"strtold_l", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - FunctionSpec<"strtol_l", RetValSpec, [ArgSpec, ArgSpec, ArgSpec, ArgSpec]>, - FunctionSpec<"strtoll_l", RetValSpec, [ArgSpec, ArgSpec, ArgSpec, ArgSpec]>, - FunctionSpec<"strtoul_l", RetValSpec, [ArgSpec, ArgSpec, ArgSpec, ArgSpec]>, - FunctionSpec<"strtoull_l", RetValSpec, [ArgSpec, ArgSpec, ArgSpec, ArgSpec]>, - - FunctionSpec<"malloc", RetValSpec, [ArgSpec]>, - FunctionSpec<"calloc", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"realloc", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"aligned_alloc", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"free", RetValSpec, [ArgSpec]>, - - FunctionSpec<"_Exit", RetValSpec, [ArgSpec]>, - FunctionSpec<"at_quick_exit", RetValSpec, [ArgSpec]>, - FunctionSpec<"atexit", RetValSpec, [ArgSpec]>, - FunctionSpec<"exit", RetValSpec, [ArgSpec]>, - FunctionSpec<"quick_exit", RetValSpec, [ArgSpec]>, - - FunctionSpec<"system", RetValSpec, [ArgSpec]>, - ] - >; - - NamedType IMaxDivTType = NamedType<"imaxdiv_t">; - - HeaderSpec IntTypes = HeaderSpec< - "inttypes.h", - [ - Macro<"__STDC_VERSION_INTTYPES_H__">, - ], // Macros - [ - IMaxDivTType, - ], // Types - [], // Enumerations - [ - FunctionSpec<"imaxabs", RetValSpec, [ArgSpec]>, - FunctionSpec<"imaxdiv", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"strtoimax", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - FunctionSpec<"strtoumax", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, - ] - >; - - HeaderSpec Errno = HeaderSpec< - "errno.h", - [ - Macro<"errno">, - Macro<"EDOM">, - Macro<"EILSEQ">, - Macro<"ERANGE">, - ] - >; - - HeaderSpec Float = HeaderSpec< - "float.h", - [ - Macro<"FLT_MANT_DIG">, - Macro<"DBL_MANT_DIG">, - Macro<"LDBL_MANT_DIG">, - ] - >; - - HeaderSpec StdInt = HeaderSpec<"StdInt.h">; - - HeaderSpec Limits = HeaderSpec<"limits.h">; - - NamedType SigAtomicT = NamedType<"sig_atomic_t">; - HeaderSpec Signal = HeaderSpec< - "signal.h", - [ - Macro<"SIG_BLOCK">, - Macro<"SIG_UNBLOCK">, - Macro<"SIG_SETMASK">, - - Macro<"SIGABRT">, - Macro<"SIGFPE">, - Macro<"SIGILL">, - Macro<"SIGINT">, - Macro<"SIGSEGV">, - Macro<"SIGTERM"> - ], - [ - SizeTType, - SigAtomicT, - SigHandlerT, - ], - [], // Enumerations - [ - FunctionSpec<"raise", RetValSpec, [ArgSpec]>, - FunctionSpec< - "signal", - RetValSpec, - [ArgSpec, ArgSpec] - >, - ] - >; - - HeaderSpec Threads = HeaderSpec< - "threads.h", - [ - Macro<"ONCE_FLAG_INIT">, - ], - [ - OnceFlagType, - CallOnceFuncType, - CndTType, - MtxTType, - ThrdStartTType, - ThrdTType, - TssTType, - TssDtorTType, - ], - [ - EnumeratedNameValue<"mtx_plain">, - EnumeratedNameValue<"mtx_recursive">, - EnumeratedNameValue<"mtx_timed">, - EnumeratedNameValue<"thrd_timedout">, - EnumeratedNameValue<"thrd_success">, - EnumeratedNameValue<"thrd_busy">, - EnumeratedNameValue<"thrd_error">, - EnumeratedNameValue<"thrd_nomem">, - ], - [ - FunctionSpec< - "call_once", - RetValSpec, - [ - ArgSpec, - ArgSpec, - ] - >, - FunctionSpec< - "cnd_broadcast", - RetValSpec, - [ - ArgSpec, - ] - >, - FunctionSpec< - "cnd_destroy", - RetValSpec, - [ - ArgSpec, - ] - >, - FunctionSpec< - "cnd_init", - RetValSpec, - [ - 
ArgSpec, - ] - >, - FunctionSpec< - "cnd_signal", - RetValSpec, - [ - ArgSpec, - ] - >, - FunctionSpec< - "cnd_wait", - RetValSpec, - [ - ArgSpec, - ArgSpec, - ] - >, - FunctionSpec< - "mtx_init", - RetValSpec, - [ - ArgSpec, - ArgSpec, - ] - >, - FunctionSpec< - "mtx_destroy", - RetValSpec, - [ - ArgSpec, - ] - >, - FunctionSpec< - "mtx_lock", - RetValSpec, - [ - ArgSpec, - ] - >, - FunctionSpec< - "mtx_unlock", - RetValSpec, - [ - ArgSpec, - ] - >, - FunctionSpec< - "thrd_create", - RetValSpec, - [ - ArgSpec, - ArgSpec, - ArgSpec, - ] - >, - FunctionSpec< - "thrd_join", - RetValSpec, - [ - ArgSpec, - ArgSpec, - ] - >, - FunctionSpec< - "thrd_detach", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "thrd_current", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "thrd_equal", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "thrd_exit", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "tss_create", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "tss_delete", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "tss_get", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "tss_set", - RetValSpec, - [ArgSpec, ArgSpec] - >, - ] - >; - - HeaderSpec Time = HeaderSpec< - "time.h", - [], // Macros - [ // Types - ClockT, - StructTmType, - StructTimeSpec, - TimeTType, - SizeTType, - ], - [], // Enumerations - [ - FunctionSpec< - "asctime", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "asctime_r", - RetValSpec, - [ - ArgSpec, - ArgSpec, - ] - >, - FunctionSpec< - "ctime", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "ctime_r", - RetValSpec, - [ - ArgSpec, - ArgSpec, - ] - >, - FunctionSpec< - "clock", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "difftime", - RetValSpec, - [ - ArgSpec, - ArgSpec, - ] - >, - FunctionSpec< - "gmtime", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "gmtime_r", - RetValSpec, - [ - ArgSpec, - ArgSpec, - ] - >, - FunctionSpec< - "mktime", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "time", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "timespec_get", - RetValSpec, - [ - ArgSpec, - ArgSpec, - ] - >, - ] - >; - - HeaderSpec SetJmp = HeaderSpec< - "setjmp.h", - [], // Macros - [JmpBuf], - [], // Enumerations - [ - FunctionSpec< - "longjmp", - RetValSpec, - [ArgSpec, ArgSpec] - >, - FunctionSpec< - "setjmp", - RetValSpec, - [ArgSpec] - >, - FunctionSpec< - "longjmp", - RetValSpec, - [ArgSpec, ArgSpec] - >, - ] - >; - - HeaderSpec UChar = HeaderSpec< - "uchar.h", - [], // Macros - [ //Types - MBStateTType, - Char8TType, - Char16TType, - Char32TType, - SizeTType, - ], - [], // Enumerations - [] - >; - - HeaderSpec WChar = HeaderSpec< - "wchar.h", - [ // Macros - Macro<"WEOF">, - ], - [ //Types - MBStateTType, - SizeTType, - WIntType, - WCharType, - ], - [], // Enumerations - [ - FunctionSpec< - "wctob", - RetValSpec, - [ArgSpec] - >, - ] - >; - - NamedType StructLconv = NamedType<"struct lconv">; - PtrType StructLconvPtr = PtrType; - - HeaderSpec Locale = HeaderSpec< - "locale.h", - [], // Macros - [LocaleT, StructLconv], // Types - [], // Enumerations - [ - FunctionSpec< - "duplocale", - RetValSpec, - [ - ArgSpec - ] - >, - FunctionSpec< - "freelocale", - RetValSpec, - [ - ArgSpec - ] - >, - FunctionSpec< - "localeconv", - RetValSpec, - [] - >, - FunctionSpec< - "newlocale", - RetValSpec, - [ - ArgSpec, - ArgSpec, - ArgSpec - ] - >, - FunctionSpec< - "setlocale", - RetValSpec, - [ - ArgSpec, - ArgSpec - ] - >, - FunctionSpec< - "uselocale", - RetValSpec, - [ - ArgSpec - ] - > - ] // Functions - >; - - let Headers = [ - Assert, - CType, - 
Errno, - Fenv, - Float, - StdInt, - Limits, - Math, - String, - StdBit, - StdCkdInt, - StdIO, - StdLib, - IntTypes, - SetJmp, - Signal, - Threads, - Time, - UChar, - WChar, - Locale, - ]; -} diff --git a/libc/spec/stdc_ext.td b/libc/spec/stdc_ext.td deleted file mode 100644 index dee3b8bdf6fee..0000000000000 --- a/libc/spec/stdc_ext.td +++ /dev/null @@ -1,82 +0,0 @@ -// Fixed point types. -// From ISO/IEC TR 18037:2008 standard: -// https://standards.iso.org/ittf/PubliclyAvailableStandards/c051126_ISO_IEC_TR_18037_2008.zip -def ShortFractType : NamedType<"short fract">; -def FractType : NamedType<"fract">; -def LongFractType : NamedType<"long fract">; -def UnsignedShortFractType : NamedType<"unsigned short fract">; -def UnsignedFractType : NamedType<"unsigned fract">; -def UnsignedLongFractType : NamedType<"unsigned long fract">; - -def ShortAccumType : NamedType<"short accum">; -def AccumType : NamedType<"accum">; -def LongAccumType : NamedType<"long accum">; -def UnsignedShortAccumType : NamedType<"unsigned short accum">; -def UnsignedAccumType : NamedType<"unsigned accum">; -def UnsignedLongAccumType : NamedType<"unsigned long accum">; - -def IntHrT : NamedType <"int_hr_t">; -def IntRT : NamedType<"int_r_t">; -def IntLrT : NamedType<"int_lr_t">; -def IntHkT : NamedType<"int_hk_t">; -def IntKT : NamedType<"int_k_t">; -def IntLkT : NamedType<"int_lk_t">; -def UIntUhrT : NamedType<"uint_uhr_t">; -def UIntUrT : NamedType<"uint_ur_t">; -def UIntUlrT : NamedType<"uint_ulr_t">; -def UIntUhkT : NamedType<"uint_uhk_t">; -def UIntUkT : NamedType<"uint_uk_t">; -def UIntUlkT : NamedType<"uint_ulk_t">; - -def StdcExt : StandardSpec<"stdc_ext"> { - // From ISO/IEC TR 18037:2008 standard: - // https://standards.iso.org/ittf/PubliclyAvailableStandards/c051126_ISO_IEC_TR_18037_2008.zip - HeaderSpec StdFix = HeaderSpec< - "stdfix.h", - [], // macros - [IntHrT,IntRT, IntLrT, IntHkT, IntKT, IntLkT, UIntUhrT, UIntUrT, UIntUlrT, UIntUhkT, UIntUkT, UIntUlkT], // types - [], // enums - [ // functions - GuardedFunctionSpec<"abshr", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"absr", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"abslr", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - - GuardedFunctionSpec<"abshk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"absk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"abslk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - - GuardedFunctionSpec<"roundhr", RetValSpec, [ArgSpec, ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"roundr", RetValSpec, [ArgSpec, ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"roundlr", RetValSpec, [ArgSpec, ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - - GuardedFunctionSpec<"roundhk", RetValSpec, [ArgSpec, ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"roundk", RetValSpec, [ArgSpec, ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"roundlk", RetValSpec, [ArgSpec, ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - - GuardedFunctionSpec<"rounduhr", RetValSpec, [ArgSpec, ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"roundur", RetValSpec, [ArgSpec, ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"roundulr", RetValSpec, [ArgSpec, ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - - GuardedFunctionSpec<"rounduhk", RetValSpec, [ArgSpec, ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - 
GuardedFunctionSpec<"rounduk", RetValSpec, [ArgSpec, ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"roundulk", RetValSpec, [ArgSpec, ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - - GuardedFunctionSpec<"hrbits", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"rbits", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"lrbits", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"hkbits", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"kbits", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"lkbits", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"uhrbits", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"urbits", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"ukbits", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"ulrbits", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"uhkbits", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"ulkbits", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - ] - >; - - let Headers = [ - StdFix, - ]; -} diff --git a/libc/test/src/CMakeLists.txt b/libc/test/src/CMakeLists.txt index e121555bd60a9..8ac8f91e98d4c 100644 --- a/libc/test/src/CMakeLists.txt +++ b/libc/test/src/CMakeLists.txt @@ -88,81 +88,3 @@ add_subdirectory(locale) if(${LIBC_TARGET_OS} STREQUAL "linux") add_subdirectory(pthread) endif() - -if(LLVM_RUNTIMES_BUILD OR LIBC_HDRGEN_EXE) - # The public API test below uses tablegen to generate the test - # source file. Since tablegen is not available during a runtimes - # build, we will skip the test. - # If a different libc-hdrgen binary is being used, then also we - # skip the api-test as we cannot generate the test source file. - return() -endif() - -set(public_test ${CMAKE_CURRENT_BINARY_DIR}/public_api_test.cpp) - -set(entrypoints_name_list "") -foreach(entry IN LISTS TARGET_LLVMLIBC_ENTRYPOINTS) - get_target_property(entry_name ${entry} "ENTRYPOINT_NAME") - list(APPEND entrypoints_name_list ${entry_name}) -endforeach() - -# TODO: Remove these when they are added to the TableGen. -list(REMOVE_ITEM entrypoints_name_list "__assert_fail" "__errno_location") -list(TRANSFORM entrypoints_name_list PREPEND "-e=") - -file(GLOB spec_files ${LIBC_SOURCE_DIR}/spec/*.td) - -# Generate api test souce code. 
-add_custom_command( - OUTPUT ${public_test} - COMMAND $ -o ${public_test} - ${entrypoints_name_list} - -I ${LIBC_SOURCE_DIR} - ${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/api.td - - DEPENDS ${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/api.td ${spec_files} - libc-prototype-testgen ${TARGET_PUBLIC_HEADERS} - ${LIBC_TARGET} -) - -add_custom_target(libc-api-test) -add_dependencies(check-libc libc-api-test) - -set( - allocator_entrypoints - libc.src.stdlib.malloc - libc.src.stdlib.calloc - libc.src.stdlib.realloc - libc.src.stdlib.aligned_alloc - libc.src.stdlib.free -) -set(api-test-entrypoints ${TARGET_LLVMLIBC_ENTRYPOINTS}) -list(REMOVE_ITEM api-test-entrypoints ${allocator_entrypoints}) -add_integration_test( - api-test - SUITE - libc-api-test - SRCS - ${public_test} - DEPENDS - ${api-test-entrypoints} -) - -if(COMPILER_RESOURCE_DIR AND LLVM_LIBC_ENABLE_LINTING) - add_custom_target( - libc-api-test-tidy - VERBATIM - COMMAND ${LLVM_LIBC_CLANG_TIDY} --system-headers - --checks=-*,llvmlibc-restrict-system-libc-headers - "--extra-arg=-resource-dir=${COMPILER_RESOURCE_DIR}" - --header-filter=.* - --warnings-as-errors=llvmlibc-* - "-config={CheckOptions: [{key: llvmlibc-restrict-system-libc-headers.Includes, value: '-*, linux/*, asm/*.h, asm-generic/*.h'}]}" - --quiet - -p ${PROJECT_BINARY_DIR} - ${public_test} - DEPENDS - clang-tidy ${public_test} - ) - add_dependencies(libc-api-test libc-api-test-tidy) -endif() diff --git a/libc/utils/HdrGen/CMakeLists.txt b/libc/utils/HdrGen/CMakeLists.txt deleted file mode 100644 index 47f845b9f9a5b..0000000000000 --- a/libc/utils/HdrGen/CMakeLists.txt +++ /dev/null @@ -1,22 +0,0 @@ -include(TableGen) - -if (NOT LLVM_LINK_LLVM_DYLIB) - set(LLVM_LINK_COMPONENTS Support) -endif() - -add_tablegen(libc-hdrgen LIBC - Command.h - Command.cpp - Generator.cpp - Generator.h - IncludeFileCommand.cpp - IncludeFileCommand.h - Main.cpp - PublicAPICommand.cpp - PublicAPICommand.h -) - -target_include_directories(libc-hdrgen PRIVATE ${LLVM_INCLUDE_DIR} ${LLVM_MAIN_INCLUDE_DIR}) -target_link_libraries(libc-hdrgen PRIVATE LibcTableGenUtil) - -add_subdirectory(PrototypeTestGen) diff --git a/libc/utils/HdrGen/Command.cpp b/libc/utils/HdrGen/Command.cpp deleted file mode 100644 index 04462c18aac3f..0000000000000 --- a/libc/utils/HdrGen/Command.cpp +++ /dev/null @@ -1,15 +0,0 @@ -//===-- Base class for header generation commands -------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "Command.h" - -namespace llvm_libc { - -Command::~Command() {} - -} // namespace llvm_libc diff --git a/libc/utils/HdrGen/Command.h b/libc/utils/HdrGen/Command.h deleted file mode 100644 index 42516798ffaec..0000000000000 --- a/libc/utils/HdrGen/Command.h +++ /dev/null @@ -1,54 +0,0 @@ -//===-- Base class for header generation commands ---------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_UTILS_HDRGEN_COMMAND_H -#define LLVM_LIBC_UTILS_HDRGEN_COMMAND_H - -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Support/SourceMgr.h" - -#include - -namespace llvm { - -class raw_ostream; -class RecordKeeper; - -} // namespace llvm - -namespace llvm_libc { - -typedef llvm::SmallVector ArgVector; - -class Command { -public: - class ErrorReporter { - llvm::SMLoc Loc; - const llvm::SourceMgr &SrcMgr; - - public: - ErrorReporter(llvm::SMLoc L, llvm::SourceMgr &SM) : Loc(L), SrcMgr(SM) {} - - [[noreturn]] void printFatalError(llvm::Twine Msg) const { - SrcMgr.PrintMessage(Loc, llvm::SourceMgr::DK_Error, Msg); - std::exit(1); - } - }; - - virtual ~Command(); - - virtual void run(llvm::raw_ostream &OS, const ArgVector &Args, - llvm::StringRef StdHeader, const llvm::RecordKeeper &Records, - const ErrorReporter &Reporter) const = 0; -}; - -} // namespace llvm_libc - -#endif // LLVM_LIBC_UTILS_HDRGEN_COMMAND_H diff --git a/libc/utils/HdrGen/Generator.cpp b/libc/utils/HdrGen/Generator.cpp deleted file mode 100644 index b4e1166b1a37b..0000000000000 --- a/libc/utils/HdrGen/Generator.cpp +++ /dev/null @@ -1,203 +0,0 @@ -//===-- Implementation of the main header generation class ----------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "Generator.h" - -#include "IncludeFileCommand.h" -#include "PublicAPICommand.h" -#include "utils/LibcTableGenUtil/APIIndexer.h" - -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/raw_ostream.h" - -#include -#include - -static const char CommandPrefix[] = "%%"; -static const size_t CommandPrefixSize = llvm::StringRef(CommandPrefix).size(); - -static const char CommentPrefix[] = ""; - -static const char ParamNamePrefix[] = "${"; -static const size_t ParamNamePrefixSize = - llvm::StringRef(ParamNamePrefix).size(); -static const char ParamNameSuffix[] = "}"; -static const size_t ParamNameSuffixSize = - llvm::StringRef(ParamNameSuffix).size(); - -namespace llvm_libc { - -Command *Generator::getCommandHandler(llvm::StringRef CommandName) { - if (CommandName == IncludeFileCommand::Name) { - if (!IncludeFileCmd) - IncludeFileCmd = std::make_unique(); - return IncludeFileCmd.get(); - } else if (CommandName == PublicAPICommand::Name) { - if (!PublicAPICmd) - PublicAPICmd = std::make_unique(EntrypointNameList); - return PublicAPICmd.get(); - } else { - return nullptr; - } -} - -void Generator::parseCommandArgs(llvm::StringRef ArgStr, ArgVector &Args) { - if (!ArgStr.contains(',') && ArgStr.trim(' ').trim('\t').size() == 0) { - // If it is just space between the parenthesis - return; - } - - ArgStr.split(Args, ","); - for (llvm::StringRef &A : Args) { - A = A.trim(' '); - if (A.starts_with(ParamNamePrefix) && A.ends_with(ParamNameSuffix)) { - A = A.drop_front(ParamNamePrefixSize).drop_back(ParamNameSuffixSize); - A = ArgMap[std::string(A)]; - } - } -} - -void Generator::generate(llvm::raw_ostream &OS, - const llvm::RecordKeeper &Records) { - auto DefFileBuffer = llvm::MemoryBuffer::getFile(HeaderDefFile); - 
if (!DefFileBuffer) { - llvm::errs() << "Unable to open " << HeaderDefFile << ".\n"; - std::exit(1); - } - llvm::SourceMgr SrcMgr; - unsigned DefFileID = SrcMgr.AddNewSourceBuffer( - std::move(DefFileBuffer.get()), llvm::SMLoc::getFromPointer(nullptr)); - - llvm::StringRef Content = SrcMgr.getMemoryBuffer(DefFileID)->getBuffer(); - while (true) { - std::pair P = Content.split('\n'); - Content = P.second; - - llvm::StringRef Line = P.first.trim(' '); - if (Line.starts_with(CommandPrefix)) { - Line = Line.drop_front(CommandPrefixSize); - - P = Line.split("("); - // It's possible that we have windows line endings, so strip off the extra - // CR. - P.second = P.second.trim(); - if (P.second.empty() || P.second[P.second.size() - 1] != ')') { - SrcMgr.PrintMessage(llvm::SMLoc::getFromPointer(P.second.data()), - llvm::SourceMgr::DK_Error, - "Command argument list should begin with '(' " - "and end with ')'."); - SrcMgr.PrintMessage(llvm::SMLoc::getFromPointer(P.second.data()), - llvm::SourceMgr::DK_Error, P.second.data()); - SrcMgr.PrintMessage(llvm::SMLoc::getFromPointer(P.second.data()), - llvm::SourceMgr::DK_Error, - std::to_string(P.second.size())); - std::exit(1); - } - llvm::StringRef CommandName = P.first; - Command *Cmd = getCommandHandler(CommandName); - if (Cmd == nullptr) { - SrcMgr.PrintMessage(llvm::SMLoc::getFromPointer(CommandName.data()), - llvm::SourceMgr::DK_Error, - "Unknown command '%%" + CommandName + "'."); - std::exit(1); - } - - llvm::StringRef ArgStr = P.second.drop_back(1); - ArgVector Args; - parseCommandArgs(ArgStr, Args); - - Command::ErrorReporter Reporter( - llvm::SMLoc::getFromPointer(CommandName.data()), SrcMgr); - Cmd->run(OS, Args, StdHeader, Records, Reporter); - } else if (!Line.starts_with(CommentPrefix)) { - // There is no comment or command on this line so we just write it as is. - OS << P.first << "\n"; - } - - if (P.second.empty()) - break; - } -} - -void Generator::generateDecls(llvm::raw_ostream &OS, - const llvm::RecordKeeper &Records) { - - OS << "//===-- C standard declarations for " << StdHeader << " " - << std::string(80 - (42 + StdHeader.size()), '-') << "===//\n" - << "//\n" - << "// Part of the LLVM Project, under the Apache License v2.0 with LLVM " - "Exceptions.\n" - << "// See https://llvm.org/LICENSE.txt for license information.\n" - << "// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n" - << "//\n" - << "//" - "===-------------------------------------------------------------------" - "---===//\n\n"; - - std::string HeaderGuard(StdHeader.size(), '\0'); - llvm::transform(StdHeader, HeaderGuard.begin(), [](const char C) -> char { - return !isalnum(C) ? '_' : llvm::toUpper(C); - }); - OS << "#ifndef __LLVM_LIBC_DECLARATIONS_" << HeaderGuard << "\n" - << "#define __LLVM_LIBC_DECLARATIONS_" << HeaderGuard << "\n\n"; - - OS << "#ifndef __LIBC_ATTRS\n" - << "#define __LIBC_ATTRS\n" - << "#endif\n\n"; - - OS << "#ifdef __cplusplus\n" - << "extern \"C\" {\n" - << "#endif\n\n"; - - APIIndexer G(StdHeader, Records); - for (auto &Name : EntrypointNameList) { - // Filter out functions not exported by this header. 
- if (G.FunctionSpecMap.find(Name) == G.FunctionSpecMap.end()) - continue; - - const llvm::Record *FunctionSpec = G.FunctionSpecMap[Name]; - const llvm::Record *RetValSpec = FunctionSpec->getValueAsDef("Return"); - const llvm::Record *ReturnType = RetValSpec->getValueAsDef("ReturnType"); - - OS << G.getTypeAsString(ReturnType) << " " << Name << "("; - - auto ArgsList = FunctionSpec->getValueAsListOfDefs("Args"); - for (size_t i = 0; i < ArgsList.size(); ++i) { - const llvm::Record *ArgType = ArgsList[i]->getValueAsDef("ArgType"); - OS << G.getTypeAsString(ArgType); - if (i < ArgsList.size() - 1) - OS << ", "; - } - - OS << ") __LIBC_ATTRS;\n\n"; - } - - // Make another pass over entrypoints to emit object declarations. - for (const auto &Name : EntrypointNameList) { - if (G.ObjectSpecMap.find(Name) == G.ObjectSpecMap.end()) - continue; - const llvm::Record *ObjectSpec = G.ObjectSpecMap[Name]; - auto Type = ObjectSpec->getValueAsString("Type"); - OS << "extern " << Type << " " << Name << " __LIBC_ATTRS;\n"; - } - - // Emit a final newline if we emitted any object declarations. - if (llvm::any_of(EntrypointNameList, [&](const std::string &Name) { - return G.ObjectSpecMap.find(Name) != G.ObjectSpecMap.end(); - })) - OS << "\n"; - - OS << "#ifdef __cplusplus\n" - << "}\n" - << "#endif\n\n"; - OS << "#endif\n"; -} - -} // namespace llvm_libc diff --git a/libc/utils/HdrGen/Generator.h b/libc/utils/HdrGen/Generator.h deleted file mode 100644 index 1c149d2115640..0000000000000 --- a/libc/utils/HdrGen/Generator.h +++ /dev/null @@ -1,60 +0,0 @@ -//===-- The main header generation class ------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_UTILS_HDRGEN_GENERATOR_H -#define LLVM_LIBC_UTILS_HDRGEN_GENERATOR_H - -#include "Command.h" - -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" - -#include -#include -#include - -namespace llvm { - -class raw_ostream; -class RecordKeeper; - -} // namespace llvm - -namespace llvm_libc { - -class Command; - -class Generator { - llvm::StringRef HeaderDefFile; - const std::vector &EntrypointNameList; - llvm::StringRef StdHeader; - std::unordered_map &ArgMap; - - std::unique_ptr IncludeFileCmd; - std::unique_ptr PublicAPICmd; - - Command *getCommandHandler(llvm::StringRef CommandName); - - void parseCommandArgs(llvm::StringRef ArgStr, ArgVector &Args); - - void printError(llvm::StringRef Msg); - -public: - Generator(const std::string &DefFile, const std::vector &EN, - const std::string &Header, - std::unordered_map &Map) - : HeaderDefFile(DefFile), EntrypointNameList(EN), StdHeader(Header), - ArgMap(Map) {} - - void generate(llvm::raw_ostream &OS, const llvm::RecordKeeper &Records); - void generateDecls(llvm::raw_ostream &OS, const llvm::RecordKeeper &Records); -}; - -} // namespace llvm_libc - -#endif // LLVM_LIBC_UTILS_HDRGEN_GENERATOR_H diff --git a/libc/utils/HdrGen/IncludeFileCommand.cpp b/libc/utils/HdrGen/IncludeFileCommand.cpp deleted file mode 100644 index 43efe43585eb2..0000000000000 --- a/libc/utils/HdrGen/IncludeFileCommand.cpp +++ /dev/null @@ -1,50 +0,0 @@ -//===-- Implementation of IncludeFileCommand ------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "IncludeFileCommand.h" - -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/SourceMgr.h" - -#include - -namespace llvm_libc { - -const char IncludeFileCommand::Name[] = "include_file"; - -void IncludeFileCommand::run(llvm::raw_ostream &OS, const ArgVector &Args, - llvm::StringRef StdHeader, - const llvm::RecordKeeper &Records, - const Command::ErrorReporter &Reporter) const { - if (Args.size() != 1) { - Reporter.printFatalError( - "%%include_file command takes exactly 1 argument."); - } - - llvm::StringRef IncludeFile = Args[0]; - auto Buffer = llvm::MemoryBuffer::getFileAsStream(IncludeFile); - if (!Buffer) - Reporter.printFatalError(llvm::StringRef("Unable to open ") + IncludeFile); - - llvm::StringRef Content = Buffer.get()->getBuffer(); - - // If the included file has %%begin() command listed, then we want to write - // only the content after the begin command. - // TODO: The way the content is split below does not allow space within the - // the parentheses and, before and after the command. This probably is too - // strict and should be relaxed. - auto P = Content.split("\n%%begin()\n"); - if (P.second.empty()) { - // There was no %%begin in the content. - OS << P.first; - } else { - OS << P.second; - } -} - -} // namespace llvm_libc diff --git a/libc/utils/HdrGen/IncludeFileCommand.h b/libc/utils/HdrGen/IncludeFileCommand.h deleted file mode 100644 index b3a6ff5462ded..0000000000000 --- a/libc/utils/HdrGen/IncludeFileCommand.h +++ /dev/null @@ -1,32 +0,0 @@ -//===-- Class which implements the %%include_file command -------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_UTILS_HDRGEN_INCLUDE_COMMAND_H -#define LLVM_LIBC_UTILS_HDRGEN_INCLUDE_COMMAND_H - -#include "Command.h" - -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" - -#include - -namespace llvm_libc { - -class IncludeFileCommand : public Command { -public: - static const char Name[]; - - void run(llvm::raw_ostream &OS, const ArgVector &Args, - llvm::StringRef StdHeader, const llvm::RecordKeeper &Records, - const Command::ErrorReporter &Reporter) const override; -}; - -} // namespace llvm_libc - -#endif // LLVM_LIBC_UTILS_HDRGEN_INCLUDE_COMMAND_H diff --git a/libc/utils/HdrGen/Main.cpp b/libc/utils/HdrGen/Main.cpp deleted file mode 100644 index f795e96e45c57..0000000000000 --- a/libc/utils/HdrGen/Main.cpp +++ /dev/null @@ -1,62 +0,0 @@ -//===-- "main" function of libc-hdrgen ------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "Generator.h" - -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/TableGen/Main.h" - -#include -#include - -static llvm::cl::opt - HeaderDefFile("def", llvm::cl::desc("Path to the .h.def file."), - llvm::cl::value_desc(""), llvm::cl::Required); -static llvm::cl::opt StandardHeader( - "header", - llvm::cl::desc("The standard header file which is to be generated."), - llvm::cl::value_desc("
")); -static llvm::cl::list EntrypointNamesOption( - "e", llvm::cl::value_desc(""), - llvm::cl::desc( - "Each --e is one entrypoint (generated from entrypoints.txt)"), - llvm::cl::OneOrMore); -static llvm::cl::list ReplacementValues( - "args", llvm::cl::desc("Command separated = pairs."), - llvm::cl::value_desc("[,name=value]")); -static llvm::cl::opt ExportDecls( - "export-decls", - llvm::cl::desc("Output a new header containing only the entrypoints.")); - -static void -ParseArgValuePairs(std::unordered_map &Map) { - for (std::string &R : ReplacementValues) { - auto Pair = llvm::StringRef(R).split('='); - Map[std::string(Pair.first)] = std::string(Pair.second); - } -} - -static bool HeaderGeneratorMain(llvm::raw_ostream &OS, - const llvm::RecordKeeper &Records) { - std::unordered_map ArgMap; - ParseArgValuePairs(ArgMap); - llvm_libc::Generator G(HeaderDefFile, EntrypointNamesOption, StandardHeader, - ArgMap); - if (ExportDecls) - G.generateDecls(OS, Records); - else - G.generate(OS, Records); - - return false; -} - -int main(int argc, char *argv[]) { - llvm::cl::ParseCommandLineOptions(argc, argv); - return TableGenMain(argv[0], &HeaderGeneratorMain); -} diff --git a/libc/utils/HdrGen/PrototypeTestGen/CMakeLists.txt b/libc/utils/HdrGen/PrototypeTestGen/CMakeLists.txt deleted file mode 100644 index 9e25c21c6b359..0000000000000 --- a/libc/utils/HdrGen/PrototypeTestGen/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -add_tablegen(libc-prototype-testgen LLVM_LIBC - PrototypeTestGen.cpp -) -target_link_libraries(libc-prototype-testgen PRIVATE LibcTableGenUtil) -target_include_directories(libc-prototype-testgen PRIVATE ${LIBC_SOURCE_DIR}) diff --git a/libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp b/libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp deleted file mode 100644 index 7acaf75c4c1c8..0000000000000 --- a/libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp +++ /dev/null @@ -1,106 +0,0 @@ -//===-- PrototypeTestGen.cpp ----------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "utils/LibcTableGenUtil/APIIndexer.h" - -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/TableGen/Main.h" -#include "llvm/TableGen/Record.h" - -namespace { - -llvm::cl::list - EntrypointNamesOption("e", llvm::cl::desc(""), - llvm::cl::OneOrMore); - -} // anonymous namespace - -bool TestGeneratorMain(llvm::raw_ostream &OS, - const llvm::RecordKeeper &records) { - OS << "#include \"src/__support/CPP/type_traits.h\"\n"; - llvm_libc::APIIndexer G(records); - std::unordered_set headerFileSet; - for (const auto &entrypoint : EntrypointNamesOption) { - if (entrypoint == "errno") - continue; - auto match = G.FunctionToHeaderMap.find(entrypoint); - if (match == G.FunctionToHeaderMap.end()) { - auto objectMatch = G.ObjectToHeaderMap.find(entrypoint); - if (objectMatch != G.ObjectToHeaderMap.end()) { - headerFileSet.insert(objectMatch->second); - continue; - } - - llvm::errs() << "ERROR: entrypoint '" << entrypoint - << "' could not be found in spec in any public header\n"; - return true; - } - headerFileSet.insert(match->second); - } - for (const auto &header : headerFileSet) - OS << "#include <" << header << ">\n"; - - OS << '\n'; - - OS << "extern \"C\" int main() {\n"; - for (const auto &entrypoint : EntrypointNamesOption) { - if (entrypoint == "errno") - continue; - auto match = G.FunctionSpecMap.find(entrypoint); - if (match == G.FunctionSpecMap.end()) { - auto objectMatch = G.ObjectSpecMap.find(entrypoint); - if (objectMatch != G.ObjectSpecMap.end()) { - auto entrypointPtr = entrypoint + "_ptr"; - llvm::Record *objectSpec = G.ObjectSpecMap[entrypoint]; - auto objectType = objectSpec->getValueAsString("Type"); - // We just make sure that the global object is present. - OS << " " << objectType << " *" << entrypointPtr << " = &" - << entrypoint << ";\n"; - OS << " ++" << entrypointPtr << ";\n"; // To avoid unused var warning. - continue; - } - llvm::errs() << "ERROR: entrypoint '" << entrypoint - << "' could not be found in spec in any public header\n"; - return true; - } - llvm::Record *functionSpec = match->second; - llvm::Record *retValSpec = functionSpec->getValueAsDef("Return"); - std::string returnType = - G.getTypeAsString(retValSpec->getValueAsDef("ReturnType")); - // _Noreturn is an indication for the compiler that a function - // doesn't return, and isn't a type understood by c++ templates. 
- if (llvm::StringRef(returnType).contains("_Noreturn")) - returnType = "void"; - - OS << " static_assert(LIBC_NAMESPACE::cpp::is_same_v<" << returnType - << '('; - auto args = functionSpec->getValueAsListOfDefs("Args"); - for (size_t i = 0, size = args.size(); i < size; ++i) { - llvm::Record *argType = args[i]->getValueAsDef("ArgType"); - OS << G.getTypeAsString(argType); - if (i < size - 1) - OS << ", "; - } - OS << ") __NOEXCEPT, decltype(" << entrypoint << ")>, "; - OS << '"' << entrypoint - << " prototype in TableGen does not match public header" << '"'; - OS << ");\n"; - } - - OS << '\n'; - OS << " return 0;\n"; - OS << "}\n\n"; - - return false; -} - -int main(int argc, char *argv[]) { - llvm::cl::ParseCommandLineOptions(argc, argv); - return TableGenMain(argv[0], TestGeneratorMain); -} diff --git a/libc/utils/HdrGen/PublicAPICommand.cpp b/libc/utils/HdrGen/PublicAPICommand.cpp deleted file mode 100644 index 5c46c82a23853..0000000000000 --- a/libc/utils/HdrGen/PublicAPICommand.cpp +++ /dev/null @@ -1,331 +0,0 @@ -//===-- Implementation of PublicAPICommand --------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "PublicAPICommand.h" - -#include "utils/LibcTableGenUtil/APIIndexer.h" - -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/TableGen/Record.h" -#include -#include - -// Text blocks for macro definitions and type decls can be indented to -// suit the surrounding tablegen listing. We need to dedent such blocks -// before writing them out. -static void dedentAndWrite(llvm::StringRef Text, llvm::raw_ostream &OS) { - llvm::SmallVector Lines; - llvm::SplitString(Text, Lines, "\n"); - size_t shortest_indent = 1024; - for (llvm::StringRef L : Lines) { - llvm::StringRef Indent = L.take_while([](char c) { return c == ' '; }); - size_t IndentSize = Indent.size(); - if (Indent.size() == L.size()) { - // Line is all spaces so no point noting the indent. 
- continue; - } - if (IndentSize < shortest_indent) - shortest_indent = IndentSize; - } - for (llvm::StringRef L : Lines) { - if (L.size() >= shortest_indent) - OS << L.drop_front(shortest_indent) << '\n'; - } -} - -static std::string getTypeHdrName(const std::string &Name) { - llvm::SmallVector Parts; - llvm::SplitString(llvm::StringRef(Name), Parts); - return llvm::join(Parts.begin(), Parts.end(), "_"); -} - -namespace llvm_libc { - -static bool isAsciiStart(char C) { - return (C >= 'A' && C <= 'Z') || (C >= 'a' && C <= 'z') || C == '_'; -} - -static bool isAsciiContinue(char C) { - return isAsciiStart(C) || (C >= '0' && C <= '9'); -} - -static bool isAsciiIdentifier(llvm::StringRef S) { - if (S.empty()) - return false; - if (!isAsciiStart(S[0])) - return false; - for (char C : S.drop_front()) - if (!isAsciiContinue(C)) - return false; - return true; -} - -static AttributeStyle getAttributeStyle(const llvm::Record *Instance) { - llvm::StringRef Style = Instance->getValueAsString("Style"); - return llvm::StringSwitch(Style) - .Case("cxx11", AttributeStyle::Cxx11) - .Case("gnu", AttributeStyle::Gnu) - .Case("declspec", AttributeStyle::Declspec) - .Default(AttributeStyle::Gnu); -} - -static AttributeNamespace getAttributeNamespace(const llvm::Record *Instance) { - llvm::StringRef Namespace = Instance->getValueAsString("Namespace"); - return llvm::StringSwitch(Namespace) - .Case("clang", AttributeNamespace::Clang) - .Case("gnu", AttributeNamespace::Gnu) - .Default(AttributeNamespace::None); -} - -using AttributeMap = llvm::DenseMap; - -template -static AttributeMap collectAttributeMacros(const SpecMap &Spec, - const FuncList &Funcs) { - llvm::DenseMap MacroAttr; - for (const auto &Name : Funcs) { - auto Iter = Spec.find(Name); - if (Iter == Spec.end()) - continue; - - const llvm::Record *FunctionSpec = Iter->second; - for (const llvm::Record *Attr : - FunctionSpec->getValueAsListOfDefs("Attributes")) - MacroAttr[Attr->getValueAsString("Macro")] = Attr; - } - return MacroAttr; -} - -static void emitAttributeMacroDecls(const AttributeMap &MacroAttr, - llvm::raw_ostream &OS) { - for (auto &[Macro, Attr] : MacroAttr) { - std::vector Instances = - Attr->getValueAsListOfDefs("Instances"); - llvm::SmallVector> Styles; - std::transform(Instances.begin(), Instances.end(), - std::back_inserter(Styles), - [&](const llvm::Record *Instance) - -> std::pair { - auto Style = getAttributeStyle(Instance); - return {Style, Instance}; - }); - // 1. If __cplusplus is defined and cxx11 style is provided, define the - // macro using cxx11 version with the following priority: - // 1a. If there is no namespace (so the macro is supposed to be - // compiler-independent), use this version first. This macro will be - // tested via __has_cpp_attribute. - // 1b. If the attribute is a clang attribute, check for __clang__. - // 1c. If the attribute is a gnu attribute, check for __GNUC__. - // 2. Otherwise, if __GNUC__ is defined and gnu style is provided, - // define the macro using gnu version; - // 3. Otherwise, if _MSC_VER is defined and __declspec is provided, define - // the macro using __declspec version; - // 4. Fallback to empty macro. 
- std::sort(Styles.begin(), Styles.end(), [&](auto &a, auto &b) { - if (a.first == AttributeStyle::Cxx11 && b.first == AttributeStyle::Cxx11) - return getAttributeNamespace(a.second) < - getAttributeNamespace(b.second); - return a.first < b.first; - }); - for (auto &[Style, Instance] : Styles) { - llvm::StringRef Attr = Instance->getValueAsString("Attr"); - if (Style == AttributeStyle::Cxx11) { - OS << "#if !defined(" << Macro << ") && defined(__cplusplus)"; - AttributeNamespace Namespace = getAttributeNamespace(Instance); - if (Namespace == AttributeNamespace::Clang) - OS << " && defined(__clang__)\n"; - else if (Namespace == AttributeNamespace::Gnu) - OS << " && defined(__GNUC__)\n"; - else - OS << '\n'; - if (isAsciiIdentifier(Attr) && Namespace != AttributeNamespace::None) - OS << "#if __has_attribute(" << Attr << ")\n"; - else - OS << "#if __has_cpp_attribute(" << Attr << ")\n"; - OS << "#define " << Macro << " [["; - if (Namespace == AttributeNamespace::Clang) - OS << "clang::"; - else if (Namespace == AttributeNamespace::Gnu) - OS << "gnu::"; - OS << Attr << "]]\n"; - if (isAsciiIdentifier(Attr)) - OS << "#endif\n"; - OS << "#endif\n"; - } - if (Style == AttributeStyle::Gnu) { - OS << "#if !defined(" << Macro << ") && defined(__GNUC__)\n"; - if (isAsciiIdentifier(Attr)) - OS << "#if __has_attribute(" << Attr << ")\n"; - OS << "#define " << Macro << " __attribute__(("; - OS << Attr << "))\n"; - if (isAsciiIdentifier(Attr)) - OS << "#endif\n"; - OS << "#endif\n"; - } - if (Style == AttributeStyle::Declspec) { - OS << "#if !defined(" << Macro << ") && defined(_MSC_VER)\n"; - OS << "#define " << Macro << " __declspec("; - OS << Attr << ")\n"; - OS << "#endif\n"; - } - } - OS << "#if !defined(" << Macro << ")\n"; - OS << "#define " << Macro << '\n'; - OS << "#endif\n"; - } - - if (!MacroAttr.empty()) - OS << '\n'; -} - -static void emitAttributeMacroForFunction(const llvm::Record *FunctionSpec, - llvm::raw_ostream &OS) { - std::vector Attributes = - FunctionSpec->getValueAsListOfDefs("Attributes"); - llvm::interleave( - Attributes.begin(), Attributes.end(), - [&](const llvm::Record *Attr) { OS << Attr->getValueAsString("Macro"); }, - [&]() { OS << ' '; }); - if (!Attributes.empty()) - OS << ' '; -} - -static void emitUndefsForAttributeMacros(const AttributeMap &MacroAttr, - llvm::raw_ostream &OS) { - if (!MacroAttr.empty()) - OS << '\n'; - for (auto &[Macro, Attr] : MacroAttr) - OS << "#undef " << Macro << '\n'; -} - -static void writeAPIFromIndex(APIIndexer &G, - std::vector EntrypointNameList, - llvm::raw_ostream &OS) { - for (auto &Pair : G.MacroDefsMap) { - const std::string &Name = Pair.first; - if (!G.MacroSpecMap.count(Name)) - llvm::PrintFatalError(Name + " not found in any standard spec.\n"); - - const llvm::Record *MacroDef = Pair.second; - dedentAndWrite(MacroDef->getValueAsString("Defn"), OS); - - OS << '\n'; - } - - for (auto &TypeName : G.RequiredTypes) { - if (!G.TypeSpecMap.count(TypeName)) - llvm::PrintFatalError(TypeName + " not found in any standard spec.\n"); - OS << "#include \n"; - } - OS << '\n'; - - if (G.Enumerations.size() != 0) - OS << "enum {" << '\n'; - for (const auto &Name : G.Enumerations) { - if (!G.EnumerationSpecMap.count(Name)) - llvm::PrintFatalError( - Name + " is not listed as an enumeration in any standard spec.\n"); - - const llvm::Record *EnumerationSpec = G.EnumerationSpecMap[Name]; - OS << " " << EnumerationSpec->getValueAsString("Name"); - auto Value = EnumerationSpec->getValueAsString("Value"); - if (Value == "__default__") { - OS << ",\n"; - 
} else { - OS << " = " << Value << ",\n"; - } - } - if (G.Enumerations.size() != 0) - OS << "};\n\n"; - - // Collect and declare macros for attributes - AttributeMap MacroAttr = - collectAttributeMacros(G.FunctionSpecMap, EntrypointNameList); - emitAttributeMacroDecls(MacroAttr, OS); - - OS << "__BEGIN_C_DECLS\n\n"; - for (auto &Name : EntrypointNameList) { - auto Iter = G.FunctionSpecMap.find(Name); - - // Functions that aren't in this header file are skipped as - // opposed to erroring out because the list of functions being - // iterated over is the complete list of functions with - // entrypoints. Thus this is filtering out the functions that - // don't go to this header file, whereas the other, similar - // conditionals above are more of a sanity check. - if (Iter == G.FunctionSpecMap.end()) - continue; - - const llvm::Record *FunctionSpec = Iter->second; - const llvm::Record *RetValSpec = FunctionSpec->getValueAsDef("Return"); - const llvm::Record *ReturnType = RetValSpec->getValueAsDef("ReturnType"); - - // TODO: https://github.com/llvm/llvm-project/issues/81208 - // Ideally, we should group functions based on their guarding macros. - bool Guarded = - (FunctionSpec->getType()->getAsString() == "GuardedFunctionSpec"); - - if (Guarded) - OS << "#ifdef " << FunctionSpec->getValueAsString("Guard") << "\n"; - - // Emit attribute macros for the function. Space is automatically added. - emitAttributeMacroForFunction(FunctionSpec, OS); - OS << G.getTypeAsString(ReturnType) << " " << Name << "("; - - auto ArgsList = FunctionSpec->getValueAsListOfDefs("Args"); - for (size_t i = 0; i < ArgsList.size(); ++i) { - const llvm::Record *ArgType = ArgsList[i]->getValueAsDef("ArgType"); - OS << G.getTypeAsString(ArgType); - if (i < ArgsList.size() - 1) - OS << ", "; - } - - OS << ") __NOEXCEPT;\n"; - - if (Guarded) - OS << "#endif // " << FunctionSpec->getValueAsString("Guard") << "\n"; - - OS << "\n"; - } - - // Make another pass over entrypoints to emit object declarations. - for (const auto &Name : EntrypointNameList) { - auto Iter = G.ObjectSpecMap.find(Name); - if (Iter == G.ObjectSpecMap.end()) - continue; - const llvm::Record *ObjectSpec = Iter->second; - auto Type = ObjectSpec->getValueAsString("Type"); - OS << "extern " << Type << " " << Name << ";\n"; - } - OS << "__END_C_DECLS\n"; - - // Undef file-level attribute macros. - emitUndefsForAttributeMacros(MacroAttr, OS); -} - -void writePublicAPI(llvm::raw_ostream &OS, const llvm::RecordKeeper &Records) {} - -const char PublicAPICommand::Name[] = "public_api"; - -void PublicAPICommand::run(llvm::raw_ostream &OS, const ArgVector &Args, - llvm::StringRef StdHeader, - const llvm::RecordKeeper &Records, - const Command::ErrorReporter &Reporter) const { - if (Args.size() != 0) - Reporter.printFatalError("public_api command does not take any arguments."); - - APIIndexer G(StdHeader, Records); - writeAPIFromIndex(G, EntrypointNameList, OS); -} - -} // namespace llvm_libc diff --git a/libc/utils/HdrGen/PublicAPICommand.h b/libc/utils/HdrGen/PublicAPICommand.h deleted file mode 100644 index 49078f4857f90..0000000000000 --- a/libc/utils/HdrGen/PublicAPICommand.h +++ /dev/null @@ -1,48 +0,0 @@ -//===-- Implementation of PublicAPICommand ----------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_UTILS_HDRGEN_PUBLICAPICOMMAND_H -#define LLVM_LIBC_UTILS_HDRGEN_PUBLICAPICOMMAND_H - -#include "Command.h" - -#include "llvm/ADT/StringRef.h" -#include "llvm/TableGen/Error.h" -#include "llvm/TableGen/Record.h" - -namespace llvm { - -class raw_ostream; -class Record; -class RecordKeeper; - -} // namespace llvm - -namespace llvm_libc { - -enum class AttributeStyle { Cxx11 = 0, Gnu = 1, Declspec = 2 }; -enum class AttributeNamespace { None = 0, Clang = 1, Gnu = 2 }; - -class PublicAPICommand : public Command { -private: - const std::vector &EntrypointNameList; - -public: - static const char Name[]; - - PublicAPICommand(const std::vector &EntrypointNames) - : EntrypointNameList(EntrypointNames) {} - - void run(llvm::raw_ostream &OS, const ArgVector &Args, - llvm::StringRef StdHeader, const llvm::RecordKeeper &Records, - const Command::ErrorReporter &Reporter) const override; -}; - -} // namespace llvm_libc - -#endif // LLVM_LIBC_UTILS_HDRGEN_PUBLICAPICOMMAND_H diff --git a/libc/utils/HdrGen/README.md b/libc/utils/HdrGen/README.md deleted file mode 100644 index a61cf3bacbb0e..0000000000000 --- a/libc/utils/HdrGen/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# The LLVM libc header generation system - -LLVM libc uses a header generation scheme to generate public as well as internal -header files. This directory contains the implementation of the header generator -which drives this header generation scheme. diff --git a/libc/utils/LibcTableGenUtil/APIIndexer.cpp b/libc/utils/LibcTableGenUtil/APIIndexer.cpp deleted file mode 100644 index cf66d0a7aef6e..0000000000000 --- a/libc/utils/LibcTableGenUtil/APIIndexer.cpp +++ /dev/null @@ -1,173 +0,0 @@ -//===-- Implementation of APIIndexer class --------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "APIIndexer.h" - -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/TableGen/Error.h" -#include "llvm/TableGen/Record.h" - -namespace llvm_libc { - -static const char NamedTypeClassName[] = "NamedType"; -static const char PtrTypeClassName[] = "PtrType"; -static const char RestrictedPtrTypeClassName[] = "RestrictedPtrType"; -static const char ConstTypeClassName[] = "ConstType"; -static const char StructTypeClassName[] = "Struct"; - -static const char StandardSpecClassName[] = "StandardSpec"; -static const char PublicAPIClassName[] = "PublicAPI"; - -static bool isa(const llvm::Record *Def, const llvm::Record *TypeClass) { - const llvm::RecordRecTy *RecordType = Def->getType(); - llvm::ArrayRef Classes = RecordType->getClasses(); - // We want exact types. That is, we don't want the classes listed in - // spec.td to be subclassed. Hence, we do not want the record |Def| - // to be of more than one class type.. 
- if (Classes.size() != 1) - return false; - return Classes[0] == TypeClass; -} - -bool APIIndexer::isaNamedType(const llvm::Record *Def) { - return isa(Def, NamedTypeClass); -} - -bool APIIndexer::isaStructType(const llvm::Record *Def) { - return isa(Def, StructClass); -} - -bool APIIndexer::isaPtrType(const llvm::Record *Def) { - return isa(Def, PtrTypeClass); -} - -bool APIIndexer::isaConstType(const llvm::Record *Def) { - return isa(Def, ConstTypeClass); -} - -bool APIIndexer::isaRestrictedPtrType(const llvm::Record *Def) { - return isa(Def, RestrictedPtrTypeClass); -} - -bool APIIndexer::isaStandardSpec(const llvm::Record *Def) { - return isa(Def, StandardSpecClass); -} - -bool APIIndexer::isaPublicAPI(const llvm::Record *Def) { - return isa(Def, PublicAPIClass); -} - -std::string APIIndexer::getTypeAsString(const llvm::Record *TypeRecord) { - if (isaNamedType(TypeRecord) || isaStructType(TypeRecord)) { - return std::string(TypeRecord->getValueAsString("Name")); - } else if (isaPtrType(TypeRecord)) { - return getTypeAsString(TypeRecord->getValueAsDef("PointeeType")) + " *"; - } else if (isaConstType(TypeRecord)) { - return std::string("const ") + - getTypeAsString(TypeRecord->getValueAsDef("UnqualifiedType")); - } else if (isaRestrictedPtrType(TypeRecord)) { - return getTypeAsString(TypeRecord->getValueAsDef("PointeeType")) + - " *__restrict"; - } else { - llvm::PrintFatalError(TypeRecord->getLoc(), "Invalid type.\n"); - } -} - -void APIIndexer::indexStandardSpecDef(const llvm::Record *StandardSpec) { - auto HeaderSpecList = StandardSpec->getValueAsListOfDefs("Headers"); - for (const llvm::Record *HeaderSpec : HeaderSpecList) { - llvm::StringRef Header = HeaderSpec->getValueAsString("Name"); - if (!StdHeader.has_value() || Header == StdHeader) { - PublicHeaders.emplace(Header); - auto MacroSpecList = HeaderSpec->getValueAsListOfDefs("Macros"); - // TODO: Trigger a fatal error on duplicate specs. - for (const llvm::Record *MacroSpec : MacroSpecList) - MacroSpecMap[std::string(MacroSpec->getValueAsString("Name"))] = - MacroSpec; - - auto TypeSpecList = HeaderSpec->getValueAsListOfDefs("Types"); - for (const llvm::Record *TypeSpec : TypeSpecList) - TypeSpecMap[std::string(TypeSpec->getValueAsString("Name"))] = TypeSpec; - - auto FunctionSpecList = HeaderSpec->getValueAsListOfDefs("Functions"); - for (const llvm::Record *FunctionSpec : FunctionSpecList) { - auto FunctionName = std::string(FunctionSpec->getValueAsString("Name")); - FunctionSpecMap[FunctionName] = FunctionSpec; - FunctionToHeaderMap[FunctionName] = std::string(Header); - } - - auto EnumerationSpecList = - HeaderSpec->getValueAsListOfDefs("Enumerations"); - for (const llvm::Record *EnumerationSpec : EnumerationSpecList) { - EnumerationSpecMap[std::string( - EnumerationSpec->getValueAsString("Name"))] = EnumerationSpec; - } - - auto ObjectSpecList = HeaderSpec->getValueAsListOfDefs("Objects"); - for (const llvm::Record *ObjectSpec : ObjectSpecList) { - auto ObjectName = std::string(ObjectSpec->getValueAsString("Name")); - ObjectSpecMap[ObjectName] = ObjectSpec; - ObjectToHeaderMap[ObjectName] = std::string(Header); - } - } - } -} - -void APIIndexer::indexPublicAPIDef(const llvm::Record *PublicAPI) { - // While indexing the public API, we do not check if any of the entities - // requested is from an included standard. Such a check is done while - // generating the API. 
- auto MacroDefList = PublicAPI->getValueAsListOfDefs("Macros"); - for (const llvm::Record *MacroDef : MacroDefList) - MacroDefsMap[std::string(MacroDef->getValueAsString("Name"))] = MacroDef; - - auto TypeList = PublicAPI->getValueAsListOfStrings("Types"); - for (llvm::StringRef TypeName : TypeList) - RequiredTypes.insert(std::string(TypeName)); - - auto StructList = PublicAPI->getValueAsListOfStrings("Structs"); - for (llvm::StringRef StructName : StructList) - Structs.insert(std::string(StructName)); - - auto FunctionList = PublicAPI->getValueAsListOfStrings("Functions"); - for (llvm::StringRef FunctionName : FunctionList) - Functions.insert(std::string(FunctionName)); - - auto EnumerationList = PublicAPI->getValueAsListOfStrings("Enumerations"); - for (llvm::StringRef EnumerationName : EnumerationList) - Enumerations.insert(std::string(EnumerationName)); - - auto ObjectList = PublicAPI->getValueAsListOfStrings("Objects"); - for (llvm::StringRef ObjectName : ObjectList) - Objects.insert(std::string(ObjectName)); -} - -void APIIndexer::index(const llvm::RecordKeeper &Records) { - NamedTypeClass = Records.getClass(NamedTypeClassName); - PtrTypeClass = Records.getClass(PtrTypeClassName); - RestrictedPtrTypeClass = Records.getClass(RestrictedPtrTypeClassName); - StructClass = Records.getClass(StructTypeClassName); - ConstTypeClass = Records.getClass(ConstTypeClassName); - StandardSpecClass = Records.getClass(StandardSpecClassName); - PublicAPIClass = Records.getClass(PublicAPIClassName); - - const auto &DefsMap = Records.getDefs(); - for (auto &Pair : DefsMap) { - const llvm::Record *Def = Pair.second.get(); - if (isaStandardSpec(Def)) - indexStandardSpecDef(Def); - if (isaPublicAPI(Def)) { - if (!StdHeader.has_value() || - Def->getValueAsString("HeaderName") == StdHeader) - indexPublicAPIDef(Def); - } - } -} - -} // namespace llvm_libc diff --git a/libc/utils/LibcTableGenUtil/APIIndexer.h b/libc/utils/LibcTableGenUtil/APIIndexer.h deleted file mode 100644 index b8bca15ba131c..0000000000000 --- a/libc/utils/LibcTableGenUtil/APIIndexer.h +++ /dev/null @@ -1,86 +0,0 @@ -//===-- A class to index libc API listed in tablegen files ------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_UTILS_LIBC_TABLE_GEN_UTILS_API_INDEXER_H -#define LLVM_LIBC_UTILS_LIBC_TABLE_GEN_UTILS_API_INDEXER_H - -#include "llvm/ADT/StringRef.h" -#include "llvm/TableGen/Record.h" - -#include -#include -#include -#include - -namespace llvm_libc { - -class APIIndexer { -private: - std::optional StdHeader; - - // TableGen classes in spec.td. 
- const llvm::Record *NamedTypeClass; - const llvm::Record *PtrTypeClass; - const llvm::Record *RestrictedPtrTypeClass; - const llvm::Record *ConstTypeClass; - const llvm::Record *StructClass; - const llvm::Record *StandardSpecClass; - const llvm::Record *PublicAPIClass; - - bool isaNamedType(const llvm::Record *Def); - bool isaStructType(const llvm::Record *Def); - bool isaPtrType(const llvm::Record *Def); - bool isaConstType(const llvm::Record *Def); - bool isaRestrictedPtrType(const llvm::Record *Def); - bool isaStandardSpec(const llvm::Record *Def); - bool isaPublicAPI(const llvm::Record *Def); - - void indexStandardSpecDef(const llvm::Record *StandardSpec); - void indexPublicAPIDef(const llvm::Record *PublicAPI); - void index(const llvm::RecordKeeper &Records); - -public: - using NameToRecordMapping = - std::unordered_map; - using NameSet = std::unordered_set; - - // This indexes all headers, not just a specified one. - explicit APIIndexer(const llvm::RecordKeeper &Records) - : StdHeader(std::nullopt) { - index(Records); - } - - APIIndexer(llvm::StringRef Header, const llvm::RecordKeeper &Records) - : StdHeader(Header) { - index(Records); - } - - // Mapping from names to records defining them. - NameToRecordMapping MacroSpecMap; - NameToRecordMapping TypeSpecMap; - NameToRecordMapping EnumerationSpecMap; - NameToRecordMapping FunctionSpecMap; - NameToRecordMapping MacroDefsMap; - NameToRecordMapping ObjectSpecMap; - - std::unordered_map FunctionToHeaderMap; - std::unordered_map ObjectToHeaderMap; - - NameSet RequiredTypes; - NameSet Structs; - NameSet Enumerations; - NameSet Functions; - NameSet Objects; - NameSet PublicHeaders; - - std::string getTypeAsString(const llvm::Record *TypeRecord); -}; - -} // namespace llvm_libc - -#endif // LLVM_LIBC_UTILS_LIBC_TABLE_GEN_UTILS_API_INDEXER_H diff --git a/libc/utils/LibcTableGenUtil/CMakeLists.txt b/libc/utils/LibcTableGenUtil/CMakeLists.txt deleted file mode 100644 index 9421383394a35..0000000000000 --- a/libc/utils/LibcTableGenUtil/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -if (NOT LLVM_LINK_LLVM_DYLIB) - set(flags "DISABLE_LLVM_LINK_LLVM_DYLIB;LINK_COMPONENTS;Support;TableGen") -else() - set(flags "LINK_COMPONENTS;TableGen") -endif() -add_llvm_library( - LibcTableGenUtil - APIIndexer.cpp - APIIndexer.h - ${flags} -) -target_include_directories(LibcTableGenUtil PUBLIC ${LIBC_SOURCE_DIR}) -target_include_directories(LibcTableGenUtil PRIVATE ${LLVM_INCLUDE_DIR} ${LLVM_MAIN_INCLUDE_DIR}) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 569c01b61daac..f14065ab03799 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -186,13 +186,8 @@ if(LIBC_GPU_BUILD) list(APPEND RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES "libc") endif() -set(NEED_LIBC_HDRGEN FALSE) -if("libc" IN_LIST LLVM_ENABLE_RUNTIMES) - set(NEED_LIBC_HDRGEN TRUE) -endif() foreach(_name ${LLVM_RUNTIME_TARGETS}) if("libc" IN_LIST RUNTIMES_${_name}_LLVM_ENABLE_RUNTIMES) - set(NEED_LIBC_HDRGEN TRUE) if("${_name}" STREQUAL "amdgcn-amd-amdhsa" OR "${_name}" STREQUAL "nvptx64-nvidia-cuda") set(LLVM_LIBC_GPU_BUILD ON) endif() @@ -202,27 +197,11 @@ if("${LIBC_TARGET_TRIPLE}" STREQUAL "amdgcn-amd-amdhsa" OR "${LIBC_TARGET_TRIPLE}" STREQUAL "nvptx64-nvidia-cuda") set(LLVM_LIBC_GPU_BUILD ON) endif() -if(NEED_LIBC_HDRGEN) - # To build the libc runtime, we need to be able to build few libc build - # tools from the "libc" project. So, we add it to the list of enabled - # projects. 
- if (NOT "libc" IN_LIST LLVM_ENABLE_PROJECTS) - message(STATUS "Enabling libc project to build libc build tools") - list(APPEND LLVM_ENABLE_PROJECTS "libc") - endif() +if (NOT "libc" IN_LIST LLVM_ENABLE_PROJECTS AND LLVM_LIBC_GPU_BUILD) + message(STATUS "Enabling libc project to build libc testing tools") + list(APPEND LLVM_ENABLE_PROJECTS "libc") endif() -foreach(proj IN LISTS LLVM_ENABLE_RUNTIMES) - if("${proj}" IN_LIST LLVM_ENABLE_PROJECTS) - # The 'libc' project bootstraps a few executables via the project build and - # should not emit an error currently. - if(NOT (NEED_LIBC_HDRGEN AND "${proj}" STREQUAL "libc")) - message(FATAL_ERROR "Runtime project \"${proj}\" found in LLVM_ENABLE_PROJECTS and LLVM_ENABLE_RUNTIMES. It must only appear in one of them and that one should almost always be LLVM_ENABLE_RUNTIMES.") - endif() - endif() -endforeach() -unset(NEED_LIBC_HDRGEN) - # LLVM_ENABLE_PROJECTS_USED is `ON` if the user has ever used the # `LLVM_ENABLE_PROJECTS` CMake cache variable. This exists for # several reasons: diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 57a56c6a60415..40fdb14e81333 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -520,24 +520,6 @@ if(build_runtimes) endif() endforeach() endif() - if("libc" IN_LIST LLVM_ENABLE_PROJECTS AND - (LLVM_LIBC_FULL_BUILD OR LLVM_LIBC_GPU_BUILD)) - if(LIBC_HDRGEN_EXE) - set(hdrgen_exe ${LIBC_HDRGEN_EXE}) - else() - if(TARGET ${LIBC_TABLEGEN_EXE}) - set(hdrgen_exe $) - else() - set(hdrgen_exe ${LIBC_TABLEGEN_EXE}) - endif() - set(hdrgen_deps ${LIBC_TABLEGEN_TARGET}) - endif() - if(NOT hdrgen_exe) - message(FATAL_ERROR "libc-hdrgen executable missing") - endif() - list(APPEND extra_cmake_args "-DLIBC_HDRGEN_EXE=${hdrgen_exe}") - list(APPEND extra_deps ${hdrgen_deps}) - endif() if(LLVM_LIBC_GPU_BUILD) list(APPEND extra_cmake_args "-DLLVM_LIBC_GPU_BUILD=ON") if("libc" IN_LIST RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES) From a0c4f854cad2b97e44a1b58dc1fd982e1c4d60f3 Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Tue, 3 Dec 2024 12:36:04 -0800 Subject: [PATCH 123/191] [libc] Change ctype to be encoding independent (#110574) The previous implementation of the ctype functions assumed ASCII. This patch changes to a switch/case implementation that looks odd, but actually is easier for the compiler to understand and optimize. 
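
As a standalone illustration of the encoding problem (a hedged sketch, not code from this patch: the EBCDIC code points below are assumptions based on the common single-byte EBCDIC layout, where the lowercase letters sit in three runs, a-i at 0x81-0x89, j-r at 0x91-0x99 and s-z at 0xA2-0xA9), a contiguous 'a'..'z' range check accepts code points that are not letters at all:

    // ebcdic_range_check_demo.cpp -- illustrative sketch only; the EBCDIC
    // values below are assumptions and are not used anywhere in libc.
    #include <cstdio>

    int main() {
      unsigned misclassified = 0;
      // Walk every code point between the assumed EBCDIC 'a' (0x81) and 'z' (0xA9).
      for (unsigned c = 0x81; c <= 0xA9; ++c) {
        // The three runs that actually hold lowercase letters in EBCDIC.
        bool is_letter = (c >= 0x81 && c <= 0x89) || (c >= 0x91 && c <= 0x99) ||
                         (c >= 0xA2 && c <= 0xA9);
        // A naive contiguous "a <= c && c <= z" check accepts every c in this
        // loop, so each non-letter code point counted here is a misclassification.
        if (!is_letter)
          ++misclassified;
      }
      std::printf("gap code points a contiguous range check would accept: %u\n",
                  misclassified);
      return 0;
    }

The switch/case form used throughout this patch avoids that problem because each character constant is interpreted in the target's execution character set, so no assumption about the numeric layout of the letters is made.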
--- libc/src/__support/ctype_utils.h | 569 +++++++++++++++++- libc/src/__support/high_precision_decimal.h | 10 +- libc/src/__support/integer_literals.h | 28 +- libc/src/__support/integer_to_string.h | 7 +- libc/src/__support/str_to_float.h | 2 +- libc/src/__support/str_to_integer.h | 10 +- libc/src/ctype/isxdigit.cpp | 3 +- libc/src/ctype/isxdigit_l.cpp | 3 +- libc/src/ctype/toupper.cpp | 6 +- libc/src/ctype/toupper_l.cpp | 4 +- libc/src/stdio/printf_core/fixed_converter.h | 14 +- .../stdio/printf_core/float_dec_converter.h | 6 +- .../stdio/printf_core/float_hex_converter.h | 19 +- .../printf_core/float_inf_nan_converter.h | 9 +- libc/src/stdio/printf_core/int_converter.h | 21 +- libc/src/stdio/scanf_core/converter_utils.h | 10 - libc/src/stdio/scanf_core/float_converter.cpp | 18 +- libc/src/stdio/scanf_core/int_converter.cpp | 12 +- libc/src/stdio/scanf_core/ptr_converter.cpp | 4 +- libc/test/UnitTest/MemoryMatcher.cpp | 4 +- .../src/__support/CPP/stringview_test.cpp | 5 +- libc/test/src/ctype/isalnum_test.cpp | 37 +- libc/test/src/ctype/isalpha_test.cpp | 30 +- libc/test/src/ctype/isdigit_test.cpp | 30 +- libc/test/src/ctype/islower_test.cpp | 32 +- libc/test/src/ctype/isupper_test.cpp | 32 +- libc/test/src/ctype/isxdigit_test.cpp | 34 +- libc/test/src/ctype/tolower_test.cpp | 43 +- libc/test/src/ctype/toupper_test.cpp | 43 +- libc/test/src/stdlib/StrtolTest.h | 27 +- libc/test/src/string/strcmp_test.cpp | 20 +- .../llvm-project-overlay/libc/BUILD.bazel | 3 + .../libc/test/UnitTest/BUILD.bazel | 5 +- 33 files changed, 915 insertions(+), 185 deletions(-) diff --git a/libc/src/__support/ctype_utils.h b/libc/src/__support/ctype_utils.h index 91f6ce8cabd8d..be0f25330af9e 100644 --- a/libc/src/__support/ctype_utils.h +++ b/libc/src/__support/ctype_utils.h @@ -15,44 +15,567 @@ namespace LIBC_NAMESPACE_DECL { namespace internal { -// ------------------------------------------------------ -// Rationale: Since these classification functions are -// called in other functions, we will avoid the overhead -// of a function call by inlining them. -// ------------------------------------------------------ +// ----------------------------------------------------------------------------- +// ****************** WARNING ****************** +// ****************** DO NOT TRY TO OPTIMIZE THESE FUNCTIONS! ****************** +// ----------------------------------------------------------------------------- +// This switch/case form is easier for the compiler to understand, and is +// optimized into a form that is almost always the same as or better than +// versions written by hand (see https://godbolt.org/z/qvrebqvvr). Also this +// form makes these functions encoding independent. If you want to rewrite these +// functions, make sure you have benchmarks to show your new solution is faster, +// as well as a way to support non-ASCII character encodings. -LIBC_INLINE static constexpr bool isalpha(unsigned ch) { - return (ch | 32) - 'a' < 26; +// Similarly, do not change these functions to use case ranges. e.g. +// bool islower(int ch) { +// switch(ch) { +// case 'a'...'z': +// return true; +// } +// } +// This assumes the character ranges are contiguous, which they aren't in +// EBCDIC. Technically we could use some smaller ranges, but that's even harder +// to read. 
+ +LIBC_INLINE static constexpr bool islower(int ch) { + switch (ch) { + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + return true; + default: + return false; + } } -LIBC_INLINE static constexpr bool isdigit(unsigned ch) { - return (ch - '0') < 10; +LIBC_INLINE static constexpr bool isupper(int ch) { + switch (ch) { + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + return true; + default: + return false; + } } -LIBC_INLINE static constexpr bool isalnum(unsigned ch) { - return isalpha(ch) || isdigit(ch); +LIBC_INLINE static constexpr bool isdigit(int ch) { + switch (ch) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return true; + default: + return false; + } } -LIBC_INLINE static constexpr bool isgraph(unsigned ch) { - return 0x20 < ch && ch < 0x7f; +LIBC_INLINE static constexpr int tolower(int ch) { + switch (ch) { + case 'A': + return 'a'; + case 'B': + return 'b'; + case 'C': + return 'c'; + case 'D': + return 'd'; + case 'E': + return 'e'; + case 'F': + return 'f'; + case 'G': + return 'g'; + case 'H': + return 'h'; + case 'I': + return 'i'; + case 'J': + return 'j'; + case 'K': + return 'k'; + case 'L': + return 'l'; + case 'M': + return 'm'; + case 'N': + return 'n'; + case 'O': + return 'o'; + case 'P': + return 'p'; + case 'Q': + return 'q'; + case 'R': + return 'r'; + case 'S': + return 's'; + case 'T': + return 't'; + case 'U': + return 'u'; + case 'V': + return 'v'; + case 'W': + return 'w'; + case 'X': + return 'x'; + case 'Y': + return 'y'; + case 'Z': + return 'z'; + default: + return ch; + } } -LIBC_INLINE static constexpr bool islower(unsigned ch) { - return (ch - 'a') < 26; +LIBC_INLINE static constexpr int toupper(int ch) { + switch (ch) { + case 'a': + return 'A'; + case 'b': + return 'B'; + case 'c': + return 'C'; + case 'd': + return 'D'; + case 'e': + return 'E'; + case 'f': + return 'F'; + case 'g': + return 'G'; + case 'h': + return 'H'; + case 'i': + return 'I'; + case 'j': + return 'J'; + case 'k': + return 'K'; + case 'l': + return 'L'; + case 'm': + return 'M'; + case 'n': + return 'N'; + case 'o': + return 'O'; + case 'p': + return 'P'; + case 'q': + return 'Q'; + case 'r': + return 'R'; + case 's': + return 'S'; + case 't': + return 'T'; + case 'u': + return 'U'; + case 'v': + return 'V'; + case 'w': + return 'W'; + case 'x': + return 'X'; + case 'y': + return 'Y'; + case 'z': + return 'Z'; + default: + return ch; + } } -LIBC_INLINE static constexpr bool isupper(unsigned ch) { - return (ch - 'A') < 26; +LIBC_INLINE static constexpr bool isalpha(int ch) { + switch (ch) { + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 
'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + return true; + default: + return false; + } } -LIBC_INLINE static constexpr bool isspace(unsigned ch) { - return ch == ' ' || (ch - '\t') < 5; +LIBC_INLINE static constexpr bool isalnum(int ch) { + switch (ch) { + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return true; + default: + return false; + } } -LIBC_INLINE static constexpr int tolower(int ch) { - if (isupper(ch)) - return ch + ('a' - 'A'); - return ch; +LIBC_INLINE static constexpr int b36_char_to_int(int ch) { + switch (ch) { + case '0': + return 0; + case '1': + return 1; + case '2': + return 2; + case '3': + return 3; + case '4': + return 4; + case '5': + return 5; + case '6': + return 6; + case '7': + return 7; + case '8': + return 8; + case '9': + return 9; + case 'a': + case 'A': + return 10; + case 'b': + case 'B': + return 11; + case 'c': + case 'C': + return 12; + case 'd': + case 'D': + return 13; + case 'e': + case 'E': + return 14; + case 'f': + case 'F': + return 15; + case 'g': + case 'G': + return 16; + case 'h': + case 'H': + return 17; + case 'i': + case 'I': + return 18; + case 'j': + case 'J': + return 19; + case 'k': + case 'K': + return 20; + case 'l': + case 'L': + return 21; + case 'm': + case 'M': + return 22; + case 'n': + case 'N': + return 23; + case 'o': + case 'O': + return 24; + case 'p': + case 'P': + return 25; + case 'q': + case 'Q': + return 26; + case 'r': + case 'R': + return 27; + case 's': + case 'S': + return 28; + case 't': + case 'T': + return 29; + case 'u': + case 'U': + return 30; + case 'v': + case 'V': + return 31; + case 'w': + case 'W': + return 32; + case 'x': + case 'X': + return 33; + case 'y': + case 'Y': + return 34; + case 'z': + case 'Z': + return 35; + default: + return 0; + } +} + +LIBC_INLINE static constexpr int int_to_b36_char(int num) { + // Can't actually use LIBC_ASSERT here because it depends on integer_to_string + // which depends on this. 
+ + // LIBC_ASSERT(num < 36); + switch (num) { + case 0: + return '0'; + case 1: + return '1'; + case 2: + return '2'; + case 3: + return '3'; + case 4: + return '4'; + case 5: + return '5'; + case 6: + return '6'; + case 7: + return '7'; + case 8: + return '8'; + case 9: + return '9'; + case 10: + return 'a'; + case 11: + return 'b'; + case 12: + return 'c'; + case 13: + return 'd'; + case 14: + return 'e'; + case 15: + return 'f'; + case 16: + return 'g'; + case 17: + return 'h'; + case 18: + return 'i'; + case 19: + return 'j'; + case 20: + return 'k'; + case 21: + return 'l'; + case 22: + return 'm'; + case 23: + return 'n'; + case 24: + return 'o'; + case 25: + return 'p'; + case 26: + return 'q'; + case 27: + return 'r'; + case 28: + return 's'; + case 29: + return 't'; + case 30: + return 'u'; + case 31: + return 'v'; + case 32: + return 'w'; + case 33: + return 'x'; + case 34: + return 'y'; + case 35: + return 'z'; + default: + return '!'; + } +} + +LIBC_INLINE static constexpr bool isspace(int ch) { + switch (ch) { + case ' ': + case '\t': + case '\n': + case '\v': + case '\f': + case '\r': + return true; + default: + return false; + } +} + +// not yet encoding independent. +LIBC_INLINE static constexpr bool isgraph(int ch) { + return 0x20 < ch && ch < 0x7f; } } // namespace internal diff --git a/libc/src/__support/high_precision_decimal.h b/libc/src/__support/high_precision_decimal.h index 20088d6d79791..922dce484aa6b 100644 --- a/libc/src/__support/high_precision_decimal.h +++ b/libc/src/__support/high_precision_decimal.h @@ -178,9 +178,11 @@ class HighPrecisionDecimal { if (digit_index >= this->num_digits) { return new_digits - 1; } - if (this->digits[digit_index] != power_of_five[digit_index] - '0') { + if (this->digits[digit_index] != + internal::b36_char_to_int(power_of_five[digit_index])) { return new_digits - - ((this->digits[digit_index] < power_of_five[digit_index] - '0') + ((this->digits[digit_index] < + internal::b36_char_to_int(power_of_five[digit_index])) ? 1 : 0); } @@ -337,8 +339,8 @@ class HighPrecisionDecimal { } ++total_digits; if (this->num_digits < MAX_NUM_DIGITS) { - this->digits[this->num_digits] = - static_cast(num_string[num_cur] - '0'); + this->digits[this->num_digits] = static_cast( + internal::b36_char_to_int(num_string[num_cur])); ++this->num_digits; } else if (num_string[num_cur] != '0') { this->truncated = true; diff --git a/libc/src/__support/integer_literals.h b/libc/src/__support/integer_literals.h index 4c5c4c4166681..0298ec7d088d6 100644 --- a/libc/src/__support/integer_literals.h +++ b/libc/src/__support/integer_literals.h @@ -13,12 +13,13 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_INTEGER_LITERALS_H #define LLVM_LIBC_SRC___SUPPORT_INTEGER_LITERALS_H -#include "src/__support/CPP/limits.h" // CHAR_BIT +#include "src/__support/CPP/limits.h" // CHAR_BIT +#include "src/__support/ctype_utils.h" #include "src/__support/macros/attributes.h" // LIBC_INLINE #include "src/__support/macros/config.h" -#include "src/__support/uint128.h" // UInt128 -#include // size_t -#include // uintxx_t +#include "src/__support/uint128.h" // UInt128 +#include // size_t +#include // uintxx_t namespace LIBC_NAMESPACE_DECL { @@ -75,26 +76,13 @@ template struct DigitBuffer { push(*str); } - // Returns the digit for a particular character. - // Returns INVALID_DIGIT if the character is invalid. 
- LIBC_INLINE static constexpr uint8_t get_digit_value(const char c) { - const auto to_lower = [](char c) { return c | 32; }; - const auto is_digit = [](char c) { return c >= '0' && c <= '9'; }; - const auto is_alpha = [](char c) { - return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'); - }; - if (is_digit(c)) - return static_cast(c - '0'); - if (base > 10 && is_alpha(c)) - return static_cast(to_lower(c) - 'a' + 10); - return INVALID_DIGIT; - } - // Adds a single character to this buffer. LIBC_INLINE constexpr void push(char c) { if (c == '\'') return; // ' is valid but not taken into account. - const uint8_t value = get_digit_value(c); + const int b36_val = internal::b36_char_to_int(c); + const uint8_t value = static_cast( + b36_val < base && (b36_val != 0 || c == '0') ? b36_val : INVALID_DIGIT); if (value == INVALID_DIGIT || size >= MAX_DIGITS) { // During constant evaluation `__builtin_unreachable` will halt the // compiler as it is not executable. This is preferable over `assert` that diff --git a/libc/src/__support/integer_to_string.h b/libc/src/__support/integer_to_string.h index 40d45a05ceadb..ea620087584cb 100644 --- a/libc/src/__support/integer_to_string.h +++ b/libc/src/__support/integer_to_string.h @@ -69,6 +69,7 @@ #include "src/__support/CPP/type_traits.h" #include "src/__support/big_int.h" // make_integral_or_big_int_unsigned_t #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { @@ -214,9 +215,9 @@ template class IntegerToString { using UNSIGNED_T = make_integral_or_big_int_unsigned_t; LIBC_INLINE static char digit_char(uint8_t digit) { - if (digit < 10) - return '0' + static_cast(digit); - return (Fmt::IS_UPPERCASE ? 'A' : 'a') + static_cast(digit - 10); + const int result = internal::int_to_b36_char(digit); + return static_cast(Fmt::IS_UPPERCASE ? internal::toupper(result) + : result); } LIBC_INLINE static void diff --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h index 80ea334d15c03..b4d5646822df3 100644 --- a/libc/src/__support/str_to_float.h +++ b/libc/src/__support/str_to_float.h @@ -909,7 +909,7 @@ decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT, cpp::numeric_limits::max() / BASE; while (true) { if (isdigit(src[index])) { - uint32_t digit = src[index] - '0'; + uint32_t digit = b36_char_to_int(src[index]); seen_digit = true; if (mantissa < bitstype_max_div_by_base) { diff --git a/libc/src/__support/str_to_integer.h b/libc/src/__support/str_to_integer.h index 86611f9a6902d..8e569e8a7feb0 100644 --- a/libc/src/__support/str_to_integer.h +++ b/libc/src/__support/str_to_integer.h @@ -42,14 +42,6 @@ first_non_whitespace(const char *__restrict src, return src + src_cur; } -LIBC_INLINE int b36_char_to_int(char input) { - if (isdigit(input)) - return input - '0'; - if (isalpha(input)) - return (input | 32) + 10 - 'a'; - return 0; -} - // checks if the next 3 characters of the string pointer are the start of a // hexadecimal number. Does not advance the string pointer. 
LIBC_INLINE bool @@ -57,7 +49,7 @@ is_hex_start(const char *__restrict src, size_t src_len = cpp::numeric_limits::max()) { if (src_len < 3) return false; - return *src == '0' && (*(src + 1) | 32) == 'x' && isalnum(*(src + 2)) && + return *src == '0' && tolower(*(src + 1)) == 'x' && isalnum(*(src + 2)) && b36_char_to_int(*(src + 2)) < 16; } diff --git a/libc/src/ctype/isxdigit.cpp b/libc/src/ctype/isxdigit.cpp index 6b730c354db08..81f645c6f49fc 100644 --- a/libc/src/ctype/isxdigit.cpp +++ b/libc/src/ctype/isxdigit.cpp @@ -16,7 +16,8 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isxdigit, (int c)) { const unsigned ch = static_cast(c); - return static_cast(internal::isdigit(ch) || (ch | 32) - 'a' < 6); + return static_cast(internal::isalnum(ch) && + internal::b36_char_to_int(ch) < 16); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/isxdigit_l.cpp b/libc/src/ctype/isxdigit_l.cpp index 8a5c7d4d28ab1..eddfd20a2da3b 100644 --- a/libc/src/ctype/isxdigit_l.cpp +++ b/libc/src/ctype/isxdigit_l.cpp @@ -16,7 +16,8 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isxdigit_l, (int c, locale_t)) { const unsigned ch = static_cast(c); - return static_cast(internal::isdigit(ch) || (ch | 32) - 'a' < 6); + return static_cast(internal::isalnum(ch) && + internal::b36_char_to_int(ch) < 16); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/toupper.cpp b/libc/src/ctype/toupper.cpp index b5a23fc7f588b..1e1e8fc400711 100644 --- a/libc/src/ctype/toupper.cpp +++ b/libc/src/ctype/toupper.cpp @@ -14,10 +14,6 @@ namespace LIBC_NAMESPACE_DECL { -LLVM_LIBC_FUNCTION(int, toupper, (int c)) { - if (internal::islower(c)) - return c - ('a' - 'A'); - return c; -} +LLVM_LIBC_FUNCTION(int, toupper, (int c)) { return internal::toupper(c); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/toupper_l.cpp b/libc/src/ctype/toupper_l.cpp index f536ff3623616..a435ca1ab5d41 100644 --- a/libc/src/ctype/toupper_l.cpp +++ b/libc/src/ctype/toupper_l.cpp @@ -15,9 +15,7 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, toupper_l, (int c, locale_t)) { - if (internal::islower(c)) - return c - ('a' - 'A'); - return c; + return internal::toupper(c); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/printf_core/fixed_converter.h b/libc/src/stdio/printf_core/fixed_converter.h index c8812d77b62e3..ba0a62d9fcb87 100644 --- a/libc/src/stdio/printf_core/fixed_converter.h +++ b/libc/src/stdio/printf_core/fixed_converter.h @@ -11,6 +11,7 @@ #include "include/llvm-libc-macros/stdfix-macros.h" #include "src/__support/CPP/string_view.h" +#include "src/__support/ctype_utils.h" #include "src/__support/fixed_point/fx_bits.h" #include "src/__support/fixed_point/fx_rep.h" #include "src/__support/integer_to_string.h" @@ -68,10 +69,6 @@ LIBC_INLINE int convert_fixed(Writer *writer, const FormatSection &to_conv) { using LARep = fixed_point::FXRep; using StorageType = LARep::StorageType; - // All of the letters will be defined relative to variable a, which will be - // the appropriate case based on the name of the conversion. This converts any - // conversion name into the letter 'a' with the appropriate case. - const char a = (to_conv.conv_name & 32) | 'A'; FormatFlags flags = to_conv.flags; bool is_negative; @@ -179,9 +176,9 @@ LIBC_INLINE int convert_fixed(Writer *writer, const FormatSection &to_conv) { // unspecified. 
RoundDirection round; char first_digit_after = fraction_digits[precision]; - if (first_digit_after > '5') { + if (internal::b36_char_to_int(first_digit_after) > 5) { round = RoundDirection::Up; - } else if (first_digit_after < '5') { + } else if (internal::b36_char_to_int(first_digit_after) < 5) { round = RoundDirection::Down; } else { // first_digit_after == '5' @@ -204,7 +201,8 @@ LIBC_INLINE int convert_fixed(Writer *writer, const FormatSection &to_conv) { keep_rounding = false; char cur_digit = fraction_digits[digit_to_round]; // if the digit should not be rounded up - if (round == RoundDirection::Even && ((cur_digit - '0') % 2) == 0) { + if (round == RoundDirection::Even && + (internal::b36_char_to_int(cur_digit) % 2) == 0) { // break out of the loop break; } @@ -246,7 +244,7 @@ LIBC_INLINE int convert_fixed(Writer *writer, const FormatSection &to_conv) { char sign_char = 0; // Check if the conv name is uppercase - if (a == 'A') { + if (internal::isupper(to_conv.conv_name)) { // These flags are only for signed conversions, so this removes them if the // conversion is unsigned. flags = FormatFlags(flags & diff --git a/libc/src/stdio/printf_core/float_dec_converter.h b/libc/src/stdio/printf_core/float_dec_converter.h index e39ba6ecea8d4..d93457fcafd7f 100644 --- a/libc/src/stdio/printf_core/float_dec_converter.h +++ b/libc/src/stdio/printf_core/float_dec_converter.h @@ -13,6 +13,7 @@ #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/rounding_mode.h" #include "src/__support/big_int.h" // is_big_int_v +#include "src/__support/ctype_utils.h" #include "src/__support/float_to_string.h" #include "src/__support/integer_to_string.h" #include "src/__support/libc_assert.h" @@ -587,8 +588,6 @@ LIBC_INLINE int convert_float_dec_exp_typed(Writer *writer, int exponent = float_bits.get_explicit_exponent(); StorageType mantissa = float_bits.get_explicit_mantissa(); - const char a = (to_conv.conv_name & 32) | 'A'; - char sign_char = 0; if (float_bits.is_neg()) @@ -734,7 +733,8 @@ LIBC_INLINE int convert_float_dec_exp_typed(Writer *writer, round = get_round_direction(last_digit, truncated, float_bits.sign()); RET_IF_RESULT_NEGATIVE(float_writer.write_last_block( - digits, maximum, round, final_exponent, a + 'E' - 'A')); + digits, maximum, round, final_exponent, + internal::islower(to_conv.conv_name) ? 'e' : 'E')); RET_IF_RESULT_NEGATIVE(float_writer.right_pad()); return WRITE_OK; diff --git a/libc/src/stdio/printf_core/float_hex_converter.h b/libc/src/stdio/printf_core/float_hex_converter.h index 0b3ff3dd1cbfd..b264b5cf20728 100644 --- a/libc/src/stdio/printf_core/float_hex_converter.h +++ b/libc/src/stdio/printf_core/float_hex_converter.h @@ -12,6 +12,7 @@ #include "src/__support/CPP/string_view.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/rounding_mode.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" #include "src/stdio/printf_core/converter_utils.h" #include "src/stdio/printf_core/core_structs.h" @@ -28,10 +29,6 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer, const FormatSection &to_conv) { using LDBits = fputil::FPBits; using StorageType = LDBits::StorageType; - // All of the letters will be defined relative to variable a, which will be - // the appropriate case based on the name of the conversion. This converts any - // conversion name into the letter 'a' with the appropriate case. 
- const char a = (to_conv.conv_name & 32) | 'A'; bool is_negative; int exponent; @@ -138,9 +135,10 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer, size_t mant_cur = mant_len; size_t first_non_zero = 1; for (; mant_cur > 0; --mant_cur, mantissa >>= 4) { - char mant_mod_16 = static_cast(mantissa) & 15; - char new_digit = static_cast( - (mant_mod_16 > 9) ? (mant_mod_16 - 10 + a) : (mant_mod_16 + '0')); + char mant_mod_16 = static_cast(mantissa % 16); + char new_digit = static_cast(internal::int_to_b36_char(mant_mod_16)); + if (internal::isupper(to_conv.conv_name)) + new_digit = static_cast(internal::toupper(new_digit)); mant_buffer[mant_cur - 1] = new_digit; if (new_digit != '0' && first_non_zero < mant_cur) first_non_zero = mant_cur; @@ -168,7 +166,8 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer, size_t exp_cur = EXP_LEN; for (; exponent > 0; --exp_cur, exponent /= 10) { - exp_buffer[exp_cur - 1] = static_cast((exponent % 10) + '0'); + exp_buffer[exp_cur - 1] = + static_cast(internal::int_to_b36_char(exponent % 10)); } if (exp_cur == EXP_LEN) { // if nothing else was written, write a 0. exp_buffer[EXP_LEN - 1] = '0'; @@ -187,7 +186,7 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer, constexpr size_t PREFIX_LEN = 2; char prefix[PREFIX_LEN]; prefix[0] = '0'; - prefix[1] = a + ('x' - 'a'); + prefix[1] = internal::islower(to_conv.conv_name) ? 'x' : 'X'; const cpp::string_view prefix_str(prefix, PREFIX_LEN); // If the precision is greater than the actual result, pad with 0s @@ -200,7 +199,7 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer, constexpr cpp::string_view HEXADECIMAL_POINT("."); // This is for the letter 'p' before the exponent. - const char exp_separator = a + ('p' - 'a'); + const char exp_separator = internal::islower(to_conv.conv_name) ? 'p' : 'P'; constexpr int EXP_SEPARATOR_LEN = 1; padding = static_cast(to_conv.min_width - (sign_char > 0 ? 1 : 0) - diff --git a/libc/src/stdio/printf_core/float_inf_nan_converter.h b/libc/src/stdio/printf_core/float_inf_nan_converter.h index a7da682b835be..3e41612e21c9f 100644 --- a/libc/src/stdio/printf_core/float_inf_nan_converter.h +++ b/libc/src/stdio/printf_core/float_inf_nan_converter.h @@ -10,6 +10,7 @@ #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_FLOAT_INF_NAN_CONVERTER_H #include "src/__support/FPUtil/FPBits.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" #include "src/stdio/printf_core/converter_utils.h" #include "src/stdio/printf_core/core_structs.h" @@ -26,8 +27,6 @@ using StorageType = fputil::FPBits::StorageType; LIBC_INLINE int convert_inf_nan(Writer *writer, const FormatSection &to_conv) { // All of the letters will be defined relative to variable a, which will be // the appropriate case based on the case of the conversion. - const char a = (to_conv.conv_name & 32) | 'A'; - bool is_negative; StorageType mantissa; if (to_conv.length_modifier == LengthModifier::L) { @@ -66,9 +65,11 @@ LIBC_INLINE int convert_inf_nan(Writer *writer, const FormatSection &to_conv) { if (sign_char) RET_IF_RESULT_NEGATIVE(writer->write(sign_char)); if (mantissa == 0) { // inf - RET_IF_RESULT_NEGATIVE(writer->write(a == 'a' ? "inf" : "INF")); + RET_IF_RESULT_NEGATIVE( + writer->write(internal::islower(to_conv.conv_name) ? "inf" : "INF")); } else { // nan - RET_IF_RESULT_NEGATIVE(writer->write(a == 'a' ? "nan" : "NAN")); + RET_IF_RESULT_NEGATIVE( + writer->write(internal::islower(to_conv.conv_name) ? 
"nan" : "NAN")); } if (padding > 0 && ((to_conv.flags & FormatFlags::LEFT_JUSTIFIED) == diff --git a/libc/src/stdio/printf_core/int_converter.h b/libc/src/stdio/printf_core/int_converter.h index f345e86b97a69..d0af229f89be5 100644 --- a/libc/src/stdio/printf_core/int_converter.h +++ b/libc/src/stdio/printf_core/int_converter.h @@ -11,6 +11,7 @@ #include "src/__support/CPP/span.h" #include "src/__support/CPP/string_view.h" +#include "src/__support/ctype_utils.h" #include "src/__support/integer_to_string.h" #include "src/__support/macros/config.h" #include "src/stdio/printf_core/converter_utils.h" @@ -23,11 +24,6 @@ namespace LIBC_NAMESPACE_DECL { namespace printf_core { -// These functions only work on characters that are already known to be in the -// alphabet. Their behavior is undefined otherwise. -LIBC_INLINE constexpr char to_lower(char a) { return a | 32; } -LIBC_INLINE constexpr bool is_lower(char a) { return (a & 32) > 0; } - namespace details { using HexFmt = IntegerToString; @@ -49,14 +45,14 @@ LIBC_INLINE constexpr size_t num_buf_size() { LIBC_INLINE cpp::optional num_to_strview(uintmax_t num, cpp::span bufref, char conv_name) { - if (to_lower(conv_name) == 'x') { - if (is_lower(conv_name)) + if (internal::tolower(conv_name) == 'x') { + if (internal::islower(conv_name)) return HexFmt::format_to(bufref, num); else return HexFmtUppercase::format_to(bufref, num); } else if (conv_name == 'o') { return OctFmt::format_to(bufref, num); - } else if (to_lower(conv_name) == 'b') { + } else if (internal::tolower(conv_name) == 'b') { return BinFmt::format_to(bufref, num); } else { return DecFmt::format_to(bufref, num); @@ -72,7 +68,6 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) { uintmax_t num = static_cast(to_conv.conv_val_raw); bool is_negative = false; FormatFlags flags = to_conv.flags; - const char a = is_lower(to_conv.conv_name) ? 'a' : 'A'; // If the conversion is signed, then handle negative values. if (to_conv.conv_name == 'd' || to_conv.conv_name == 'i') { @@ -116,16 +111,16 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) { // conversions. Since hexadecimal is unsigned these will never conflict. size_t prefix_len; char prefix[2]; - if ((to_lower(to_conv.conv_name) == 'x') && + if ((internal::tolower(to_conv.conv_name) == 'x') && ((flags & FormatFlags::ALTERNATE_FORM) != 0) && num != 0) { prefix_len = 2; prefix[0] = '0'; - prefix[1] = a + ('x' - 'a'); - } else if ((to_lower(to_conv.conv_name) == 'b') && + prefix[1] = internal::islower(to_conv.conv_name) ? 'x' : 'X'; + } else if ((internal::tolower(to_conv.conv_name) == 'b') && ((flags & FormatFlags::ALTERNATE_FORM) != 0) && num != 0) { prefix_len = 2; prefix[0] = '0'; - prefix[1] = a + ('b' - 'a'); + prefix[1] = internal::islower(to_conv.conv_name) ? 'b' : 'B'; } else { prefix_len = (sign_char == 0 ? 
0 : 1); prefix[0] = sign_char; diff --git a/libc/src/stdio/scanf_core/converter_utils.h b/libc/src/stdio/scanf_core/converter_utils.h index 61954556b838a..6f4d16cffb19c 100644 --- a/libc/src/stdio/scanf_core/converter_utils.h +++ b/libc/src/stdio/scanf_core/converter_utils.h @@ -19,16 +19,6 @@ namespace LIBC_NAMESPACE_DECL { namespace scanf_core { -LIBC_INLINE constexpr char to_lower(char a) { return a | 32; } - -LIBC_INLINE constexpr int b36_char_to_int(char input) { - if (internal::isdigit(input)) - return input - '0'; - if (internal::isalpha(input)) - return to_lower(input) + 10 - 'a'; - return 0; -} - LIBC_INLINE void write_int_with_length(uintmax_t output_val, const FormatSection &to_conv) { if ((to_conv.flags & NO_WRITE) != 0) { diff --git a/libc/src/stdio/scanf_core/float_converter.cpp b/libc/src/stdio/scanf_core/float_converter.cpp index b2d60a249a5a7..9c714d0727214 100644 --- a/libc/src/stdio/scanf_core/float_converter.cpp +++ b/libc/src/stdio/scanf_core/float_converter.cpp @@ -55,11 +55,12 @@ int convert_float(Reader *reader, const FormatSection &to_conv) { // Handle inf - if (to_lower(cur_char) == inf_string[0]) { + if (internal::tolower(cur_char) == inf_string[0]) { size_t inf_index = 0; - for (; inf_index < sizeof(inf_string) && out_str.length() < max_width && - to_lower(cur_char) == inf_string[inf_index]; + for (; + inf_index < (sizeof(inf_string) - 1) && out_str.length() < max_width && + internal::tolower(cur_char) == inf_string[inf_index]; ++inf_index) { if (!out_str.append(cur_char)) { return ALLOCATION_FAILURE; @@ -78,11 +79,12 @@ int convert_float(Reader *reader, const FormatSection &to_conv) { static const char nan_string[] = "nan"; // Handle nan - if (to_lower(cur_char) == nan_string[0]) { + if (internal::tolower(cur_char) == nan_string[0]) { size_t nan_index = 0; - for (; nan_index < sizeof(nan_string) && out_str.length() < max_width && - to_lower(cur_char) == nan_string[nan_index]; + for (; + nan_index < (sizeof(nan_string) - 1) && out_str.length() < max_width && + internal::tolower(cur_char) == nan_string[nan_index]; ++nan_index) { if (!out_str.append(cur_char)) { return ALLOCATION_FAILURE; @@ -117,7 +119,7 @@ int convert_float(Reader *reader, const FormatSection &to_conv) { } // If that next character is an 'x' then this is a hexadecimal number. - if (to_lower(cur_char) == 'x') { + if (internal::tolower(cur_char) == 'x') { base = 16; if (!out_str.append(cur_char)) { @@ -163,7 +165,7 @@ int convert_float(Reader *reader, const FormatSection &to_conv) { // Handle the exponent, which has an exponent mark, an optional sign, and // decimal digits. 
- if (to_lower(cur_char) == exponent_mark) { + if (internal::tolower(cur_char) == exponent_mark) { if (!out_str.append(cur_char)) { return ALLOCATION_FAILURE; } diff --git a/libc/src/stdio/scanf_core/int_converter.cpp b/libc/src/stdio/scanf_core/int_converter.cpp index ecdac52e84bbd..fce817245c010 100644 --- a/libc/src/stdio/scanf_core/int_converter.cpp +++ b/libc/src/stdio/scanf_core/int_converter.cpp @@ -80,7 +80,8 @@ int convert_int(Reader *reader, const FormatSection &to_conv) { is_signed = true; } else if (to_conv.conv_name == 'o') { base = 8; - } else if (to_lower(to_conv.conv_name) == 'x' || to_conv.conv_name == 'p') { + } else if (internal::tolower(to_conv.conv_name) == 'x' || + to_conv.conv_name == 'p') { base = 16; } else if (to_conv.conv_name == 'd') { base = 10; @@ -122,7 +123,7 @@ int convert_int(Reader *reader, const FormatSection &to_conv) { return READ_OK; } - if (to_lower(cur_char) == 'x') { + if (internal::tolower(cur_char) == 'x') { // This is a valid hex prefix. is_number = false; @@ -175,17 +176,18 @@ int convert_int(Reader *reader, const FormatSection &to_conv) { const uintmax_t max_div_by_base = MAX / base; - if (internal::isalnum(cur_char) && b36_char_to_int(cur_char) < base) { + if (internal::isalnum(cur_char) && + internal::b36_char_to_int(cur_char) < base) { is_number = true; } bool has_overflow = false; size_t i = 0; for (; i < max_width && internal::isalnum(cur_char) && - b36_char_to_int(cur_char) < base; + internal::b36_char_to_int(cur_char) < base; ++i, cur_char = reader->getc()) { - uintmax_t cur_digit = b36_char_to_int(cur_char); + uintmax_t cur_digit = internal::b36_char_to_int(cur_char); if (result == MAX) { has_overflow = true; diff --git a/libc/src/stdio/scanf_core/ptr_converter.cpp b/libc/src/stdio/scanf_core/ptr_converter.cpp index 1a42a389d74b4..37f002d3da698 100644 --- a/libc/src/stdio/scanf_core/ptr_converter.cpp +++ b/libc/src/stdio/scanf_core/ptr_converter.cpp @@ -8,6 +8,7 @@ #include "src/stdio/scanf_core/ptr_converter.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" #include "src/stdio/scanf_core/converter_utils.h" #include "src/stdio/scanf_core/core_structs.h" @@ -24,7 +25,8 @@ int convert_pointer(Reader *reader, const FormatSection &to_conv) { // Check if it's exactly the nullptr string, if so then it's a nullptr. char cur_char = reader->getc(); size_t i = 0; - for (; i < sizeof(nullptr_string) && to_lower(cur_char) == nullptr_string[i]; + for (; i < (sizeof(nullptr_string) - 1) && + internal::tolower(cur_char) == nullptr_string[i]; ++i) { cur_char = reader->getc(); } diff --git a/libc/test/UnitTest/MemoryMatcher.cpp b/libc/test/UnitTest/MemoryMatcher.cpp index 244f25572c378..3cd5174fd7f75 100644 --- a/libc/test/UnitTest/MemoryMatcher.cpp +++ b/libc/test/UnitTest/MemoryMatcher.cpp @@ -8,6 +8,7 @@ #include "MemoryMatcher.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" #include "test/UnitTest/Test.h" @@ -40,7 +41,8 @@ bool MemoryMatcher::match(MemoryView actualValue) { static void display(char C) { const auto print = [](unsigned char I) { - tlog << static_cast(I < 10 ? 
'0' + I : 'A' + I - 10); + tlog << static_cast(LIBC_NAMESPACE::internal::toupper( + LIBC_NAMESPACE::internal::int_to_b36_char(I))); }; print(static_cast(C) / 16); print(static_cast(C) & 15); diff --git a/libc/test/src/__support/CPP/stringview_test.cpp b/libc/test/src/__support/CPP/stringview_test.cpp index 6b68f2a1c47a9..c9348243745a7 100644 --- a/libc/test/src/__support/CPP/stringview_test.cpp +++ b/libc/test/src/__support/CPP/stringview_test.cpp @@ -109,8 +109,6 @@ TEST(LlvmLibcStringViewTest, Observer) { ASSERT_EQ(ABC.back(), 'c'); } -bool isDigit(char c) { return c >= '0' && c <= '9'; } - TEST(LlvmLibcStringViewTest, FindFirstOf) { string_view Tmp("abca"); ASSERT_TRUE(Tmp.find_first_of('a') == 0); @@ -236,6 +234,9 @@ TEST(LlvmLibcStringViewTest, FindFirstNotOf) { TEST(LlvmLibcStringViewTest, Contains) { string_view Empty; + static_assert( + 'a' < 'z', + "This test only supports character encodings where 'a' is below 'z'"); for (char c = 'a'; c < 'z'; ++c) EXPECT_FALSE(Empty.contains(c)); diff --git a/libc/test/src/ctype/isalnum_test.cpp b/libc/test/src/ctype/isalnum_test.cpp index b71d36111d725..18ddd2b14b8c8 100644 --- a/libc/test/src/ctype/isalnum_test.cpp +++ b/libc/test/src/ctype/isalnum_test.cpp @@ -6,18 +6,45 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/CPP/span.h" #include "src/ctype/isalnum.h" #include "test/UnitTest/Test.h" +TEST(LlvmLibcIsAlNum, SimpleTest) { + EXPECT_NE(LIBC_NAMESPACE::isalnum('a'), 0); + EXPECT_NE(LIBC_NAMESPACE::isalnum('B'), 0); + EXPECT_NE(LIBC_NAMESPACE::isalnum('3'), 0); + + EXPECT_EQ(LIBC_NAMESPACE::isalnum(' '), 0); + EXPECT_EQ(LIBC_NAMESPACE::isalnum('?'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isalnum('\0'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isalnum(-1), 0); +} + +// TODO: Merge the ctype tests using this framework. +constexpr char ALNUM_ARRAY[] = { + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', +}; + +bool in_span(int ch, LIBC_NAMESPACE::cpp::span arr) { + for (size_t i = 0; i < arr.size(); ++i) + if (static_cast(arr[i]) == ch) + return true; + return false; +} + TEST(LlvmLibcIsAlNum, DefaultLocale) { // Loops through all characters, verifying that numbers and letters // return non-zero integer and everything else returns a zero. 
- for (int c = -255; c < 255; ++c) { - if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || - ('0' <= c && c <= '9')) - EXPECT_NE(LIBC_NAMESPACE::isalnum(c), 0); + for (int ch = -255; ch < 255; ++ch) { + if (in_span(ch, ALNUM_ARRAY)) + EXPECT_NE(LIBC_NAMESPACE::isalnum(ch), 0); else - EXPECT_EQ(LIBC_NAMESPACE::isalnum(c), 0); + EXPECT_EQ(LIBC_NAMESPACE::isalnum(ch), 0); } } diff --git a/libc/test/src/ctype/isalpha_test.cpp b/libc/test/src/ctype/isalpha_test.cpp index 10cdb962ee2ee..e54b580dbe264 100644 --- a/libc/test/src/ctype/isalpha_test.cpp +++ b/libc/test/src/ctype/isalpha_test.cpp @@ -6,15 +6,43 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/CPP/span.h" #include "src/ctype/isalpha.h" #include "test/UnitTest/Test.h" +TEST(LlvmLibcIsAlpha, SimpleTest) { + EXPECT_NE(LIBC_NAMESPACE::isalpha('a'), 0); + EXPECT_NE(LIBC_NAMESPACE::isalpha('B'), 0); + + EXPECT_EQ(LIBC_NAMESPACE::isalpha('3'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isalpha(' '), 0); + EXPECT_EQ(LIBC_NAMESPACE::isalpha('?'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isalpha('\0'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isalpha(-1), 0); +} + +// TODO: Merge the ctype tests using this framework. +constexpr char ALPHA_ARRAY[] = { + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', +}; + +bool in_span(int ch, LIBC_NAMESPACE::cpp::span arr) { + for (size_t i = 0; i < arr.size(); ++i) + if (static_cast(arr[i]) == ch) + return true; + return false; +} + TEST(LlvmLibcIsAlpha, DefaultLocale) { // Loops through all characters, verifying that letters return a // non-zero integer and everything else returns zero. + // TODO: encoding indep for (int ch = -255; ch < 255; ++ch) { - if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')) + if (in_span(ch, ALPHA_ARRAY)) EXPECT_NE(LIBC_NAMESPACE::isalpha(ch), 0); else EXPECT_EQ(LIBC_NAMESPACE::isalpha(ch), 0); diff --git a/libc/test/src/ctype/isdigit_test.cpp b/libc/test/src/ctype/isdigit_test.cpp index a9f84db3ef7e8..adea55e59c74d 100644 --- a/libc/test/src/ctype/isdigit_test.cpp +++ b/libc/test/src/ctype/isdigit_test.cpp @@ -6,15 +6,39 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/CPP/span.h" #include "src/ctype/isdigit.h" #include "test/UnitTest/Test.h" +TEST(LlvmLibcIsDigit, SimpleTest) { + EXPECT_NE(LIBC_NAMESPACE::isdigit('3'), 0); + + EXPECT_EQ(LIBC_NAMESPACE::isdigit('a'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isdigit('B'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isdigit(' '), 0); + EXPECT_EQ(LIBC_NAMESPACE::isdigit('?'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isdigit('\0'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isdigit(-1), 0); +} + +// TODO: Merge the ctype tests using this framework. +constexpr char DIGIT_ARRAY[] = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', +}; + +bool in_span(int ch, LIBC_NAMESPACE::cpp::span arr) { + for (size_t i = 0; i < arr.size(); ++i) + if (static_cast(arr[i]) == ch) + return true; + return false; +} + TEST(LlvmLibcIsDigit, DefaultLocale) { - // Loops through all characters, verifying that numbers return a - // non-zero integer and everything else returns zero. + // Loops through all characters, verifying that numbers and letters + // return non-zero integer and everything else returns a zero. 
for (int ch = -255; ch < 255; ++ch) { - if ('0' <= ch && ch <= '9') + if (in_span(ch, DIGIT_ARRAY)) EXPECT_NE(LIBC_NAMESPACE::isdigit(ch), 0); else EXPECT_EQ(LIBC_NAMESPACE::isdigit(ch), 0); diff --git a/libc/test/src/ctype/islower_test.cpp b/libc/test/src/ctype/islower_test.cpp index ba7caf65b6fd3..f9414bd8cbd09 100644 --- a/libc/test/src/ctype/islower_test.cpp +++ b/libc/test/src/ctype/islower_test.cpp @@ -6,14 +6,40 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/CPP/span.h" #include "src/ctype/islower.h" + #include "test/UnitTest/Test.h" +TEST(LlvmLibcIsLower, SimpleTest) { + EXPECT_NE(LIBC_NAMESPACE::islower('a'), 0); + + EXPECT_EQ(LIBC_NAMESPACE::islower('B'), 0); + EXPECT_EQ(LIBC_NAMESPACE::islower('3'), 0); + EXPECT_EQ(LIBC_NAMESPACE::islower(' '), 0); + EXPECT_EQ(LIBC_NAMESPACE::islower('?'), 0); + EXPECT_EQ(LIBC_NAMESPACE::islower('\0'), 0); + EXPECT_EQ(LIBC_NAMESPACE::islower(-1), 0); +} + +// TODO: Merge the ctype tests using this framework. +constexpr char LOWER_ARRAY[] = { + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', +}; + +bool in_span(int ch, LIBC_NAMESPACE::cpp::span arr) { + for (size_t i = 0; i < arr.size(); ++i) + if (static_cast(arr[i]) == ch) + return true; + return false; +} + TEST(LlvmLibcIsLower, DefaultLocale) { - // Loops through all characters, verifying that lowercase letters - // return a non-zero integer and everything else returns zero. + // Loops through all characters, verifying that numbers and letters + // return non-zero integer and everything else returns a zero. for (int ch = -255; ch < 255; ++ch) { - if ('a' <= ch && ch <= 'z') + if (in_span(ch, LOWER_ARRAY)) EXPECT_NE(LIBC_NAMESPACE::islower(ch), 0); else EXPECT_EQ(LIBC_NAMESPACE::islower(ch), 0); diff --git a/libc/test/src/ctype/isupper_test.cpp b/libc/test/src/ctype/isupper_test.cpp index 05b2fd069ef06..94def1a9dcccd 100644 --- a/libc/test/src/ctype/isupper_test.cpp +++ b/libc/test/src/ctype/isupper_test.cpp @@ -6,14 +6,40 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/CPP/span.h" #include "src/ctype/isupper.h" + #include "test/UnitTest/Test.h" +TEST(LlvmLibcIsUpper, SimpleTest) { + EXPECT_NE(LIBC_NAMESPACE::isupper('B'), 0); + + EXPECT_EQ(LIBC_NAMESPACE::isupper('a'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isupper('3'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isupper(' '), 0); + EXPECT_EQ(LIBC_NAMESPACE::isupper('?'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isupper('\0'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isupper(-1), 0); +} + +// TODO: Merge the ctype tests using this framework. +constexpr char UPPER_ARRAY[] = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', +}; + +bool in_span(int ch, LIBC_NAMESPACE::cpp::span arr) { + for (size_t i = 0; i < arr.size(); ++i) + if (static_cast(arr[i]) == ch) + return true; + return false; +} + TEST(LlvmLibcIsUpper, DefaultLocale) { - // Loops through all characters, verifying that uppercase letters - // return a non-zero integer and everything else returns zero. + // Loops through all characters, verifying that numbers and letters + // return non-zero integer and everything else returns a zero. 
for (int ch = -255; ch < 255; ++ch) { - if ('A' <= ch && ch <= 'Z') + if (in_span(ch, UPPER_ARRAY)) EXPECT_NE(LIBC_NAMESPACE::isupper(ch), 0); else EXPECT_EQ(LIBC_NAMESPACE::isupper(ch), 0); diff --git a/libc/test/src/ctype/isxdigit_test.cpp b/libc/test/src/ctype/isxdigit_test.cpp index b8f27a968540c..d7253d549907b 100644 --- a/libc/test/src/ctype/isxdigit_test.cpp +++ b/libc/test/src/ctype/isxdigit_test.cpp @@ -6,13 +6,41 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/CPP/span.h" #include "src/ctype/isxdigit.h" + #include "test/UnitTest/Test.h" -TEST(LlvmLibcIsXDigit, DefaultLocale) { +TEST(LlvmLibcIsXdigit, SimpleTest) { + EXPECT_NE(LIBC_NAMESPACE::isxdigit('a'), 0); + EXPECT_NE(LIBC_NAMESPACE::isxdigit('B'), 0); + EXPECT_NE(LIBC_NAMESPACE::isxdigit('3'), 0); + + EXPECT_EQ(LIBC_NAMESPACE::isxdigit('z'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isxdigit(' '), 0); + EXPECT_EQ(LIBC_NAMESPACE::isxdigit('?'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isxdigit('\0'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isxdigit(-1), 0); +} + +// TODO: Merge the ctype tests using this framework. +constexpr char XDIGIT_ARRAY[] = { + 'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', + 'F', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', +}; + +bool in_span(int ch, LIBC_NAMESPACE::cpp::span arr) { + for (size_t i = 0; i < arr.size(); ++i) + if (static_cast(arr[i]) == ch) + return true; + return false; +} + +TEST(LlvmLibcIsXdigit, DefaultLocale) { + // Loops through all characters, verifying that numbers and letters + // return non-zero integer and everything else returns a zero. for (int ch = -255; ch < 255; ++ch) { - if (('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') || - ('A' <= ch && ch <= 'F')) + if (in_span(ch, XDIGIT_ARRAY)) EXPECT_NE(LIBC_NAMESPACE::isxdigit(ch), 0); else EXPECT_EQ(LIBC_NAMESPACE::isxdigit(ch), 0); diff --git a/libc/test/src/ctype/tolower_test.cpp b/libc/test/src/ctype/tolower_test.cpp index 3770ce4ea68b6..59432c43297b3 100644 --- a/libc/test/src/ctype/tolower_test.cpp +++ b/libc/test/src/ctype/tolower_test.cpp @@ -6,14 +6,51 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/CPP/span.h" #include "src/ctype/tolower.h" + #include "test/UnitTest/Test.h" +TEST(LlvmLibcToLower, SimpleTest) { + EXPECT_EQ(LIBC_NAMESPACE::tolower('a'), int('a')); + EXPECT_EQ(LIBC_NAMESPACE::tolower('B'), int('b')); + EXPECT_EQ(LIBC_NAMESPACE::tolower('3'), int('3')); + + EXPECT_EQ(LIBC_NAMESPACE::tolower(' '), int(' ')); + EXPECT_EQ(LIBC_NAMESPACE::tolower('?'), int('?')); + EXPECT_EQ(LIBC_NAMESPACE::tolower('\0'), int('\0')); + EXPECT_EQ(LIBC_NAMESPACE::tolower(-1), int(-1)); +} + +// TODO: Merge the ctype tests using this framework. +// Invariant: UPPER_ARR and LOWER_ARR are both the complete alphabet in the same +// order. 
+constexpr char UPPER_ARR[] = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', +}; +constexpr char LOWER_ARR[] = { + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', +}; + +static_assert( + sizeof(UPPER_ARR) == sizeof(LOWER_ARR), + "There must be the same number of uppercase and lowercase letters."); + +int span_index(int ch, LIBC_NAMESPACE::cpp::span arr) { + for (size_t i = 0; i < arr.size(); ++i) + if (static_cast(arr[i]) == ch) + return static_cast(i); + return -1; +} + TEST(LlvmLibcToLower, DefaultLocale) { for (int ch = -255; ch < 255; ++ch) { - // This follows pattern 'A' + 32 = 'a'. - if ('A' <= ch && ch <= 'Z') - EXPECT_EQ(LIBC_NAMESPACE::tolower(ch), ch + 32); + int char_index = span_index(ch, UPPER_ARR); + if (char_index != -1) + EXPECT_EQ(LIBC_NAMESPACE::tolower(ch), + static_cast(LOWER_ARR[char_index])); else EXPECT_EQ(LIBC_NAMESPACE::tolower(ch), ch); } diff --git a/libc/test/src/ctype/toupper_test.cpp b/libc/test/src/ctype/toupper_test.cpp index 0413b43fb6009..045b00bbb4b93 100644 --- a/libc/test/src/ctype/toupper_test.cpp +++ b/libc/test/src/ctype/toupper_test.cpp @@ -6,14 +6,51 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/CPP/span.h" #include "src/ctype/toupper.h" + #include "test/UnitTest/Test.h" +TEST(LlvmLibcToUpper, SimpleTest) { + EXPECT_EQ(LIBC_NAMESPACE::toupper('a'), int('A')); + EXPECT_EQ(LIBC_NAMESPACE::toupper('B'), int('B')); + EXPECT_EQ(LIBC_NAMESPACE::toupper('3'), int('3')); + + EXPECT_EQ(LIBC_NAMESPACE::toupper(' '), int(' ')); + EXPECT_EQ(LIBC_NAMESPACE::toupper('?'), int('?')); + EXPECT_EQ(LIBC_NAMESPACE::toupper('\0'), int('\0')); + EXPECT_EQ(LIBC_NAMESPACE::toupper(-1), int(-1)); +} + +// TODO: Merge the ctype tests using this framework. +// Invariant: UPPER_ARR and LOWER_ARR are both the complete alphabet in the same +// order. +constexpr char UPPER_ARR[] = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', +}; +constexpr char LOWER_ARR[] = { + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', +}; + +static_assert( + sizeof(UPPER_ARR) == sizeof(LOWER_ARR), + "There must be the same number of uppercase and lowercase letters."); + +int span_index(int ch, LIBC_NAMESPACE::cpp::span arr) { + for (size_t i = 0; i < arr.size(); ++i) + if (static_cast(arr[i]) == ch) + return static_cast(i); + return -1; +} + TEST(LlvmLibcToUpper, DefaultLocale) { for (int ch = -255; ch < 255; ++ch) { - // This follows pattern 'a' - 32 = 'A'. 
- if ('a' <= ch && ch <= 'z') - EXPECT_EQ(LIBC_NAMESPACE::toupper(ch), ch - 32); + int char_index = span_index(ch, LOWER_ARR); + if (char_index != -1) + EXPECT_EQ(LIBC_NAMESPACE::toupper(ch), + static_cast(UPPER_ARR[char_index])); else EXPECT_EQ(LIBC_NAMESPACE::toupper(ch), ch); } diff --git a/libc/test/src/stdlib/StrtolTest.h b/libc/test/src/stdlib/StrtolTest.h index 8a67848e4c330..6cfaddcbedeb6 100644 --- a/libc/test/src/stdlib/StrtolTest.h +++ b/libc/test/src/stdlib/StrtolTest.h @@ -8,6 +8,7 @@ #include "src/__support/CPP/limits.h" #include "src/__support/CPP/type_traits.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/properties/architectures.h" #include "src/errno/libc_errno.h" #include "test/UnitTest/Test.h" @@ -16,14 +17,6 @@ using LIBC_NAMESPACE::cpp::is_signed_v; -static inline char int_to_b36_char(int input) { - if (input < 0 || input > 36) - return '0'; - if (input < 10) - return static_cast('0' + input); - return static_cast('A' + input - 10); -} - template struct StrtoTest : public LIBC_NAMESPACE::testing::Test { using FunctionT = ReturnT (*)(const char *, char **, int); @@ -207,7 +200,8 @@ struct StrtoTest : public LIBC_NAMESPACE::testing::Test { char small_string[4] = {'\0', '\0', '\0', '\0'}; for (int base = 2; base <= 36; ++base) { for (int first_digit = 0; first_digit <= 36; ++first_digit) { - small_string[0] = int_to_b36_char(first_digit); + small_string[0] = + LIBC_NAMESPACE::internal::int_to_b36_char(first_digit); if (first_digit < base) { LIBC_NAMESPACE::libc_errno = 0; ASSERT_EQ(func(small_string, nullptr, base), @@ -223,9 +217,11 @@ struct StrtoTest : public LIBC_NAMESPACE::testing::Test { for (int base = 2; base <= 36; ++base) { for (int first_digit = 0; first_digit <= 36; ++first_digit) { - small_string[0] = int_to_b36_char(first_digit); + small_string[0] = + LIBC_NAMESPACE::internal::int_to_b36_char(first_digit); for (int second_digit = 0; second_digit <= 36; ++second_digit) { - small_string[1] = int_to_b36_char(second_digit); + small_string[1] = + LIBC_NAMESPACE::internal::int_to_b36_char(second_digit); if (first_digit < base && second_digit < base) { LIBC_NAMESPACE::libc_errno = 0; ASSERT_EQ( @@ -248,11 +244,14 @@ struct StrtoTest : public LIBC_NAMESPACE::testing::Test { for (int base = 2; base <= 36; ++base) { for (int first_digit = 0; first_digit <= 36; ++first_digit) { - small_string[0] = int_to_b36_char(first_digit); + small_string[0] = + LIBC_NAMESPACE::internal::int_to_b36_char(first_digit); for (int second_digit = 0; second_digit <= 36; ++second_digit) { - small_string[1] = int_to_b36_char(second_digit); + small_string[1] = + LIBC_NAMESPACE::internal::int_to_b36_char(second_digit); for (int third_digit = 0; third_digit <= limit; ++third_digit) { - small_string[2] = int_to_b36_char(third_digit); + small_string[2] = + LIBC_NAMESPACE::internal::int_to_b36_char(third_digit); if (first_digit < base && second_digit < base && third_digit < base) { diff --git a/libc/test/src/string/strcmp_test.cpp b/libc/test/src/string/strcmp_test.cpp index ef58dc608c83b..234447610222f 100644 --- a/libc/test/src/string/strcmp_test.cpp +++ b/libc/test/src/string/strcmp_test.cpp @@ -25,13 +25,13 @@ TEST(LlvmLibcStrCmpTest, EmptyStringShouldNotEqualNonEmptyString) { const char *s2 = "abc"; int result = LIBC_NAMESPACE::strcmp(empty, s2); // This should be '\0' - 'a' = -97 - ASSERT_EQ(result, -97); + ASSERT_EQ(result, '\0' - 'a'); // Similar case if empty string is second argument. 
const char *s3 = "123"; result = LIBC_NAMESPACE::strcmp(s3, empty); // This should be '1' - '\0' = 49 - ASSERT_EQ(result, 49); + ASSERT_EQ(result, '1' - '\0'); } TEST(LlvmLibcStrCmpTest, EqualStringsShouldReturnZero) { @@ -50,12 +50,12 @@ TEST(LlvmLibcStrCmpTest, ShouldReturnResultOfFirstDifference) { const char *s2 = "___C55__"; int result = LIBC_NAMESPACE::strcmp(s1, s2); // This should return 'B' - 'C' = -1. - ASSERT_EQ(result, -1); + ASSERT_EQ(result, 'B' - 'C'); // Verify operands reversed. result = LIBC_NAMESPACE::strcmp(s2, s1); // This should return 'C' - 'B' = 1. - ASSERT_EQ(result, 1); + ASSERT_EQ(result, 'C' - 'B'); } TEST(LlvmLibcStrCmpTest, CapitalizedLetterShouldNotBeEqual) { @@ -63,12 +63,12 @@ TEST(LlvmLibcStrCmpTest, CapitalizedLetterShouldNotBeEqual) { const char *s2 = "abCd"; int result = LIBC_NAMESPACE::strcmp(s1, s2); // 'c' - 'C' = 32. - ASSERT_EQ(result, 32); + ASSERT_EQ(result, 'c' - 'C'); // Verify operands reversed. result = LIBC_NAMESPACE::strcmp(s2, s1); // 'C' - 'c' = -32. - ASSERT_EQ(result, -32); + ASSERT_EQ(result, 'C' - 'c'); } TEST(LlvmLibcStrCmpTest, UnequalLengthStringsShouldNotReturnZero) { @@ -76,12 +76,12 @@ TEST(LlvmLibcStrCmpTest, UnequalLengthStringsShouldNotReturnZero) { const char *s2 = "abcd"; int result = LIBC_NAMESPACE::strcmp(s1, s2); // '\0' - 'd' = -100. - ASSERT_EQ(result, -100); + ASSERT_EQ(result, -'\0' - 'd'); // Verify operands reversed. result = LIBC_NAMESPACE::strcmp(s2, s1); // 'd' - '\0' = 100. - ASSERT_EQ(result, 100); + ASSERT_EQ(result, 'd' - '\0'); } TEST(LlvmLibcStrCmpTest, StringArgumentSwapChangesSign) { @@ -89,11 +89,11 @@ TEST(LlvmLibcStrCmpTest, StringArgumentSwapChangesSign) { const char *b = "b"; int result = LIBC_NAMESPACE::strcmp(b, a); // 'b' - 'a' = 1. - ASSERT_EQ(result, 1); + ASSERT_EQ(result, 'b' - 'a'); result = LIBC_NAMESPACE::strcmp(a, b); // 'a' - 'b' = -1. 
- ASSERT_EQ(result, -1); + ASSERT_EQ(result, 'a' - 'b'); } TEST(LlvmLibcStrCmpTest, Case) { diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 2573788658d59..c5a0076d2ef30 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -740,6 +740,7 @@ libc_support_library( hdrs = ["src/__support/integer_literals.h"], deps = [ ":__support_cpp_limits", + ":__support_ctype_utils", ":__support_uint128", ], ) @@ -772,6 +773,7 @@ libc_support_library( ":__support_cpp_span", ":__support_cpp_string_view", ":__support_cpp_type_traits", + ":__support_ctype_utils", ], ) @@ -4450,6 +4452,7 @@ libc_support_library( ":__support_cpp_limits", ":__support_cpp_span", ":__support_cpp_string_view", + ":__support_ctype_utils", ":__support_float_to_string", ":__support_fputil_fenv_impl", ":__support_fputil_fp_bits", diff --git a/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel index f387741e95d8b..6db3456edbb70 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel @@ -63,12 +63,12 @@ libc_support_library( "//libc:__support_stringutil", "//libc:__support_uint128", "//libc:errno", - "//libc:llvm_libc_macros_stdfix_macros", - "//llvm:Support", "//libc:func_aligned_alloc", "//libc:func_free", "//libc:func_malloc", "//libc:func_realloc", + "//libc:llvm_libc_macros_stdfix_macros", + "//llvm:Support", ], ) @@ -121,6 +121,7 @@ libc_support_library( "//libc:__support_cpp_bitset", "//libc:__support_cpp_span", "//libc:__support_cpp_type_traits", + "//libc:__support_ctype_utils", "//libc:__support_macros_config", ], ) From 249755cedb17ffa707253edcef1a388f807caa35 Mon Sep 17 00:00:00 2001 From: Shubham Sandeep Rastogi Date: Tue, 3 Dec 2024 12:37:30 -0800 Subject: [PATCH 124/191] Reland "Add a pass to collect dropped var stats for MIR" (#117044) Moved the MIR Test to the unittests/CodeGen folder This is patch is part of a stack of patches, and follows https://github.com/llvm/llvm-project/pull/117042 I moved the MIR test to the unittests/CodeGen folder I am trying to reland https://github.com/llvm/llvm-project/pull/115566 --- .../llvm/CodeGen/DroppedVariableStats.h | 48 +- .../llvm/CodeGen/MachineFunctionPass.h | 2 + llvm/lib/CodeGen/DroppedVariableStats.cpp | 63 +- llvm/lib/CodeGen/MachineFunctionPass.cpp | 15 +- llvm/unittests/CodeGen/CMakeLists.txt | 1 + .../CodeGen/DroppedVariableStatsMIRTest.cpp | 1067 +++++++++++++++++ 6 files changed, 1193 insertions(+), 3 deletions(-) create mode 100644 llvm/unittests/CodeGen/DroppedVariableStatsMIRTest.cpp diff --git a/llvm/include/llvm/CodeGen/DroppedVariableStats.h b/llvm/include/llvm/CodeGen/DroppedVariableStats.h index 371d775b02e87..f6050c68c91aa 100644 --- a/llvm/include/llvm/CodeGen/DroppedVariableStats.h +++ b/llvm/include/llvm/CodeGen/DroppedVariableStats.h @@ -7,7 +7,7 @@ ///===---------------------------------------------------------------------===// /// \file /// Dropped Variable Statistics for Debug Information. Reports any number -/// of #dbg_value that get dropped due to an optimization pass. +/// of #dbg_values or DBG_VALUEs that get dropped due to an optimization pass. 
/// ///===---------------------------------------------------------------------===// @@ -221,6 +221,52 @@ class DroppedVariableStatsIR : public DroppedVariableStats { } }; +/// A class to collect and print dropped debug information due to MIR +/// optimization passes. After every MIR pass is run, it will print how many +/// #DBG_VALUEs were dropped due to that pass. +class DroppedVariableStatsMIR : public DroppedVariableStats { +public: + DroppedVariableStatsMIR() : llvm::DroppedVariableStats(false) {} + + void runBeforePass(StringRef PassID, MachineFunction *MF) { + if (PassID == "Debug Variable Analysis") + return; + setup(); + return runOnMachineFunction(MF, true); + } + + void runAfterPass(StringRef PassID, MachineFunction *MF) { + if (PassID == "Debug Variable Analysis") + return; + runOnMachineFunction(MF, false); + calculateDroppedVarStatsOnMachineFunction(MF, PassID, MF->getName().str()); + cleanup(); + } + +private: + const MachineFunction *MFunc; + /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or + /// after a pass has run to facilitate dropped variable calculation for an + /// llvm::MachineFunction. + void runOnMachineFunction(const MachineFunction *MF, bool Before); + /// Iterate over all Instructions in a MachineFunction and report any dropped + /// debug information. + void calculateDroppedVarStatsOnMachineFunction(const MachineFunction *MF, + StringRef PassID, + StringRef FuncOrModName); + /// Override base class method to run on an llvm::MachineFunction + /// specifically. + virtual void + visitEveryInstruction(unsigned &DroppedCount, + DenseMap &InlinedAtsMap, + VarID Var) override; + /// Override base class method to run on DBG_VALUEs specifically. + virtual void visitEveryDebugRecord( + DenseSet &VarIDSet, + DenseMap> &InlinedAtsMap, + StringRef FuncName, bool Before) override; +}; + } // namespace llvm #endif diff --git a/llvm/include/llvm/CodeGen/MachineFunctionPass.h b/llvm/include/llvm/CodeGen/MachineFunctionPass.h index caaf22c2139e3..d82b593497ffc 100644 --- a/llvm/include/llvm/CodeGen/MachineFunctionPass.h +++ b/llvm/include/llvm/CodeGen/MachineFunctionPass.h @@ -18,6 +18,7 @@ #ifndef LLVM_CODEGEN_MACHINEFUNCTIONPASS_H #define LLVM_CODEGEN_MACHINEFUNCTIONPASS_H +#include "llvm/CodeGen/DroppedVariableStats.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/Pass.h" @@ -67,6 +68,7 @@ class MachineFunctionPass : public FunctionPass { MachineFunctionProperties RequiredProperties; MachineFunctionProperties SetProperties; MachineFunctionProperties ClearedProperties; + DroppedVariableStatsMIR DroppedVarStatsMF; /// createPrinterPass - Get a machine function printer pass. Pass *createPrinterPass(raw_ostream &O, diff --git a/llvm/lib/CodeGen/DroppedVariableStats.cpp b/llvm/lib/CodeGen/DroppedVariableStats.cpp index 122fcad1293f1..71f91292160f5 100644 --- a/llvm/lib/CodeGen/DroppedVariableStats.cpp +++ b/llvm/lib/CodeGen/DroppedVariableStats.cpp @@ -7,7 +7,7 @@ ///===---------------------------------------------------------------------===// /// \file /// Dropped Variable Statistics for Debug Information. Reports any number -/// of #dbg_value that get dropped due to an optimization pass. +/// of #dbg_values or DBG_VALUEs that get dropped due to an optimization pass. 
/// ///===---------------------------------------------------------------------===// @@ -192,3 +192,64 @@ void DroppedVariableStatsIR::visitEveryDebugRecord( } } } + +void DroppedVariableStatsMIR::runOnMachineFunction(const MachineFunction *MF, + bool Before) { + auto &DebugVariables = DebugVariablesStack.back()[&MF->getFunction()]; + auto FuncName = MF->getName(); + MFunc = MF; + run(DebugVariables, FuncName, Before); +} + +void DroppedVariableStatsMIR::calculateDroppedVarStatsOnMachineFunction( + const MachineFunction *MF, StringRef PassID, StringRef FuncOrModName) { + MFunc = MF; + StringRef FuncName = MF->getName(); + const Function *Func = &MF->getFunction(); + DebugVariables &DbgVariables = DebugVariablesStack.back()[Func]; + calculateDroppedStatsAndPrint(DbgVariables, FuncName, PassID, FuncOrModName, + "MachineFunction", Func); +} + +void DroppedVariableStatsMIR::visitEveryInstruction( + unsigned &DroppedCount, DenseMap &InlinedAtsMap, + VarID Var) { + unsigned PrevDroppedCount = DroppedCount; + const DIScope *DbgValScope = std::get<0>(Var); + for (const auto &MBB : *MFunc) { + for (const auto &MI : MBB) { + if (!MI.isDebugInstr()) { + auto *DbgLoc = MI.getDebugLoc().get(); + if (!DbgLoc) + continue; + + auto *Scope = DbgLoc->getScope(); + if (updateDroppedCount(DbgLoc, Scope, DbgValScope, InlinedAtsMap, Var, + DroppedCount)) + break; + } + } + if (PrevDroppedCount != DroppedCount) { + PrevDroppedCount = DroppedCount; + break; + } + } +} + +void DroppedVariableStatsMIR::visitEveryDebugRecord( + DenseSet &VarIDSet, + DenseMap> &InlinedAtsMap, + StringRef FuncName, bool Before) { + for (const auto &MBB : *MFunc) { + for (const auto &MI : MBB) { + if (MI.isDebugValueLike()) { + auto *DbgVar = MI.getDebugVariable(); + if (!DbgVar) + continue; + auto DbgLoc = MI.getDebugLoc(); + populateVarIDSetAndInlinedMap(DbgVar, DbgLoc, VarIDSet, InlinedAtsMap, + FuncName, Before); + } + } + } +} diff --git a/llvm/lib/CodeGen/MachineFunctionPass.cpp b/llvm/lib/CodeGen/MachineFunctionPass.cpp index 62ac3e32d24d9..e803811643f87 100644 --- a/llvm/lib/CodeGen/MachineFunctionPass.cpp +++ b/llvm/lib/CodeGen/MachineFunctionPass.cpp @@ -32,6 +32,11 @@ using namespace llvm; using namespace ore; +static cl::opt DroppedVarStatsMIR( + "dropped-variable-stats-mir", cl::Hidden, + cl::desc("Dump dropped debug variables stats for MIR passes"), + cl::init(false)); + Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O, const std::string &Banner) const { return createMachineFunctionPrinterPass(O, Banner); @@ -91,7 +96,15 @@ bool MachineFunctionPass::runOnFunction(Function &F) { MFProps.reset(ClearedProperties); - bool RV = runOnMachineFunction(MF); + bool RV; + if (DroppedVarStatsMIR) { + auto PassName = getPassName(); + DroppedVarStatsMF.runBeforePass(PassName, &MF); + RV = runOnMachineFunction(MF); + DroppedVarStatsMF.runAfterPass(PassName, &MF); + } else { + RV = runOnMachineFunction(MF); + } if (ShouldEmitSizeRemarks) { // We wanted size remarks. 
Check if there was a change to the number of diff --git a/llvm/unittests/CodeGen/CMakeLists.txt b/llvm/unittests/CodeGen/CMakeLists.txt index 807fd1a9b7b56..50ef1bb5b7af2 100644 --- a/llvm/unittests/CodeGen/CMakeLists.txt +++ b/llvm/unittests/CodeGen/CMakeLists.txt @@ -28,6 +28,7 @@ add_llvm_unittest(CodeGenTests DIEHashTest.cpp DIETest.cpp DroppedVariableStatsIRTest.cpp + DroppedVariableStatsMIRTest.cpp DwarfStringPoolEntryRefTest.cpp InstrRefLDVTest.cpp LowLevelTypeTest.cpp diff --git a/llvm/unittests/CodeGen/DroppedVariableStatsMIRTest.cpp b/llvm/unittests/CodeGen/DroppedVariableStatsMIRTest.cpp new file mode 100644 index 0000000000000..b26a89c7adcba --- /dev/null +++ b/llvm/unittests/CodeGen/DroppedVariableStatsMIRTest.cpp @@ -0,0 +1,1067 @@ +//===- unittests/IR/DroppedVariableStatsTest.cpp - TimePassesHandler tests +//----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/AsmParser/Parser.h" +#include "llvm/CodeGen/MIRParser/MIRParser.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Pass.h" +#include "llvm/Passes/StandardInstrumentations.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Target/TargetMachine.h" +#include "gtest/gtest.h" +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace llvm; + +namespace { + +std::unique_ptr +createTargetMachine(std::string TT, StringRef CPU, StringRef FS) { + std::string Error; + const Target *T = TargetRegistry::lookupTarget(TT, Error); + if (!T) + return nullptr; + TargetOptions Options; + return std::unique_ptr( + static_cast(T->createTargetMachine( + TT, CPU, FS, Options, std::nullopt, std::nullopt))); +} + +std::unique_ptr parseMIR(const TargetMachine &TM, StringRef MIRCode, + MachineModuleInfo &MMI, LLVMContext *Context) { + SMDiagnostic Diagnostic; + std::unique_ptr M; + std::unique_ptr MBuffer = MemoryBuffer::getMemBuffer(MIRCode); + auto MIR = createMIRParser(std::move(MBuffer), *Context); + if (!MIR) + return nullptr; + + std::unique_ptr Mod = MIR->parseIRModule(); + if (!Mod) + return nullptr; + + Mod->setDataLayout(TM.createDataLayout()); + + if (MIR->parseMachineFunctions(*Mod, MMI)) { + M.reset(); + return nullptr; + } + return Mod; +} +// This test ensures that if a DBG_VALUE and an instruction that exists in the +// same scope as that DBG_VALUE are both deleted as a result of an optimization +// pass, debug information is considered not dropped. 
+TEST(DroppedVariableStatsMIR, BothDeleted) { + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllTargetMCs(); + PassInstrumentationCallbacks PIC; + PassInstrumentation PI(&PIC); + + LLVMContext C; + + const char *MIR = + R"( +--- | + ; ModuleID = '/tmp/test.ll' + source_filename = "/tmp/test.ll" + target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" + + define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 { + entry: + #dbg_value(i32 %x, !10, !DIExpression(), !11) + %add = add nsw i32 %x, 1, !dbg !12 + ret i32 0 + } + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!2} + !llvm.ident = !{!3} + + !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") + !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/") + !2 = !{i32 2, !"Debug Info Version", i32 3} + !3 = !{!"clang"} + !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) + !5 = !DIFile(filename: "/tmp/code.cpp", directory: "") + !6 = !DISubroutineType(types: !7) + !7 = !{!8, !8} + !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !9 = !{!10} + !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8) + !11 = !DILocation(line: 0, scope: !4) + !12 = !DILocation(line: 2, column: 11, scope: !4) + +... +--- +name: _Z3fooi +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +noPhis: false +isSSA: true +noVRegs: false +hasFakeUses: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: _, preferred-register: '', flags: [ ] } + - { id: 1, class: _, preferred-register: '', flags: [ ] } + - { id: 2, class: _, preferred-register: '', flags: [ ] } + - { id: 3, class: _, preferred-register: '', flags: [ ] } +liveins: + - { reg: '$w0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + isCalleeSavedInfoValid: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.1.entry: + liveins: $w0 + + %0:_(s32) = COPY $w0 + %1:_(s32) = G_CONSTANT i32 1 + %3:_(s32) = G_CONSTANT i32 0 + DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 + %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + )"; + auto TM = createTargetMachine(Triple::normalize("aarch64--"), "", ""); + MachineModuleInfo MMI(TM.get()); + std::unique_ptr M = parseMIR(*TM, MIR, MMI, &C); + ASSERT_TRUE(M); + + DroppedVariableStatsMIR Stats; + auto *MF = 
MMI.getMachineFunction(*M->getFunction("_Z3fooi")); + Stats.runBeforePass("Test", MF); + + // This loop simulates an IR pass that drops debug information. + for (auto &MBB : *MF) { + for (auto &MI : MBB) { + if (MI.isDebugValueLike()) { + MI.eraseFromParent(); + break; + } + } + for (auto &MI : MBB) { + auto *DbgLoc = MI.getDebugLoc().get(); + if (DbgLoc) { + MI.eraseFromParent(); + break; + } + } + break; + } + + Stats.runAfterPass("Test", MF); + ASSERT_EQ(Stats.getPassDroppedVariables(), false); +} + +// This test ensures that if a DBG_VALUE is dropped after an optimization pass, +// but an instruction that shares the same scope as the DBG_VALUE still exists, +// debug information is conisdered dropped. +TEST(DroppedVariableStatsMIR, DbgValLost) { + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllTargetMCs(); + PassInstrumentationCallbacks PIC; + PassInstrumentation PI(&PIC); + + LLVMContext C; + + const char *MIR = + R"( +--- | + ; ModuleID = '/tmp/test.ll' + source_filename = "/tmp/test.ll" + target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" + + define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 { + entry: + #dbg_value(i32 %x, !10, !DIExpression(), !11) + %add = add nsw i32 %x, 1, !dbg !12 + ret i32 0 + } + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!2} + !llvm.ident = !{!3} + + !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") + !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/") + !2 = !{i32 2, !"Debug Info Version", i32 3} + !3 = !{!"clang"} + !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) + !5 = !DIFile(filename: "/tmp/code.cpp", directory: "") + !6 = !DISubroutineType(types: !7) + !7 = !{!8, !8} + !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !9 = !{!10} + !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8) + !11 = !DILocation(line: 0, scope: !4) + !12 = !DILocation(line: 2, column: 11, scope: !4) + +... 
+--- +name: _Z3fooi +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +noPhis: false +isSSA: true +noVRegs: false +hasFakeUses: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: _, preferred-register: '', flags: [ ] } + - { id: 1, class: _, preferred-register: '', flags: [ ] } + - { id: 2, class: _, preferred-register: '', flags: [ ] } + - { id: 3, class: _, preferred-register: '', flags: [ ] } +liveins: + - { reg: '$w0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + isCalleeSavedInfoValid: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.1.entry: + liveins: $w0 + + %0:_(s32) = COPY $w0 + %1:_(s32) = G_CONSTANT i32 1 + %3:_(s32) = G_CONSTANT i32 0 + DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 + %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + )"; + auto TM = createTargetMachine(Triple::normalize("aarch64--"), "", ""); + MachineModuleInfo MMI(TM.get()); + std::unique_ptr M = parseMIR(*TM, MIR, MMI, &C); + ASSERT_TRUE(M); + + DroppedVariableStatsMIR Stats; + auto *MF = MMI.getMachineFunction(*M->getFunction("_Z3fooi")); + Stats.runBeforePass("Test", MF); + + // This loop simulates an IR pass that drops debug information. + for (auto &MBB : *MF) { + for (auto &MI : MBB) { + if (MI.isDebugValueLike()) { + MI.eraseFromParent(); + break; + } + } + break; + } + + Stats.runAfterPass("Test", MF); + ASSERT_EQ(Stats.getPassDroppedVariables(), true); +} + +// This test ensures that if a #dbg_value is dropped after an optimization pass, +// but an instruction that has an unrelated scope as the #dbg_value still +// exists, debug information is conisdered not dropped. 
+TEST(DroppedVariableStatsMIR, UnrelatedScopes) { + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllTargetMCs(); + PassInstrumentationCallbacks PIC; + PassInstrumentation PI(&PIC); + + LLVMContext C; + + const char *MIR = + R"( +--- | + ; ModuleID = '/tmp/test.ll' + source_filename = "/tmp/test.ll" + target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" + + define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 { + entry: + #dbg_value(i32 %x, !10, !DIExpression(), !11) + %add = add nsw i32 %x, 1, !dbg !12 + ret i32 0 + } + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!2} + !llvm.ident = !{!3} + + !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") + !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/") + !2 = !{i32 2, !"Debug Info Version", i32 3} + !3 = !{!"clang"} + !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) + !5 = !DIFile(filename: "/tmp/code.cpp", directory: "") + !6 = !DISubroutineType(types: !7) + !7 = !{!8, !8} + !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !9 = !{!10} + !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8) + !11 = !DILocation(line: 0, scope: !4) + !12 = !DILocation(line: 2, column: 11, scope: !13) + !13 = distinct !DISubprogram(name: "bar", linkageName: "_Z3bari", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) + +... 
+--- +name: _Z3fooi +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +noPhis: false +isSSA: true +noVRegs: false +hasFakeUses: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: _, preferred-register: '', flags: [ ] } + - { id: 1, class: _, preferred-register: '', flags: [ ] } + - { id: 2, class: _, preferred-register: '', flags: [ ] } + - { id: 3, class: _, preferred-register: '', flags: [ ] } +liveins: + - { reg: '$w0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + isCalleeSavedInfoValid: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.1.entry: + liveins: $w0 + + %0:_(s32) = COPY $w0 + %1:_(s32) = G_CONSTANT i32 1 + %3:_(s32) = G_CONSTANT i32 0 + DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 + %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + )"; + auto TM = createTargetMachine(Triple::normalize("aarch64--"), "", ""); + MachineModuleInfo MMI(TM.get()); + std::unique_ptr M = parseMIR(*TM, MIR, MMI, &C); + ASSERT_TRUE(M); + + DroppedVariableStatsMIR Stats; + auto *MF = MMI.getMachineFunction(*M->getFunction("_Z3fooi")); + Stats.runBeforePass("Test", MF); + + // This loop simulates an IR pass that drops debug information. + for (auto &MBB : *MF) { + for (auto &MI : MBB) { + if (MI.isDebugValueLike()) { + MI.eraseFromParent(); + break; + } + } + break; + } + + Stats.runAfterPass("Test", MF); + ASSERT_EQ(Stats.getPassDroppedVariables(), false); +} + +// This test ensures that if a #dbg_value is dropped after an optimization pass, +// but an instruction that has a scope which is a child of the #dbg_value scope +// still exists, debug information is conisdered dropped. 
+TEST(DroppedVariableStatsMIR, ChildScopes) { + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllTargetMCs(); + PassInstrumentationCallbacks PIC; + PassInstrumentation PI(&PIC); + + LLVMContext C; + + const char *MIR = + R"( +--- | + ; ModuleID = '/tmp/test.ll' + source_filename = "/tmp/test.ll" + target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" + + define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 { + entry: + #dbg_value(i32 %x, !10, !DIExpression(), !11) + %add = add nsw i32 %x, 1, !dbg !12 + ret i32 0 + } + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!2} + !llvm.ident = !{!3} + + !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") + !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/") + !2 = !{i32 2, !"Debug Info Version", i32 3} + !3 = !{!"clang"} + !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) + !5 = !DIFile(filename: "/tmp/code.cpp", directory: "") + !6 = !DISubroutineType(types: !7) + !7 = !{!8, !8} + !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !9 = !{!10} + !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8) + !11 = !DILocation(line: 0, scope: !4) + !12 = !DILocation(line: 2, column: 11, scope: !13) + !13 = distinct !DILexicalBlock(scope: !4, file: !5, line: 10, column: 28) + +... +--- +name: _Z3fooi +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +noPhis: false +isSSA: true +noVRegs: false +hasFakeUses: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: _, preferred-register: '', flags: [ ] } + - { id: 1, class: _, preferred-register: '', flags: [ ] } + - { id: 2, class: _, preferred-register: '', flags: [ ] } + - { id: 3, class: _, preferred-register: '', flags: [ ] } +liveins: + - { reg: '$w0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + isCalleeSavedInfoValid: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.1.entry: + liveins: $w0 + + %0:_(s32) = COPY $w0 + %1:_(s32) = G_CONSTANT i32 1 + %3:_(s32) = G_CONSTANT i32 0 + DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 + %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + )"; + auto TM = createTargetMachine(Triple::normalize("aarch64--"), "", ""); + MachineModuleInfo MMI(TM.get()); + std::unique_ptr M = parseMIR(*TM, MIR, MMI, &C); + 
ASSERT_TRUE(M); + + DroppedVariableStatsMIR Stats; + auto *MF = MMI.getMachineFunction(*M->getFunction("_Z3fooi")); + Stats.runBeforePass("Test", MF); + + // This loop simulates an IR pass that drops debug information. + for (auto &MBB : *MF) { + for (auto &MI : MBB) { + if (MI.isDebugValueLike()) { + MI.eraseFromParent(); + break; + } + } + break; + } + + Stats.runAfterPass("Test", MF); + ASSERT_EQ(Stats.getPassDroppedVariables(), true); +} + +// This test ensures that if a DBG_VALUE is dropped after an optimization pass, +// but an instruction that has a scope which is a child of the DBG_VALUE scope +// still exists, and the DBG_VALUE is inlined at another location, debug +// information is conisdered not dropped. +TEST(DroppedVariableStatsMIR, InlinedAt) { + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllTargetMCs(); + PassInstrumentationCallbacks PIC; + PassInstrumentation PI(&PIC); + + LLVMContext C; + + const char *MIR = + R"( +--- | + ; ModuleID = '/tmp/test.ll' + source_filename = "/tmp/test.ll" + target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" + + define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 { + entry: + #dbg_value(i32 %x, !10, !DIExpression(), !11) + %add = add nsw i32 %x, 1, !dbg !12 + ret i32 0 + } + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!2} + !llvm.ident = !{!3} + + !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") + !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/") + !2 = !{i32 2, !"Debug Info Version", i32 3} + !3 = !{!"clang"} + !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) + !5 = !DIFile(filename: "/tmp/code.cpp", directory: "") + !6 = !DISubroutineType(types: !7) + !7 = !{!8, !8} + !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !9 = !{!10} + !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8) + !11 = !DILocation(line: 0, scope: !4, inlinedAt: !14) + !12 = !DILocation(line: 2, column: 11, scope: !13) + !13 = distinct !DILexicalBlock(scope: !4, file: !5, line: 10, column: 28) + !14 = !DILocation(line: 3, column: 2, scope: !4) + +... 
+--- +name: _Z3fooi +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +noPhis: false +isSSA: true +noVRegs: false +hasFakeUses: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: _, preferred-register: '', flags: [ ] } + - { id: 1, class: _, preferred-register: '', flags: [ ] } + - { id: 2, class: _, preferred-register: '', flags: [ ] } + - { id: 3, class: _, preferred-register: '', flags: [ ] } +liveins: + - { reg: '$w0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + isCalleeSavedInfoValid: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.1.entry: + liveins: $w0 + + %0:_(s32) = COPY $w0 + %1:_(s32) = G_CONSTANT i32 1 + %3:_(s32) = G_CONSTANT i32 0 + DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 + %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + )"; + auto TM = createTargetMachine(Triple::normalize("aarch64--"), "", ""); + MachineModuleInfo MMI(TM.get()); + std::unique_ptr M = parseMIR(*TM, MIR, MMI, &C); + ASSERT_TRUE(M); + + DroppedVariableStatsMIR Stats; + auto *MF = MMI.getMachineFunction(*M->getFunction("_Z3fooi")); + Stats.runBeforePass("Test", MF); + + // This loop simulates an IR pass that drops debug information. + for (auto &MBB : *MF) { + for (auto &MI : MBB) { + if (MI.isDebugValueLike()) { + MI.eraseFromParent(); + break; + } + } + break; + } + + Stats.runAfterPass("Test", MF); + ASSERT_EQ(Stats.getPassDroppedVariables(), false); +} + +// This test ensures that if a DBG_VALUE is dropped after an optimization pass, +// but an instruction that has a scope which is a child of the DBG_VALUE scope +// still exists, and the DBG_VALUE and the instruction are inlined at another +// location, debug information is conisdered dropped. 
+TEST(DroppedVariableStatsMIR, InlinedAtShared) { + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllTargetMCs(); + PassInstrumentationCallbacks PIC; + PassInstrumentation PI(&PIC); + + LLVMContext C; + + const char *MIR = + R"( +--- | + ; ModuleID = '/tmp/test.ll' + source_filename = "/tmp/test.ll" + target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" + + define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 { + entry: + #dbg_value(i32 %x, !10, !DIExpression(), !11) + %add = add nsw i32 %x, 1, !dbg !12 + ret i32 0 + } + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!2} + !llvm.ident = !{!3} + + !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") + !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/") + !2 = !{i32 2, !"Debug Info Version", i32 3} + !3 = !{!"clang"} + !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) + !5 = !DIFile(filename: "/tmp/code.cpp", directory: "") + !6 = !DISubroutineType(types: !7) + !7 = !{!8, !8} + !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !9 = !{!10} + !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8) + !11 = !DILocation(line: 0, scope: !4, inlinedAt: !14) + !12 = !DILocation(line: 2, column: 11, scope: !13, inlinedAt: !14) + !13 = distinct !DILexicalBlock(scope: !4, file: !5, line: 10, column: 28) + !14 = !DILocation(line: 3, column: 2, scope: !4) + +... +--- +name: _Z3fooi +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +noPhis: false +isSSA: true +noVRegs: false +hasFakeUses: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: _, preferred-register: '', flags: [ ] } + - { id: 1, class: _, preferred-register: '', flags: [ ] } + - { id: 2, class: _, preferred-register: '', flags: [ ] } + - { id: 3, class: _, preferred-register: '', flags: [ ] } +liveins: + - { reg: '$w0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + isCalleeSavedInfoValid: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.1.entry: + liveins: $w0 + + %0:_(s32) = COPY $w0 + %1:_(s32) = G_CONSTANT i32 1 + %3:_(s32) = G_CONSTANT i32 0 + DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 + %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + )"; + auto TM = createTargetMachine(Triple::normalize("aarch64--"), "", ""); + 
MachineModuleInfo MMI(TM.get()); + std::unique_ptr M = parseMIR(*TM, MIR, MMI, &C); + ASSERT_TRUE(M); + + DroppedVariableStatsMIR Stats; + auto *MF = MMI.getMachineFunction(*M->getFunction("_Z3fooi")); + Stats.runBeforePass("Test", MF); + + // This loop simulates an IR pass that drops debug information. + for (auto &MBB : *MF) { + for (auto &MI : MBB) { + if (MI.isDebugValueLike()) { + MI.eraseFromParent(); + break; + } + } + break; + } + + Stats.runAfterPass("Test", MF); + ASSERT_EQ(Stats.getPassDroppedVariables(), true); +} + +// This test ensures that if a DBG_VALUE is dropped after an optimization pass, +// but an instruction that has a scope which is a child of the DBG_VALUE scope +// still exists, and the instruction is inlined at a location that is the +// DBG_VALUE's inlined at location, debug information is conisdered dropped. +TEST(DroppedVariableStatsMIR, InlinedAtChild) { + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllTargetMCs(); + PassInstrumentationCallbacks PIC; + PassInstrumentation PI(&PIC); + + LLVMContext C; + + const char *MIR = + R"( +--- | + ; ModuleID = '/tmp/test.ll' + source_filename = "/tmp/test.ll" + target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" + + define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 { + entry: + #dbg_value(i32 %x, !10, !DIExpression(), !11) + %add = add nsw i32 %x, 1, !dbg !12 + ret i32 0 + } + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!2} + !llvm.ident = !{!3} + + !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") + !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/") + !2 = !{i32 2, !"Debug Info Version", i32 3} + !3 = !{!"clang"} + !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) + !5 = !DIFile(filename: "/tmp/code.cpp", directory: "") + !6 = !DISubroutineType(types: !7) + !7 = !{!8, !8} + !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !9 = !{!10} + !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8) + !11 = !DILocation(line: 0, scope: !4, inlinedAt: !14) + !12 = !DILocation(line: 2, column: 11, scope: !13, inlinedAt: !15) + !13 = distinct !DILexicalBlock(scope: !4, file: !5, line: 10, column: 28) + !14 = !DILocation(line: 3, column: 2, scope: !4) + !15 = !DILocation(line: 4, column: 5, scope: !13, inlinedAt: !14) + +... 
+--- +name: _Z3fooi +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +noPhis: false +isSSA: true +noVRegs: false +hasFakeUses: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: _, preferred-register: '', flags: [ ] } + - { id: 1, class: _, preferred-register: '', flags: [ ] } + - { id: 2, class: _, preferred-register: '', flags: [ ] } + - { id: 3, class: _, preferred-register: '', flags: [ ] } +liveins: + - { reg: '$w0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + isCalleeSavedInfoValid: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.1.entry: + liveins: $w0 + + %0:_(s32) = COPY $w0 + %1:_(s32) = G_CONSTANT i32 1 + %3:_(s32) = G_CONSTANT i32 0 + DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 + %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + )"; + auto TM = createTargetMachine(Triple::normalize("aarch64--"), "", ""); + MachineModuleInfo MMI(TM.get()); + std::unique_ptr M = parseMIR(*TM, MIR, MMI, &C); + ASSERT_TRUE(M); + + DroppedVariableStatsMIR Stats; + auto *MF = MMI.getMachineFunction(*M->getFunction("_Z3fooi")); + Stats.runBeforePass("Test", MF); + + // This loop simulates an IR pass that drops debug information. 
+ for (auto &MBB : *MF) { + for (auto &MI : MBB) { + if (MI.isDebugValueLike()) { + MI.eraseFromParent(); + break; + } + } + break; + } + + Stats.runAfterPass("Test", MF); + ASSERT_EQ(Stats.getPassDroppedVariables(), true); +} + +} // end anonymous namespace From 51553227f010e3a9a439b2e57af15a6797f69a90 Mon Sep 17 00:00:00 2001 From: Jan Patrick Lehr Date: Tue, 3 Dec 2024 21:42:38 +0100 Subject: [PATCH 125/191] Revert "Reland of #108413: [Offload] Introduce offload-tblgen and initial new API implementation" (#118541) Reverts llvm/llvm-project#118503 Broke bot https://lab.llvm.org/staging/#/builders/131/builds/9701/steps/5/logs/stdio --- offload/CMakeLists.txt | 3 - offload/cmake/OpenMPTesting.cmake | 12 - offload/liboffload/API/APIDefs.td | 212 ------ offload/liboffload/API/CMakeLists.txt | 25 - offload/liboffload/API/Common.td | 141 ---- offload/liboffload/API/Device.td | 106 --- offload/liboffload/API/OffloadAPI.td | 15 - offload/liboffload/API/Platform.td | 112 ---- offload/liboffload/API/README.md | 150 ----- offload/liboffload/CMakeLists.txt | 37 -- offload/liboffload/README.md | 8 - offload/liboffload/exports | 6 - offload/liboffload/include/OffloadImpl.hpp | 94 --- .../liboffload/include/generated/OffloadAPI.h | 610 ------------------ .../include/generated/OffloadEntryPoints.inc | 441 ------------- .../include/generated/OffloadFuncs.inc | 34 - .../generated/OffloadImplFuncDecls.inc | 38 -- .../include/generated/OffloadPrint.hpp | 428 ------------ offload/liboffload/src/Helpers.hpp | 95 --- offload/liboffload/src/OffloadImpl.cpp | 247 ------- offload/liboffload/src/OffloadLib.cpp | 44 -- .../common/include/PluginInterface.h | 4 - offload/test/lit.cfg | 3 +- offload/test/lit.site.cfg.in | 1 - .../tools/offload-tblgen/default_returns.td | 40 -- .../test/tools/offload-tblgen/entry_points.td | 37 -- .../tools/offload-tblgen/functions_basic.td | 39 -- .../offload-tblgen/functions_code_loc.td | 26 - .../offload-tblgen/functions_ranged_param.td | 36 -- .../test/tools/offload-tblgen/print_enum.td | 34 - .../tools/offload-tblgen/print_function.td | 38 -- .../tools/offload-tblgen/type_tagged_enum.td | 76 --- offload/tools/offload-tblgen/APIGen.cpp | 229 ------- offload/tools/offload-tblgen/CMakeLists.txt | 24 - .../tools/offload-tblgen/EntryPointGen.cpp | 138 ---- offload/tools/offload-tblgen/FuncsGen.cpp | 74 --- offload/tools/offload-tblgen/GenCommon.hpp | 67 -- offload/tools/offload-tblgen/Generators.hpp | 23 - offload/tools/offload-tblgen/PrintGen.cpp | 226 ------- offload/tools/offload-tblgen/RecordTypes.hpp | 227 ------- .../tools/offload-tblgen/offload-tblgen.cpp | 101 --- offload/unittests/CMakeLists.txt | 3 +- offload/unittests/OffloadAPI/CMakeLists.txt | 16 - .../OffloadAPI/common/Environment.cpp | 96 --- .../OffloadAPI/common/Environment.hpp | 17 - .../unittests/OffloadAPI/common/Fixtures.hpp | 64 -- .../OffloadAPI/device/olDeviceInfo.hpp | 21 - .../OffloadAPI/device/olGetDevice.cpp | 39 -- .../OffloadAPI/device/olGetDeviceCount.cpp | 28 - .../OffloadAPI/device/olGetDeviceInfo.cpp | 76 --- .../OffloadAPI/device/olGetDeviceInfoSize.cpp | 58 -- .../OffloadAPI/platform/olGetPlatform.cpp | 28 - .../platform/olGetPlatformCount.cpp | 22 - .../OffloadAPI/platform/olGetPlatformInfo.cpp | 76 --- .../platform/olGetPlatformInfoSize.cpp | 57 -- .../OffloadAPI/platform/olPlatformInfo.hpp | 20 - 56 files changed, 2 insertions(+), 4920 deletions(-) delete mode 100644 offload/liboffload/API/APIDefs.td delete mode 100644 offload/liboffload/API/CMakeLists.txt delete mode 100644 
offload/liboffload/API/Common.td delete mode 100644 offload/liboffload/API/Device.td delete mode 100644 offload/liboffload/API/OffloadAPI.td delete mode 100644 offload/liboffload/API/Platform.td delete mode 100644 offload/liboffload/API/README.md delete mode 100644 offload/liboffload/CMakeLists.txt delete mode 100644 offload/liboffload/README.md delete mode 100644 offload/liboffload/exports delete mode 100644 offload/liboffload/include/OffloadImpl.hpp delete mode 100644 offload/liboffload/include/generated/OffloadAPI.h delete mode 100644 offload/liboffload/include/generated/OffloadEntryPoints.inc delete mode 100644 offload/liboffload/include/generated/OffloadFuncs.inc delete mode 100644 offload/liboffload/include/generated/OffloadImplFuncDecls.inc delete mode 100644 offload/liboffload/include/generated/OffloadPrint.hpp delete mode 100644 offload/liboffload/src/Helpers.hpp delete mode 100644 offload/liboffload/src/OffloadImpl.cpp delete mode 100644 offload/liboffload/src/OffloadLib.cpp delete mode 100644 offload/test/tools/offload-tblgen/default_returns.td delete mode 100644 offload/test/tools/offload-tblgen/entry_points.td delete mode 100644 offload/test/tools/offload-tblgen/functions_basic.td delete mode 100644 offload/test/tools/offload-tblgen/functions_code_loc.td delete mode 100644 offload/test/tools/offload-tblgen/functions_ranged_param.td delete mode 100644 offload/test/tools/offload-tblgen/print_enum.td delete mode 100644 offload/test/tools/offload-tblgen/print_function.td delete mode 100644 offload/test/tools/offload-tblgen/type_tagged_enum.td delete mode 100644 offload/tools/offload-tblgen/APIGen.cpp delete mode 100644 offload/tools/offload-tblgen/CMakeLists.txt delete mode 100644 offload/tools/offload-tblgen/EntryPointGen.cpp delete mode 100644 offload/tools/offload-tblgen/FuncsGen.cpp delete mode 100644 offload/tools/offload-tblgen/GenCommon.hpp delete mode 100644 offload/tools/offload-tblgen/Generators.hpp delete mode 100644 offload/tools/offload-tblgen/PrintGen.cpp delete mode 100644 offload/tools/offload-tblgen/RecordTypes.hpp delete mode 100644 offload/tools/offload-tblgen/offload-tblgen.cpp delete mode 100644 offload/unittests/OffloadAPI/CMakeLists.txt delete mode 100644 offload/unittests/OffloadAPI/common/Environment.cpp delete mode 100644 offload/unittests/OffloadAPI/common/Environment.hpp delete mode 100644 offload/unittests/OffloadAPI/common/Fixtures.hpp delete mode 100644 offload/unittests/OffloadAPI/device/olDeviceInfo.hpp delete mode 100644 offload/unittests/OffloadAPI/device/olGetDevice.cpp delete mode 100644 offload/unittests/OffloadAPI/device/olGetDeviceCount.cpp delete mode 100644 offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp delete mode 100644 offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp delete mode 100644 offload/unittests/OffloadAPI/platform/olGetPlatform.cpp delete mode 100644 offload/unittests/OffloadAPI/platform/olGetPlatformCount.cpp delete mode 100644 offload/unittests/OffloadAPI/platform/olGetPlatformInfo.cpp delete mode 100644 offload/unittests/OffloadAPI/platform/olGetPlatformInfoSize.cpp delete mode 100644 offload/unittests/OffloadAPI/platform/olPlatformInfo.hpp diff --git a/offload/CMakeLists.txt b/offload/CMakeLists.txt index deae8d1a6b50c..e24f0faa91211 100644 --- a/offload/CMakeLists.txt +++ b/offload/CMakeLists.txt @@ -353,9 +353,6 @@ add_subdirectory(tools) # Build target agnostic offloading library. add_subdirectory(src) -add_subdirectory(tools/offload-tblgen) -add_subdirectory(liboffload) - # Add tests. 
add_subdirectory(test) diff --git a/offload/cmake/OpenMPTesting.cmake b/offload/cmake/OpenMPTesting.cmake index f97def2c52eba..6609d6301d0f9 100644 --- a/offload/cmake/OpenMPTesting.cmake +++ b/offload/cmake/OpenMPTesting.cmake @@ -48,17 +48,6 @@ function(find_standalone_test_dependencies) return() endif() - find_program(OFFLOAD_TBLGEN_EXECUTABLE - NAMES offload-tblgen - PATHS ${OPENMP_LLVM_TOOLS_DIR}) - if (NOT OFFLOAD_TBLGEN_EXECUTABLE) - message(STATUS "Cannot find 'offload-tblgen'.") - message(STATUS "Please put 'not' in your PATH, set OFFLOAD_TBLGEN_EXECUTABLE to its full path, or point OPENMP_LLVM_TOOLS_DIR to its directory.") - message(WARNING "The check targets will not be available!") - set(ENABLE_CHECK_TARGETS FALSE PARENT_SCOPE) - return() - endif() - find_program(OPENMP_NOT_EXECUTABLE NAMES not PATHS ${OPENMP_LLVM_TOOLS_DIR}) @@ -93,7 +82,6 @@ else() set(OPENMP_FILECHECK_EXECUTABLE ${LLVM_RUNTIME_OUTPUT_INTDIR}/FileCheck) endif() set(OPENMP_NOT_EXECUTABLE ${LLVM_RUNTIME_OUTPUT_INTDIR}/not) - set(OFFLOAD_TBLGEN_EXECUTABLE ${LLVM_RUNTIME_OUTPUT_INTDIR}/offload-tblgen) set(OFFLOAD_DEVICE_INFO_EXECUTABLE ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-offload-device-info) endif() diff --git a/offload/liboffload/API/APIDefs.td b/offload/liboffload/API/APIDefs.td deleted file mode 100644 index 60c1b85d26911..0000000000000 --- a/offload/liboffload/API/APIDefs.td +++ /dev/null @@ -1,212 +0,0 @@ -//===-- APIDefs.td - Base definitions for Offload tablegen -*- tablegen -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains the class definitions used to implement the Offload API, -// as well as helper functions used to help populate relevant records. -// See offload/API/README.md for more detailed documentation. -// -//===----------------------------------------------------------------------===// - -// Prefix for API naming. This could be hard-coded in the future when a value -// is agreed upon. -defvar PREFIX = "OL"; -defvar prefix = !tolower(PREFIX); - -// Parameter flags -defvar PARAM_IN = 0x1; -defvar PARAM_OUT = 0x2; -defvar PARAM_OPTIONAL = 0x4; -defvar PARAM_IN_OPTIONAL = !or(PARAM_IN, PARAM_OPTIONAL); -defvar PARAM_OUT_OPTIONAL = !or(PARAM_OUT, PARAM_OPTIONAL); - -// Does the type end with '_handle_t'? -class IsHandleType { - // size("_handle_t") == 9 - bit ret = !if(!lt(!size(Type), 9), 0, - !ne(!find(Type, "_handle_t", !sub(!size(Type), 9)), -1)); -} - -// Does the type end with '*'? -class IsPointerType { - bit ret = !ne(!find(Type, "*", !sub(!size(Type), 1)), -1); -} - -// Describes the valid range of a pointer parameter that reperesents an array -class Range { - string begin = Begin; - string end = End; -} - -// Names the parameters that indicate the type and size of the data pointed to -// by an opaque pointer parameter -class TypeInfo { - string enum = TypeEnum; - string size = TypeSize; -} - -class Param Flags = 0> { - string type = Type; - string name = Name; - string desc = Desc; - bits<3> flags = Flags; - Range range = Range<"", "">; - TypeInfo type_info = TypeInfo<"", "">; - bit IsHandle = IsHandleType.ret; - bit IsPointer = IsPointerType.ret; -} - -// A parameter whose range is described by other parameters in the function. 
-class RangedParam Flags, Range ParamRange> : Param { - let range = ParamRange; -} - -// A parameter (normally of type void*) which has its pointee type and size -// described by other parameters in the function. -class TypeTaggedParam Flags, TypeInfo ParamTypeInfo> : Param { - let type_info = ParamTypeInfo; -} - -class Return Conditions = []> { - string value = Value; - list conditions = Conditions; -} - -class ShouldCheckHandle { - bit ret = !and(P.IsHandle, !eq(!and(PARAM_OPTIONAL, P.flags), 0)); -} - -class ShouldCheckPointer { - bit ret = !and(P.IsPointer, !eq(!and(PARAM_OPTIONAL, P.flags), 0)); -} - -// For a list of returns that contains a specific return code, find and append -// new conditions to that return -class AppendConditionsToReturn Returns, string ReturnValue, - list Conditions> { - list ret = - !foreach(Ret, Returns, - !if(!eq(Ret.value, ReturnValue), - Return, Ret)); -} - -// Add null handle checks to a function's return value descriptions -class AddHandleChecksToReturns Params, list Returns> { - list handle_params = - !foreach(P, Params, !if(ShouldCheckHandle
<P>
.ret, P.name, "")); - list handle_params_filt = - !filter(param, handle_params, !ne(param, "")); - list handle_param_conds = - !foreach(handle, handle_params_filt, "`NULL == "#handle#"`"); - - // Does the list of returns already contain ERROR_INVALID_NULL_HANDLE? - bit returns_has_inv_handle = !foldl( - 0, Returns, HasErr, Ret, - !or(HasErr, !eq(Ret.value, PREFIX#"_ERRC_INVALID_NULL_HANDLE"))); - - list returns_out = !if(returns_has_inv_handle, - AppendConditionsToReturn.ret, - !listconcat(Returns, [Return]) - ); -} - -// Add null pointer checks to a function's return value descriptions -class AddPointerChecksToReturns Params, list Returns> { - list ptr_params = - !foreach(P, Params, !if(ShouldCheckPointer
<P>
.ret, P.name, "")); - list ptr_params_filt = !filter(param, ptr_params, !ne(param, "")); - list ptr_param_conds = - !foreach(ptr, ptr_params_filt, "`NULL == "#ptr#"`"); - - // Does the list of returns already contain ERROR_INVALID_NULL_POINTER? - bit returns_has_inv_ptr = !foldl( - 0, Returns, HasErr, Ret, - !or(HasErr, !eq(Ret.value, PREFIX#"_ERRC_INVALID_NULL_POINTER"))); - list returns_out = !if(returns_has_inv_ptr, - AppendConditionsToReturn.ret, - !listconcat(Returns, [Return]) - ); -} - -defvar DefaultReturns = [Return, - Return, - Return]; - -class APIObject { - string name; - string desc; -} - -class Function : APIObject { - list params; - list returns; - list details = []; - list analogues = []; - - list returns_with_def = !listconcat(DefaultReturns, returns); - list all_returns = AddPointerChecksToReturns.returns_out>.returns_out; -} - -class Etor { - string name = Name; - string desc = Desc; - string tagged_type; -} - -class TaggedEtor : Etor { - let tagged_type = Type; -} - -class Enum : APIObject { - // This refers to whether the enumerator descriptions specify a return - // type for functions where this enum may be used as an output type. If set, - // all Etor values must be TaggedEtor records - bit is_typed = 0; - - list etors = []; -} - -class StructMember { - string type = Type; - string name = Name; - string desc = Desc; -} - -defvar DefaultPropStructMembers = - [StructMember, - StructMember<"void*", "pNext", "pointer to extension-specific structure">]; - -class StructHasInheritedMembers { - bit ret = !or(!eq(BaseClass, prefix#"_base_properties_t"), - !eq(BaseClass, prefix#"_base_desc_t")); -} - -class Struct : APIObject { - string base_class = ""; - list members; - list all_members = - !if(StructHasInheritedMembers.ret, - DefaultPropStructMembers, [])#members; -} - -class Typedef : APIObject { string value; } - -class FptrTypedef : APIObject { - list params; - list returns; -} - -class Macro : APIObject { - string value; - - string condition; - string alt_value; -} - -class Handle : APIObject; diff --git a/offload/liboffload/API/CMakeLists.txt b/offload/liboffload/API/CMakeLists.txt deleted file mode 100644 index 8fd6cb539374a..0000000000000 --- a/offload/liboffload/API/CMakeLists.txt +++ /dev/null @@ -1,25 +0,0 @@ -# The OffloadGenerate target is used to regenerate the generated files in the -# include directory. These files are checked in with the rest of the source, -# therefore it is only needed when making changes to the API. - -find_program(CLANG_FORMAT clang-format PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH) -if (CLANG_FORMAT) - set(LLVM_TARGET_DEFINITIONS ${CMAKE_CURRENT_SOURCE_DIR}/OffloadAPI.td) - - tablegen(OFFLOAD OffloadAPI.h -gen-api) - tablegen(OFFLOAD OffloadEntryPoints.inc -gen-entry-points) - tablegen(OFFLOAD OffloadFuncs.inc -gen-func-names) - tablegen(OFFLOAD OffloadImplFuncDecls.inc -gen-impl-func-decls) - tablegen(OFFLOAD OffloadPrint.hpp -gen-print-header) - - set(OFFLOAD_GENERATED_FILES ${TABLEGEN_OUTPUT}) - add_public_tablegen_target(OffloadGenerate) - add_custom_command(TARGET OffloadGenerate POST_BUILD COMMAND ${CLANG_FORMAT} - -i ${OFFLOAD_GENERATED_FILES}) - add_custom_command(TARGET OffloadGenerate POST_BUILD COMMAND ${CMAKE_COMMAND} - -E copy_if_different ${OFFLOAD_GENERATED_FILES} "${CMAKE_CURRENT_SOURCE_DIR}/../include/generated") -else() - message(WARNING "clang-format was not found, so the OffloadGenerate target\ - will not be available. 
Offload will still build, but you will not be\ - able to make changes to the API.") -endif() diff --git a/offload/liboffload/API/Common.td b/offload/liboffload/API/Common.td deleted file mode 100644 index 5b19d1d47129e..0000000000000 --- a/offload/liboffload/API/Common.td +++ /dev/null @@ -1,141 +0,0 @@ -def : Macro { - let name = "OL_VERSION_MAJOR"; - let desc = "Major version of the Offload API"; - let value = "0"; -} - -def : Macro { - let name = "OL_VERSION_MINOR"; - let desc = "Minor version of the Offload API"; - let value = "0"; -} - -def : Macro { - let name = "OL_VERSION_PATCH"; - let desc = "Patch version of the Offload API"; - let value = "1"; -} - -def : Macro { - let name = "OL_APICALL"; - let desc = "Calling convention for all API functions"; - let condition = "defined(_WIN32)"; - let value = "__cdecl"; - let alt_value = ""; -} - -def : Macro { - let name = "OL_APIEXPORT"; - let desc = "Microsoft-specific dllexport storage-class attribute"; - let condition = "defined(_WIN32)"; - let value = "__declspec(dllexport)"; - let alt_value = ""; -} - -def : Macro { - let name = "OL_DLLEXPORT"; - let desc = "Microsoft-specific dllexport storage-class attribute"; - let condition = "defined(_WIN32)"; - let value = "__declspec(dllexport)"; -} - -def : Macro { - let name = "OL_DLLEXPORT"; - let desc = "GCC-specific dllexport storage-class attribute"; - let condition = "__GNUC__ >= 4"; - let value = "__attribute__ ((visibility (\"default\")))"; - let alt_value = ""; -} - -def : Handle { - let name = "ol_platform_handle_t"; - let desc = "Handle of a platform instance"; -} - -def : Handle { - let name = "ol_device_handle_t"; - let desc = "Handle of platform's device object"; -} - -def : Handle { - let name = "ol_context_handle_t"; - let desc = "Handle of context object"; -} - -def : Enum { - let name = "ol_errc_t"; - let desc = "Defines Return/Error codes"; - let etors =[ - Etor<"SUCCESS", "Success">, - Etor<"INVALID_VALUE", "Invalid Value">, - Etor<"INVALID_PLATFORM", "Invalid platform">, - Etor<"DEVICE_NOT_FOUND", "Device not found">, - Etor<"INVALID_DEVICE", "Invalid device">, - Etor<"DEVICE_LOST", "Device hung, reset, was removed, or driver update occurred">, - Etor<"UNINITIALIZED", "plugin is not initialized or specific entry-point is not implemented">, - Etor<"OUT_OF_RESOURCES", "Out of resources">, - Etor<"UNSUPPORTED_VERSION", "generic error code for unsupported versions">, - Etor<"UNSUPPORTED_FEATURE", "generic error code for unsupported features">, - Etor<"INVALID_ARGUMENT", "generic error code for invalid arguments">, - Etor<"INVALID_NULL_HANDLE", "handle argument is not valid">, - Etor<"INVALID_NULL_POINTER", "pointer argument may not be nullptr">, - Etor<"INVALID_SIZE", "invalid size or dimensions (e.g., must not be zero, or is out of bounds)">, - Etor<"INVALID_ENUMERATION", "enumerator argument is not valid">, - Etor<"UNSUPPORTED_ENUMERATION", "enumerator argument is not supported by the device">, - Etor<"UNKNOWN", "Unknown or internal error"> - ]; -} - -def : Struct { - let name = "ol_error_struct_t"; - let desc = "Details of the error condition returned by an API call"; - let members = [ - StructMember<"ol_errc_t", "Code", "The error code">, - StructMember<"const char*", "Details", "String containing error details"> - ]; -} - -def : Typedef { - let name = "ol_result_t"; - let desc = "Result type returned by all entry points."; - let value = "const ol_error_struct_t*"; -} - -def : Macro { - let name = "OL_SUCCESS"; - let desc = "Success condition"; - let value = "NULL"; -} - 
-def : Struct { - let name = "ol_code_location_t"; - let desc = "Code location information that can optionally be associated with an API call"; - let members = [ - StructMember<"const char*", "FunctionName", "Function name">, - StructMember<"const char*", "SourceFile", "Source code file">, - StructMember<"uint32_t", "LineNumber", "Source code line number">, - StructMember<"uint32_t", "ColumnNumber", "Source code column number"> - ]; -} - -def : Function { - let name = "olInit"; - let desc = "Perform initialization of the Offload library and plugins"; - let details = [ - "This must be the first API call made by a user of the Offload library", - "Each call will increment an internal reference count that is decremented by `olShutDown`" - ]; - let params = []; - let returns = []; -} - -def : Function { - let name = "olShutDown"; - let desc = "Release the resources in use by Offload"; - let details = [ - "This decrements an internal reference count. When this reaches 0, all resources will be released", - "Subsequent API calls made after this are not valid" - ]; - let params = []; - let returns = []; -} diff --git a/offload/liboffload/API/Device.td b/offload/liboffload/API/Device.td deleted file mode 100644 index 30c0b71fe7b37..0000000000000 --- a/offload/liboffload/API/Device.td +++ /dev/null @@ -1,106 +0,0 @@ -//===-- Device.td - Device definitions for Offload ---------*- tablegen -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains Offload API definitions related to the Device handle -// -//===----------------------------------------------------------------------===// - -def : Enum { - let name = "ol_device_type_t"; - let desc = "Supported device types"; - let etors =[ - Etor<"DEFAULT", "The default device type as preferred by the runtime">, - Etor<"ALL", "Devices of all types">, - Etor<"GPU", "GPU device type">, - Etor<"CPU", "CPU device type">, - ]; -} - -def : Enum { - let name = "ol_device_info_t"; - let desc = "Supported device info"; - let is_typed = 1; - let etors =[ - TaggedEtor<"TYPE", "ol_device_type_t", "type of the device">, - TaggedEtor<"PLATFORM", "ol_platform_handle_t", "the platform associated with the device">, - TaggedEtor<"NAME", "char[]", "Device name">, - TaggedEtor<"VENDOR", "char[]", "Device vendor">, - TaggedEtor<"DRIVER_VERSION", "char[]", "Driver version"> - ]; -} - -def : Function { - let name = "olGetDeviceCount"; - let desc = "Retrieves the number of available devices within a platform"; - let params = [ - Param<"ol_platform_handle_t", "Platform", "handle of the platform instance", PARAM_IN>, - Param<"uint32_t*", "NumDevices", "pointer to the number of devices.", PARAM_OUT> - ]; - let returns = []; -} - -def : Function { - let name = "olGetDevice"; - let desc = "Retrieves devices within a platform"; - let details = [ - "Multiple calls to this function will return identical device handles, in the same order.", - ]; - let params = [ - Param<"ol_platform_handle_t", "Platform", "handle of the platform instance", PARAM_IN>, - Param<"uint32_t", "NumEntries", "the number of devices to be added to phDevices, which must be greater than zero", PARAM_IN>, - RangedParam<"ol_device_handle_t*", "Devices", "Array of device handles. 
" - "If NumEntries is less than the number of devices available, then this function shall only retrieve that number of devices.", PARAM_OUT, - Range<"0", "NumEntries">> - ]; - let returns = [ - Return<"OL_ERRC_INVALID_SIZE", [ - "`NumEntries == 0`" - ]> - ]; -} - -def : Function { - let name = "olGetDeviceInfo"; - let desc = "Queries the given property of the device"; - let details = []; - let params = [ - Param<"ol_device_handle_t", "Device", "handle of the device instance", PARAM_IN>, - Param<"ol_device_info_t", "PropName", "type of the info to retrieve", PARAM_IN>, - Param<"size_t", "PropSize", "the number of bytes pointed to by PropValue.", PARAM_IN>, - TypeTaggedParam<"void*", "PropValue", "array of bytes holding the info. If PropSize is not equal to or greater than the real " - "number of bytes needed to return the info then the OL_ERRC_INVALID_SIZE error is returned and " - "PropValue is not used.", PARAM_OUT, TypeInfo<"PropName" , "PropSize">> - ]; - let returns = [ - Return<"OL_ERRC_UNSUPPORTED_ENUMERATION", [ - "If `PropName` is not supported by the device." - ]>, - Return<"OL_ERRC_INVALID_SIZE", [ - "`PropSize == 0`", - "If `PropSize` is less than the real number of bytes needed to return the info." - ]>, - Return<"OL_ERRC_INVALID_DEVICE"> - ]; -} - -def : Function { - let name = "olGetDeviceInfoSize"; - let desc = "Returns the storage size of the given device query"; - let details = []; - let params = [ - Param<"ol_device_handle_t", "Device", "handle of the device instance", PARAM_IN>, - Param<"ol_device_info_t", "PropName", "type of the info to retrieve", PARAM_IN>, - Param<"size_t*", "PropSizeRet", "pointer to the number of bytes required to store the query", PARAM_OUT> - ]; - let returns = [ - Return<"OL_ERRC_UNSUPPORTED_ENUMERATION", [ - "If `PropName` is not supported by the device." - ]>, - Return<"OL_ERRC_INVALID_DEVICE"> - ]; -} diff --git a/offload/liboffload/API/OffloadAPI.td b/offload/liboffload/API/OffloadAPI.td deleted file mode 100644 index 8a0c3c4058122..0000000000000 --- a/offload/liboffload/API/OffloadAPI.td +++ /dev/null @@ -1,15 +0,0 @@ -//===-- OffloadAPI.td - Root tablegen file for Offload -----*- tablegen -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// Always include this file first -include "APIDefs.td" - -// Add API definition files here -include "Common.td" -include "Platform.td" -include "Device.td" diff --git a/offload/liboffload/API/Platform.td b/offload/liboffload/API/Platform.td deleted file mode 100644 index 03e70cf96ac94..0000000000000 --- a/offload/liboffload/API/Platform.td +++ /dev/null @@ -1,112 +0,0 @@ -//===-- Platform.td - Platform definitions for Offload -----*- tablegen -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains Offload API definitions related to the Platform handle -// -//===----------------------------------------------------------------------===// -def : Function { - let name = "olGetPlatform"; - let desc = "Retrieves all available platforms"; - let details = [ - "Multiple calls to this function will return identical platforms handles, in the same order.", - ]; - let params = [ - Param<"uint32_t", "NumEntries", - "The number of platforms to be added to Platforms. NumEntries must be " - "greater than zero.", - PARAM_IN>, - RangedParam<"ol_platform_handle_t*", "Platforms", - "Array of handle of platforms. If NumEntries is less than the number of " - "platforms available, then olGetPlatform shall only retrieve that " - "number of platforms.", - PARAM_OUT, Range<"0", "NumEntries">> - ]; - let returns = [ - Return<"OL_ERRC_INVALID_SIZE", [ - "`NumEntries == 0`" - ]> - ]; -} - -def : Function { - let name = "olGetPlatformCount"; - let desc = "Retrieves the number of available platforms"; - let params = [ - Param<"uint32_t*", - "NumPlatforms", "returns the total number of platforms available.", - PARAM_OUT> - ]; - let returns = []; -} - -def : Enum { - let name = "ol_platform_info_t"; - let desc = "Supported platform info"; - let is_typed = 1; - let etors = [ - TaggedEtor<"NAME", "char[]", "The string denoting name of the platform. The size of the info needs to be dynamically queried.">, - TaggedEtor<"VENDOR_NAME", "char[]", "The string denoting name of the vendor of the platform. The size of the info needs to be dynamically queried.">, - TaggedEtor<"VERSION", "char[]", "The string denoting the version of the platform. The size of the info needs to be dynamically queried.">, - TaggedEtor<"BACKEND", "ol_platform_backend_t", "The native backend of the platform."> - ]; -} - -def : Enum { - let name = "ol_platform_backend_t"; - let desc = "Identifies the native backend of the platform"; - let etors =[ - Etor<"UNKNOWN", "The backend is not recognized">, - Etor<"CUDA", "The backend is CUDA">, - Etor<"AMDGPU", "The backend is AMDGPU">, - ]; -} - -def : Function { - let name = "olGetPlatformInfo"; - let desc = "Queries the given property of the platform"; - let details = [ - "`olGetPlatformInfoSize` can be used to query the storage size " - "required for the given query." - ]; - let params = [ - Param<"ol_platform_handle_t", "Platform", "handle of the platform", PARAM_IN>, - Param<"ol_platform_info_t", "PropName", "type of the info to retrieve", PARAM_IN>, - Param<"size_t", "PropSize", "the number of bytes pointed to by pPlatformInfo.", PARAM_IN>, - TypeTaggedParam<"void*", "PropValue", "array of bytes holding the info. " - "If Size is not equal to or greater to the real number of bytes needed to return the info " - "then the OL_ERRC_INVALID_SIZE error is returned and pPlatformInfo is not used.", PARAM_OUT, - TypeInfo<"PropName" , "PropSize">> - ]; - let returns = [ - Return<"OL_ERRC_UNSUPPORTED_ENUMERATION", [ - "If `PropName` is not supported by the platform." - ]>, - Return<"OL_ERRC_INVALID_SIZE", [ - "`PropSize == 0`", - "If `PropSize` is less than the real number of bytes needed to return the info." 
- ]>, - Return<"OL_ERRC_INVALID_PLATFORM"> - ]; -} - -def : Function { - let name = "olGetPlatformInfoSize"; - let desc = "Returns the storage size of the given platform query"; - let details = []; - let params = [ - Param<"ol_platform_handle_t", "Platform", "handle of the platform", PARAM_IN>, - Param<"ol_platform_info_t", "PropName", "type of the info to query", PARAM_IN>, - Param<"size_t*", "PropSizeRet", "pointer to the number of bytes required to store the query", PARAM_OUT> - ]; - let returns = [ - Return<"OL_ERRC_UNSUPPORTED_ENUMERATION", [ - "If `PropName` is not supported by the platform." - ]>, - Return<"OL_ERRC_INVALID_PLATFORM"> - ]; -} diff --git a/offload/liboffload/API/README.md b/offload/liboffload/API/README.md deleted file mode 100644 index 38a055811b2d0..0000000000000 --- a/offload/liboffload/API/README.md +++ /dev/null @@ -1,150 +0,0 @@ -# Offload API definitions - -**Note**: This is a work-in-progress. It is loosely based on equivalent -tooling in Unified Runtime. - -The Tablegen files in this directory are used to define the Offload API. They -are used with the `offload-tblgen` tool to generate API headers, print headers, -and other implementation details. - -The root file is `OffloadAPI.td` - additional `.td` files can be included in -this file to add them to the API. - -## API Objects -The API consists of a number of objects, which always have a *name* field and -*description* field, and are one of the following types: - -### Function -Represents an API entry point function. Has a list of returns and parameters. -Also has fields for details (representing a bullet-point list of -information about the function that would otherwise be too detailed for the -description), and analogues (equivalent functions in other APIs). - -#### Parameter -Represents a parameter to a function, has *type*, *name*, and *desc* fields. -Also has a *flags* field containing flags representing whether the parameter is -in, out, or optional. - -The *type* field is used to infer if the parameter is a pointer or handle type. -A *handle* type is a pointer to an opaque struct, used to abstract over -plugin-specific implementation details. - -There are two special variants of a *parameter*: -* **RangedParameter** - Represents a parameter that has a range described by other parameters. Generally these are pointers to an arbitrary number of objects. The range is used for generating validation and printing code. E.g, a range might be between `(0, NumDevices)` -* **TypeTaggedParameter** - Represents a parameter (usually of `void*` type) that has the type and size of its pointee data described by other function parameters. The type is usually described by a type-tagged enum. This allows functions (e.g. `olGetDeviceInfo`) to return data of an arbitrary type. - -#### Return -A return represents a possible return code from the function, and optionally a -list of conditions in which this value may be returned. The conditions list is -not expected to be exhaustive. A condition is considered free-form text, but -if it is wrapped in \`backticks\` then it is treated as literal code -representing an error condition (e.g. `someParam < 1`). These conditions are -used to automatically create validation checks by the `offload-tblgen` -validation generator. - -Returns are automatically generated for functions with pointer or handle -parameters, so API authors do not need to exhaustively add null checks for -these types of parameters. All functions also get a number of default return -values automatically. 
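For illustration, a condition such as `NumEntries == 0` attached to an `OL_ERRC_INVALID_SIZE` return, combined with the automatically generated null checks for handle and pointer parameters, ends up as a validation wrapper roughly like the following sketch (the real output lives in the generated `OffloadEntryPoints.inc`):

```
// Sketch of the generated validation wrapper for a function like olGetDevice.
ol_impl_result_t olGetDevice_val(ol_platform_handle_t Platform,
                                 uint32_t NumEntries,
                                 ol_device_handle_t *Devices) {
  // Condition taken from the Return<"OL_ERRC_INVALID_SIZE", ...> record.
  if (NumEntries == 0)
    return OL_ERRC_INVALID_SIZE;
  // Automatically generated checks for handle and pointer parameters.
  if (NULL == Platform)
    return OL_ERRC_INVALID_NULL_HANDLE;
  if (NULL == Devices)
    return OL_ERRC_INVALID_NULL_POINTER;
  // Hand off to the hand-written implementation function.
  return olGetDevice_impl(Platform, NumEntries, Devices);
}
```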
-
-### Struct
-Represents a struct. Contains a list of members, which each have a *type*,
-*name*, and *desc*.
-
-Also optionally takes a *base_class* field. If this is either of the special
-`offload_base_properties_t` or `offload_base_desc_t` structs, then the struct
-will inherit members from those structs. The generated struct does **not** use
-actual C++ inheritance, but instead explicitly has those members copied in,
-which preserves ABI compatibility with C.
-
-### Enum
-Represents a C-style enum. Contains a list of `etor` values, which have a name
-and description.
-
-A `TaggedEtor` record type also exists which additionally takes a type. This type
-is used when the enum is used as a parameter to a function with a type-tagged
-function parameter (e.g. `olGetDeviceInfo`).
-
-All enums automatically get a `_FORCE_UINT32 = 0x7fffffff` value,
-which forces the underlying type to be uint32.
-
-### Handle
-Represents a pointer to an opaque struct, as described in the Parameter section.
-It does not take any extra fields.
-
-### Typedef
-Represents a typedef, contains only a *value* field.
-
-### Macro
-Represents a C preprocessor `#define`. Contains a *value* field. Optionally
-takes a *condition* field, which allows the macro to be conditionally defined,
-and an *alt_value* field, which represents the value if the condition is false.
-
-Macro arguments are presented in the *name* field (e.g. name = `mymacro(arg)`).
-
-While there may seem to be little point in generating a macro from tablegen, doing this
-allows the entire source of the header file to be generated from the tablegen
-files, rather than requiring a mix of C source and tablegen.
-
-## Generation
-
-### API header
-```
-./offload-tblgen -I /offload/API /offload/API/OffloadAPI.td --gen-api
-```
-The comments in the generated header are in Doxygen format, although
-generating documentation from them hasn't been implemented yet.
-
-The entirety of this header is generated by Tablegen, rather than having a predefined header file that includes one or more `.inc` files. This is because this header is expected to be part of the installation and distributed to end-users, so should be self-contained.
-
-### Entry Points
-```
-./offload-tblgen -I /offload/API /offload/API/OffloadAPI.td --gen-entry-points
-```
-These functions form the actual Offload interface, and are wrappers over the
-functions that contain the actual implementation (see
-'Adding a new entry point').
-
-They implement automatically generated validation checks, and tracing of
-function calls with arguments and results. The tracing can be enabled with the
-`OFFLOAD_TRACE` environment variable.
-
-### Implementation function declarations
-```
-./offload-tblgen -I /offload/API /offload/API/OffloadAPI.td --gen-impl-func-decls
-```
-Generates declarations of the implementation of functions of every entry point
-in the API, e.g. `offloadDeviceFoo_impl` for `offloadDeviceFoo`.
-
-### Print header
-```
-./offload-tblgen -I /offload/API /offload/API/OffloadAPI.td --gen-print-header
-```
-This header contains `std::ostream &operator<<(std::ostream&)` definitions for
-various API objects, including function parameters.
-
-As with the API header, it is expected that this header is part of the installed
-package, so it is entirely generated by Tablegen.
-
-For ease of implementation, and since it is not strictly part of the API, this
-is a C++ header file. If a C version is desirable it could be added.
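Each declaration produced by `--gen-impl-func-decls` is expected to be matched by a hand-written definition returning `ol_impl_result_t` (a helper in `OffloadImpl.hpp` that converts an error code, and optionally a detail string, into an `ol_result_t`). A minimal sketch, using the hypothetical entry point `offloadDeviceFoo` and an invented `FooCount` parameter:

```
// Hypothetical sketch only: offloadDeviceFoo and FooCount are illustrative
// names, not part of the current API.
ol_impl_result_t offloadDeviceFoo_impl(ol_device_handle_t Device,
                                       uint32_t *FooCount) {
  // The generated wrapper has already rejected null handles and pointers,
  // so only device-specific logic needs to live here.
  if (Device == nullptr) // defensive only; normally unreachable
    return {OL_ERRC_INVALID_NULL_HANDLE, "null device handle"};
  *FooCount = 1; // Placeholder result for the hypothetical query.
  return OL_ERRC_SUCCESS;
}
```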
- -### Future Tablegen backends -`RecordTypes.hpp` contains wrappers for all of the API object types, which will -allow more backends to be easily added in future. - -## Adding to the API - -A new object can be added to the API by adding to one of the existing `.td` -files. It is also possible to add a new tablegen file to the API by adding it -to the includes in `OffloadAPI.td`. When the offload target is rebuilt, the -new definition will be included in the generated files. - -### Adding a new entry point - -When a new entry point is added (e.g. `offloadDeviceFoo`), the actual entry -point is automatically generated, which contains validation and tracing code. -It expects an implementation function (`offloadDeviceFoo_impl`) to be defined, -which it will call into. The definition of this implementation function should -be added to `src/offload_impl.cpp` diff --git a/offload/liboffload/CMakeLists.txt b/offload/liboffload/CMakeLists.txt deleted file mode 100644 index f582d9e15fc6e..0000000000000 --- a/offload/liboffload/CMakeLists.txt +++ /dev/null @@ -1,37 +0,0 @@ -add_subdirectory(API) - -add_llvm_library(LLVMOffload SHARED - src/OffloadLib.cpp - src/OffloadImpl.cpp) - -foreach(plugin IN LISTS LIBOMPTARGET_PLUGINS_TO_BUILD) - target_link_libraries(LLVMOffload PRIVATE omptarget.rtl.${plugin}) -endforeach() - -if(LIBOMP_HAVE_VERSION_SCRIPT_FLAG) - target_link_libraries(LLVMOffload PRIVATE "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports") -endif() - -target_include_directories(LLVMOffload PUBLIC - ${CMAKE_CURRENT_BINARY_DIR}/../include - ${CMAKE_CURRENT_SOURCE_DIR}/include - ${CMAKE_CURRENT_SOURCE_DIR}/include/generated - ${CMAKE_CURRENT_SOURCE_DIR}/../include - ${CMAKE_CURRENT_SOURCE_DIR}/../plugins-nextgen/common/include) - -target_compile_options(LLVMOffload PRIVATE ${offload_compile_flags}) -target_link_options(LLVMOffload PRIVATE ${offload_link_flags}) - -target_compile_definitions(LLVMOffload PRIVATE - TARGET_NAME="Liboffload" - DEBUG_PREFIX="Liboffload" -) - -set_target_properties(LLVMOffload PROPERTIES - POSITION_INDEPENDENT_CODE ON - INSTALL_RPATH "$ORIGIN" - BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/..") -install(TARGETS LLVMOffload LIBRARY COMPONENT LLVMOffload DESTINATION "${OFFLOAD_INSTALL_LIBDIR}") - -install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/include/generated/OffloadAPI.h DESTINATION ${CMAKE_INSTALL_PREFIX}/include/offload) -install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/include/generated/OffloadPrint.hpp DESTINATION ${CMAKE_INSTALL_PREFIX}/include/offload) diff --git a/offload/liboffload/README.md b/offload/liboffload/README.md deleted file mode 100644 index 95c9bf54d7bad..0000000000000 --- a/offload/liboffload/README.md +++ /dev/null @@ -1,8 +0,0 @@ -# Offload New API - -This directory contains the implementation of the experimental work-in-progress -new API for Offload. It builds on top of the existing plugin implementations but -provides a single level of abstraction suitable for runtimes for languages other -than OpenMP to be built on top of. - -See the [API definition readme](API/README.md) for implementation details. 
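As a rough end-to-end sketch of how a client runtime might sit on top of this abstraction (error handling omitted; the include path assumes the default install layout set up by the CMake rules above):

```
#include <vector>
#include <offload/OffloadAPI.h>

// Sketch: enumerate every platform and its devices through the new API.
int main() {
  olInit();

  uint32_t NumPlatforms = 0;
  olGetPlatformCount(&NumPlatforms);
  std::vector<ol_platform_handle_t> Platforms(NumPlatforms);
  if (NumPlatforms != 0)
    olGetPlatform(NumPlatforms, Platforms.data());

  for (ol_platform_handle_t Platform : Platforms) {
    uint32_t NumDevices = 0;
    olGetDeviceCount(Platform, &NumDevices);
    std::vector<ol_device_handle_t> Devices(NumDevices);
    if (NumDevices != 0)
      olGetDevice(Platform, NumDevices, Devices.data());
  }

  olShutDown();
  return 0;
}
```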
\ No newline at end of file diff --git a/offload/liboffload/exports b/offload/liboffload/exports deleted file mode 100644 index 168341aa7d938..0000000000000 --- a/offload/liboffload/exports +++ /dev/null @@ -1,6 +0,0 @@ -VERS1.0 { -global: - ol*; -local: - *; -}; diff --git a/offload/liboffload/include/OffloadImpl.hpp b/offload/liboffload/include/OffloadImpl.hpp deleted file mode 100644 index 6d745095f3105..0000000000000 --- a/offload/liboffload/include/OffloadImpl.hpp +++ /dev/null @@ -1,94 +0,0 @@ -//===- offload_impl.hpp- Implementation helpers for the Offload library ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSet.h" - -struct OffloadConfig { - bool TracingEnabled = false; -}; - -OffloadConfig &offloadConfig(); - -// Use the StringSet container to efficiently deduplicate repeated error -// strings (e.g. if the same error is hit constantly in a long running program) -llvm::StringSet<> &errorStrs(); - -// Use an unordered_set to avoid duplicates of error structs themselves. -// We cannot store the structs directly as returned pointers to them must always -// be valid, and a rehash of the set may invalidate them. This requires -// custom hash and equal_to function objects. -using ErrPtrT = std::unique_ptr; -struct ErrPtrEqual { - bool operator()(const ErrPtrT &lhs, const ErrPtrT &rhs) const { - if (!lhs && !rhs) { - return true; - } - if (!lhs || !rhs) { - return false; - } - - bool StrsEqual = false; - if (lhs->Details == NULL && rhs->Details == NULL) { - StrsEqual = true; - } else if (lhs->Details != NULL && rhs->Details != NULL) { - StrsEqual = (std::strcmp(lhs->Details, rhs->Details) == 0); - } - return (lhs->Code == rhs->Code) && StrsEqual; - } -}; -struct ErrPtrHash { - size_t operator()(const ErrPtrT &e) const { - if (!e) { - // We shouldn't store empty errors (i.e. 
success), but just in case - return 0lu; - } else { - return std::hash{}(e->Code); - } - } -}; -using ErrSetT = std::unordered_set; -ErrSetT &errors(); - -struct ol_impl_result_t { - ol_impl_result_t(std::nullptr_t) : Result(OL_SUCCESS) {} - ol_impl_result_t(ol_errc_t Code) { - if (Code == OL_ERRC_SUCCESS) { - Result = nullptr; - } else { - auto Err = std::unique_ptr( - new ol_error_struct_t{Code, nullptr}); - Result = errors().emplace(std::move(Err)).first->get(); - } - } - - ol_impl_result_t(ol_errc_t Code, llvm::StringRef Details) { - assert(Code != OL_ERRC_SUCCESS); - Result = nullptr; - auto DetailsStr = errorStrs().insert(Details).first->getKeyData(); - auto Err = std::unique_ptr( - new ol_error_struct_t{Code, DetailsStr}); - Result = errors().emplace(std::move(Err)).first->get(); - } - - operator ol_result_t() { return Result; } - -private: - ol_result_t Result; -}; diff --git a/offload/liboffload/include/generated/OffloadAPI.h b/offload/liboffload/include/generated/OffloadAPI.h deleted file mode 100644 index 11fcc96625ab8..0000000000000 --- a/offload/liboffload/include/generated/OffloadAPI.h +++ /dev/null @@ -1,610 +0,0 @@ -//===- Auto-generated file, part of the LLVM/Offload project --------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// Auto-generated file, do not manually edit. - -#pragma once - -#include -#include - -#if defined(__cplusplus) -extern "C" { -#endif - -/////////////////////////////////////////////////////////////////////////////// -#ifndef OL_VERSION_MAJOR -/// @brief Major version of the Offload API -#define OL_VERSION_MAJOR 0 -#endif // OL_VERSION_MAJOR - -/////////////////////////////////////////////////////////////////////////////// -#ifndef OL_VERSION_MINOR -/// @brief Minor version of the Offload API -#define OL_VERSION_MINOR 0 -#endif // OL_VERSION_MINOR - -/////////////////////////////////////////////////////////////////////////////// -#ifndef OL_VERSION_PATCH -/// @brief Patch version of the Offload API -#define OL_VERSION_PATCH 1 -#endif // OL_VERSION_PATCH - -/////////////////////////////////////////////////////////////////////////////// -#ifndef OL_APICALL -#if defined(_WIN32) -/// @brief Calling convention for all API functions -#define OL_APICALL __cdecl -#else -#define OL_APICALL -#endif // defined(_WIN32) -#endif // OL_APICALL - -/////////////////////////////////////////////////////////////////////////////// -#ifndef OL_APIEXPORT -#if defined(_WIN32) -/// @brief Microsoft-specific dllexport storage-class attribute -#define OL_APIEXPORT __declspec(dllexport) -#else -#define OL_APIEXPORT -#endif // defined(_WIN32) -#endif // OL_APIEXPORT - -/////////////////////////////////////////////////////////////////////////////// -#ifndef OL_DLLEXPORT -#if defined(_WIN32) -/// @brief Microsoft-specific dllexport storage-class attribute -#define OL_DLLEXPORT __declspec(dllexport) -#endif // defined(_WIN32) -#endif // OL_DLLEXPORT - -/////////////////////////////////////////////////////////////////////////////// -#ifndef OL_DLLEXPORT -#if __GNUC__ >= 4 -/// @brief GCC-specific dllexport storage-class attribute -#define OL_DLLEXPORT __attribute__((visibility("default"))) -#else -#define OL_DLLEXPORT -#endif // __GNUC__ >= 4 -#endif // OL_DLLEXPORT - 
-/////////////////////////////////////////////////////////////////////////////// -/// @brief Handle of a platform instance -typedef struct ol_platform_handle_t_ *ol_platform_handle_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Handle of platform's device object -typedef struct ol_device_handle_t_ *ol_device_handle_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Handle of context object -typedef struct ol_context_handle_t_ *ol_context_handle_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Defines Return/Error codes -typedef enum ol_errc_t { - /// Success - OL_ERRC_SUCCESS = 0, - /// Invalid Value - OL_ERRC_INVALID_VALUE = 1, - /// Invalid platform - OL_ERRC_INVALID_PLATFORM = 2, - /// Device not found - OL_ERRC_DEVICE_NOT_FOUND = 3, - /// Invalid device - OL_ERRC_INVALID_DEVICE = 4, - /// Device hung, reset, was removed, or driver update occurred - OL_ERRC_DEVICE_LOST = 5, - /// plugin is not initialized or specific entry-point is not implemented - OL_ERRC_UNINITIALIZED = 6, - /// Out of resources - OL_ERRC_OUT_OF_RESOURCES = 7, - /// generic error code for unsupported versions - OL_ERRC_UNSUPPORTED_VERSION = 8, - /// generic error code for unsupported features - OL_ERRC_UNSUPPORTED_FEATURE = 9, - /// generic error code for invalid arguments - OL_ERRC_INVALID_ARGUMENT = 10, - /// handle argument is not valid - OL_ERRC_INVALID_NULL_HANDLE = 11, - /// pointer argument may not be nullptr - OL_ERRC_INVALID_NULL_POINTER = 12, - /// invalid size or dimensions (e.g., must not be zero, or is out of bounds) - OL_ERRC_INVALID_SIZE = 13, - /// enumerator argument is not valid - OL_ERRC_INVALID_ENUMERATION = 14, - /// enumerator argument is not supported by the device - OL_ERRC_UNSUPPORTED_ENUMERATION = 15, - /// Unknown or internal error - OL_ERRC_UNKNOWN = 16, - /// @cond - OL_ERRC_FORCE_UINT32 = 0x7fffffff - /// @endcond - -} ol_errc_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Details of the error condition returned by an API call -typedef struct ol_error_struct_t { - ol_errc_t Code; /// The error code - const char *Details; /// String containing error details -} ol_error_struct_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Result type returned by all entry points. 
-typedef const ol_error_struct_t *ol_result_t; - -/////////////////////////////////////////////////////////////////////////////// -#ifndef OL_SUCCESS -/// @brief Success condition -#define OL_SUCCESS NULL -#endif // OL_SUCCESS - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Code location information that can optionally be associated with an -/// API call -typedef struct ol_code_location_t { - const char *FunctionName; /// Function name - const char *SourceFile; /// Source code file - uint32_t LineNumber; /// Source code line number - uint32_t ColumnNumber; /// Source code column number -} ol_code_location_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Perform initialization of the Offload library and plugins -/// -/// @details -/// - This must be the first API call made by a user of the Offload library -/// - Each call will increment an internal reference count that is -/// decremented by `olShutDown` -/// -/// @returns -/// - ::OL_RESULT_SUCCESS -/// - ::OL_ERRC_UNINITIALIZED -/// - ::OL_ERRC_DEVICE_LOST -/// - ::OL_ERRC_INVALID_NULL_HANDLE -/// - ::OL_ERRC_INVALID_NULL_POINTER -OL_APIEXPORT ol_result_t OL_APICALL olInit(); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Release the resources in use by Offload -/// -/// @details -/// - This decrements an internal reference count. When this reaches 0, all -/// resources will be released -/// - Subsequent API calls made after this are not valid -/// -/// @returns -/// - ::OL_RESULT_SUCCESS -/// - ::OL_ERRC_UNINITIALIZED -/// - ::OL_ERRC_DEVICE_LOST -/// - ::OL_ERRC_INVALID_NULL_HANDLE -/// - ::OL_ERRC_INVALID_NULL_POINTER -OL_APIEXPORT ol_result_t OL_APICALL olShutDown(); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Retrieves all available platforms -/// -/// @details -/// - Multiple calls to this function will return identical platforms -/// handles, in the same order. -/// -/// @returns -/// - ::OL_RESULT_SUCCESS -/// - ::OL_ERRC_UNINITIALIZED -/// - ::OL_ERRC_DEVICE_LOST -/// - ::OL_ERRC_INVALID_SIZE -/// + `NumEntries == 0` -/// - ::OL_ERRC_INVALID_NULL_HANDLE -/// - ::OL_ERRC_INVALID_NULL_POINTER -/// + `NULL == Platforms` -OL_APIEXPORT ol_result_t OL_APICALL olGetPlatform( - // [in] The number of platforms to be added to Platforms. NumEntries must be - // greater than zero. - uint32_t NumEntries, - // [out] Array of handle of platforms. If NumEntries is less than the number - // of platforms available, then olGetPlatform shall only retrieve that - // number of platforms. - ol_platform_handle_t *Platforms); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Retrieves the number of available platforms -/// -/// @details -/// -/// @returns -/// - ::OL_RESULT_SUCCESS -/// - ::OL_ERRC_UNINITIALIZED -/// - ::OL_ERRC_DEVICE_LOST -/// - ::OL_ERRC_INVALID_NULL_HANDLE -/// - ::OL_ERRC_INVALID_NULL_POINTER -/// + `NULL == NumPlatforms` -OL_APIEXPORT ol_result_t OL_APICALL olGetPlatformCount( - // [out] returns the total number of platforms available. - uint32_t *NumPlatforms); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Supported platform info -typedef enum ol_platform_info_t { - /// [char[]] The string denoting name of the platform. The size of the info - /// needs to be dynamically queried. 
- OL_PLATFORM_INFO_NAME = 0, - /// [char[]] The string denoting name of the vendor of the platform. The size - /// of the info needs to be dynamically queried. - OL_PLATFORM_INFO_VENDOR_NAME = 1, - /// [char[]] The string denoting the version of the platform. The size of the - /// info needs to be dynamically queried. - OL_PLATFORM_INFO_VERSION = 2, - /// [ol_platform_backend_t] The native backend of the platform. - OL_PLATFORM_INFO_BACKEND = 3, - /// @cond - OL_PLATFORM_INFO_FORCE_UINT32 = 0x7fffffff - /// @endcond - -} ol_platform_info_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Identifies the native backend of the platform -typedef enum ol_platform_backend_t { - /// The backend is not recognized - OL_PLATFORM_BACKEND_UNKNOWN = 0, - /// The backend is CUDA - OL_PLATFORM_BACKEND_CUDA = 1, - /// The backend is AMDGPU - OL_PLATFORM_BACKEND_AMDGPU = 2, - /// @cond - OL_PLATFORM_BACKEND_FORCE_UINT32 = 0x7fffffff - /// @endcond - -} ol_platform_backend_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Queries the given property of the platform -/// -/// @details -/// - `olGetPlatformInfoSize` can be used to query the storage size required -/// for the given query. -/// -/// @returns -/// - ::OL_RESULT_SUCCESS -/// - ::OL_ERRC_UNINITIALIZED -/// - ::OL_ERRC_DEVICE_LOST -/// - ::OL_ERRC_UNSUPPORTED_ENUMERATION -/// + If `PropName` is not supported by the platform. -/// - ::OL_ERRC_INVALID_SIZE -/// + `PropSize == 0` -/// + If `PropSize` is less than the real number of bytes needed to -/// return the info. -/// - ::OL_ERRC_INVALID_PLATFORM -/// - ::OL_ERRC_INVALID_NULL_HANDLE -/// + `NULL == Platform` -/// - ::OL_ERRC_INVALID_NULL_POINTER -/// + `NULL == PropValue` -OL_APIEXPORT ol_result_t OL_APICALL olGetPlatformInfo( - // [in] handle of the platform - ol_platform_handle_t Platform, - // [in] type of the info to retrieve - ol_platform_info_t PropName, - // [in] the number of bytes pointed to by pPlatformInfo. - size_t PropSize, - // [out] array of bytes holding the info. If Size is not equal to or greater - // to the real number of bytes needed to return the info then the - // OL_ERRC_INVALID_SIZE error is returned and pPlatformInfo is not used. - void *PropValue); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Returns the storage size of the given platform query -/// -/// @details -/// -/// @returns -/// - ::OL_RESULT_SUCCESS -/// - ::OL_ERRC_UNINITIALIZED -/// - ::OL_ERRC_DEVICE_LOST -/// - ::OL_ERRC_UNSUPPORTED_ENUMERATION -/// + If `PropName` is not supported by the platform. 
-/// - ::OL_ERRC_INVALID_PLATFORM -/// - ::OL_ERRC_INVALID_NULL_HANDLE -/// + `NULL == Platform` -/// - ::OL_ERRC_INVALID_NULL_POINTER -/// + `NULL == PropSizeRet` -OL_APIEXPORT ol_result_t OL_APICALL olGetPlatformInfoSize( - // [in] handle of the platform - ol_platform_handle_t Platform, - // [in] type of the info to query - ol_platform_info_t PropName, - // [out] pointer to the number of bytes required to store the query - size_t *PropSizeRet); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Supported device types -typedef enum ol_device_type_t { - /// The default device type as preferred by the runtime - OL_DEVICE_TYPE_DEFAULT = 0, - /// Devices of all types - OL_DEVICE_TYPE_ALL = 1, - /// GPU device type - OL_DEVICE_TYPE_GPU = 2, - /// CPU device type - OL_DEVICE_TYPE_CPU = 3, - /// @cond - OL_DEVICE_TYPE_FORCE_UINT32 = 0x7fffffff - /// @endcond - -} ol_device_type_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Supported device info -typedef enum ol_device_info_t { - /// [ol_device_type_t] type of the device - OL_DEVICE_INFO_TYPE = 0, - /// [ol_platform_handle_t] the platform associated with the device - OL_DEVICE_INFO_PLATFORM = 1, - /// [char[]] Device name - OL_DEVICE_INFO_NAME = 2, - /// [char[]] Device vendor - OL_DEVICE_INFO_VENDOR = 3, - /// [char[]] Driver version - OL_DEVICE_INFO_DRIVER_VERSION = 4, - /// @cond - OL_DEVICE_INFO_FORCE_UINT32 = 0x7fffffff - /// @endcond - -} ol_device_info_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Retrieves the number of available devices within a platform -/// -/// @details -/// -/// @returns -/// - ::OL_RESULT_SUCCESS -/// - ::OL_ERRC_UNINITIALIZED -/// - ::OL_ERRC_DEVICE_LOST -/// - ::OL_ERRC_INVALID_NULL_HANDLE -/// + `NULL == Platform` -/// - ::OL_ERRC_INVALID_NULL_POINTER -/// + `NULL == NumDevices` -OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceCount( - // [in] handle of the platform instance - ol_platform_handle_t Platform, - // [out] pointer to the number of devices. - uint32_t *NumDevices); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Retrieves devices within a platform -/// -/// @details -/// - Multiple calls to this function will return identical device handles, -/// in the same order. -/// -/// @returns -/// - ::OL_RESULT_SUCCESS -/// - ::OL_ERRC_UNINITIALIZED -/// - ::OL_ERRC_DEVICE_LOST -/// - ::OL_ERRC_INVALID_SIZE -/// + `NumEntries == 0` -/// - ::OL_ERRC_INVALID_NULL_HANDLE -/// + `NULL == Platform` -/// - ::OL_ERRC_INVALID_NULL_POINTER -/// + `NULL == Devices` -OL_APIEXPORT ol_result_t OL_APICALL olGetDevice( - // [in] handle of the platform instance - ol_platform_handle_t Platform, - // [in] the number of devices to be added to phDevices, which must be - // greater than zero - uint32_t NumEntries, - // [out] Array of device handles. If NumEntries is less than the number of - // devices available, then this function shall only retrieve that number of - // devices. - ol_device_handle_t *Devices); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Queries the given property of the device -/// -/// @details -/// -/// @returns -/// - ::OL_RESULT_SUCCESS -/// - ::OL_ERRC_UNINITIALIZED -/// - ::OL_ERRC_DEVICE_LOST -/// - ::OL_ERRC_UNSUPPORTED_ENUMERATION -/// + If `PropName` is not supported by the device. 
-/// - ::OL_ERRC_INVALID_SIZE -/// + `PropSize == 0` -/// + If `PropSize` is less than the real number of bytes needed to -/// return the info. -/// - ::OL_ERRC_INVALID_DEVICE -/// - ::OL_ERRC_INVALID_NULL_HANDLE -/// + `NULL == Device` -/// - ::OL_ERRC_INVALID_NULL_POINTER -/// + `NULL == PropValue` -OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfo( - // [in] handle of the device instance - ol_device_handle_t Device, - // [in] type of the info to retrieve - ol_device_info_t PropName, - // [in] the number of bytes pointed to by PropValue. - size_t PropSize, - // [out] array of bytes holding the info. If PropSize is not equal to or - // greater than the real number of bytes needed to return the info then the - // OL_ERRC_INVALID_SIZE error is returned and PropValue is not used. - void *PropValue); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Returns the storage size of the given device query -/// -/// @details -/// -/// @returns -/// - ::OL_RESULT_SUCCESS -/// - ::OL_ERRC_UNINITIALIZED -/// - ::OL_ERRC_DEVICE_LOST -/// - ::OL_ERRC_UNSUPPORTED_ENUMERATION -/// + If `PropName` is not supported by the device. -/// - ::OL_ERRC_INVALID_DEVICE -/// - ::OL_ERRC_INVALID_NULL_HANDLE -/// + `NULL == Device` -/// - ::OL_ERRC_INVALID_NULL_POINTER -/// + `NULL == PropSizeRet` -OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfoSize( - // [in] handle of the device instance - ol_device_handle_t Device, - // [in] type of the info to retrieve - ol_device_info_t PropName, - // [out] pointer to the number of bytes required to store the query - size_t *PropSizeRet); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for olGetPlatform -/// @details Each entry is a pointer to the parameter passed to the function; -typedef struct ol_get_platform_params_t { - uint32_t *pNumEntries; - ol_platform_handle_t **pPlatforms; -} ol_get_platform_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for olGetPlatformCount -/// @details Each entry is a pointer to the parameter passed to the function; -typedef struct ol_get_platform_count_params_t { - uint32_t **pNumPlatforms; -} ol_get_platform_count_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for olGetPlatformInfo -/// @details Each entry is a pointer to the parameter passed to the function; -typedef struct ol_get_platform_info_params_t { - ol_platform_handle_t *pPlatform; - ol_platform_info_t *pPropName; - size_t *pPropSize; - void **pPropValue; -} ol_get_platform_info_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for olGetPlatformInfoSize -/// @details Each entry is a pointer to the parameter passed to the function; -typedef struct ol_get_platform_info_size_params_t { - ol_platform_handle_t *pPlatform; - ol_platform_info_t *pPropName; - size_t **pPropSizeRet; -} ol_get_platform_info_size_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for olGetDeviceCount -/// @details Each entry is a pointer to the parameter passed to the function; -typedef struct ol_get_device_count_params_t { - ol_platform_handle_t *pPlatform; - uint32_t **pNumDevices; -} ol_get_device_count_params_t; - -/////////////////////////////////////////////////////////////////////////////// 
-/// @brief Function parameters for olGetDevice -/// @details Each entry is a pointer to the parameter passed to the function; -typedef struct ol_get_device_params_t { - ol_platform_handle_t *pPlatform; - uint32_t *pNumEntries; - ol_device_handle_t **pDevices; -} ol_get_device_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for olGetDeviceInfo -/// @details Each entry is a pointer to the parameter passed to the function; -typedef struct ol_get_device_info_params_t { - ol_device_handle_t *pDevice; - ol_device_info_t *pPropName; - size_t *pPropSize; - void **pPropValue; -} ol_get_device_info_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for olGetDeviceInfoSize -/// @details Each entry is a pointer to the parameter passed to the function; -typedef struct ol_get_device_info_size_params_t { - ol_device_handle_t *pDevice; - ol_device_info_t *pPropName; - size_t **pPropSizeRet; -} ol_get_device_info_size_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Variant of olInit that also sets source code location information -/// @details See also ::olInit -OL_APIEXPORT ol_result_t OL_APICALL -olInitWithCodeLoc(ol_code_location_t *CodeLocation); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Variant of olShutDown that also sets source code location information -/// @details See also ::olShutDown -OL_APIEXPORT ol_result_t OL_APICALL -olShutDownWithCodeLoc(ol_code_location_t *CodeLocation); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Variant of olGetPlatform that also sets source code location -/// information -/// @details See also ::olGetPlatform -OL_APIEXPORT ol_result_t OL_APICALL -olGetPlatformWithCodeLoc(uint32_t NumEntries, ol_platform_handle_t *Platforms, - ol_code_location_t *CodeLocation); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Variant of olGetPlatformCount that also sets source code location -/// information -/// @details See also ::olGetPlatformCount -OL_APIEXPORT ol_result_t OL_APICALL olGetPlatformCountWithCodeLoc( - uint32_t *NumPlatforms, ol_code_location_t *CodeLocation); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Variant of olGetPlatformInfo that also sets source code location -/// information -/// @details See also ::olGetPlatformInfo -OL_APIEXPORT ol_result_t OL_APICALL olGetPlatformInfoWithCodeLoc( - ol_platform_handle_t Platform, ol_platform_info_t PropName, size_t PropSize, - void *PropValue, ol_code_location_t *CodeLocation); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Variant of olGetPlatformInfoSize that also sets source code location -/// information -/// @details See also ::olGetPlatformInfoSize -OL_APIEXPORT ol_result_t OL_APICALL olGetPlatformInfoSizeWithCodeLoc( - ol_platform_handle_t Platform, ol_platform_info_t PropName, - size_t *PropSizeRet, ol_code_location_t *CodeLocation); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Variant of olGetDeviceCount that also sets source code location -/// information -/// @details See also ::olGetDeviceCount -OL_APIEXPORT ol_result_t OL_APICALL -olGetDeviceCountWithCodeLoc(ol_platform_handle_t Platform, uint32_t *NumDevices, - 
ol_code_location_t *CodeLocation); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Variant of olGetDevice that also sets source code location -/// information -/// @details See also ::olGetDevice -OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceWithCodeLoc( - ol_platform_handle_t Platform, uint32_t NumEntries, - ol_device_handle_t *Devices, ol_code_location_t *CodeLocation); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Variant of olGetDeviceInfo that also sets source code location -/// information -/// @details See also ::olGetDeviceInfo -OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfoWithCodeLoc( - ol_device_handle_t Device, ol_device_info_t PropName, size_t PropSize, - void *PropValue, ol_code_location_t *CodeLocation); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Variant of olGetDeviceInfoSize that also sets source code location -/// information -/// @details See also ::olGetDeviceInfoSize -OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfoSizeWithCodeLoc( - ol_device_handle_t Device, ol_device_info_t PropName, size_t *PropSizeRet, - ol_code_location_t *CodeLocation); - -#if defined(__cplusplus) -} // extern "C" -#endif diff --git a/offload/liboffload/include/generated/OffloadEntryPoints.inc b/offload/liboffload/include/generated/OffloadEntryPoints.inc deleted file mode 100644 index 49c1c8169615e..0000000000000 --- a/offload/liboffload/include/generated/OffloadEntryPoints.inc +++ /dev/null @@ -1,441 +0,0 @@ -//===- Auto-generated file, part of the LLVM/Offload project --------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -/////////////////////////////////////////////////////////////////////////////// -ol_impl_result_t olInit_val() { - if (true /*enableParameterValidation*/) { - } - - return olInit_impl(); -} -OL_APIEXPORT ol_result_t OL_APICALL olInit() { - if (offloadConfig().TracingEnabled) { - std::cout << "---> olInit"; - } - - ol_result_t Result = olInit_val(); - - if (offloadConfig().TracingEnabled) { - std::cout << "()"; - std::cout << "-> " << Result << "\n"; - if (Result && Result->Details) { - std::cout << " *Error Details* " << Result->Details << " \n"; - } - } - return Result; -} -ol_result_t olInitWithCodeLoc(ol_code_location_t *CodeLocation) { - currentCodeLocation() = CodeLocation; - ol_result_t Result = olInit(); - - currentCodeLocation() = nullptr; - return Result; -} - -/////////////////////////////////////////////////////////////////////////////// -ol_impl_result_t olShutDown_val() { - if (true /*enableParameterValidation*/) { - } - - return olShutDown_impl(); -} -OL_APIEXPORT ol_result_t OL_APICALL olShutDown() { - if (offloadConfig().TracingEnabled) { - std::cout << "---> olShutDown"; - } - - ol_result_t Result = olShutDown_val(); - - if (offloadConfig().TracingEnabled) { - std::cout << "()"; - std::cout << "-> " << Result << "\n"; - if (Result && Result->Details) { - std::cout << " *Error Details* " << Result->Details << " \n"; - } - } - return Result; -} -ol_result_t olShutDownWithCodeLoc(ol_code_location_t *CodeLocation) { - currentCodeLocation() = CodeLocation; - ol_result_t Result = olShutDown(); - - currentCodeLocation() = nullptr; - return Result; -} - -/////////////////////////////////////////////////////////////////////////////// -ol_impl_result_t olGetPlatform_val(uint32_t NumEntries, - ol_platform_handle_t *Platforms) { - if (true /*enableParameterValidation*/) { - if (NumEntries == 0) { - return OL_ERRC_INVALID_SIZE; - } - - if (NULL == Platforms) { - return OL_ERRC_INVALID_NULL_POINTER; - } - } - - return olGetPlatform_impl(NumEntries, Platforms); -} -OL_APIEXPORT ol_result_t OL_APICALL -olGetPlatform(uint32_t NumEntries, ol_platform_handle_t *Platforms) { - if (offloadConfig().TracingEnabled) { - std::cout << "---> olGetPlatform"; - } - - ol_result_t Result = olGetPlatform_val(NumEntries, Platforms); - - if (offloadConfig().TracingEnabled) { - ol_get_platform_params_t Params = {&NumEntries, &Platforms}; - std::cout << "(" << &Params << ")"; - std::cout << "-> " << Result << "\n"; - if (Result && Result->Details) { - std::cout << " *Error Details* " << Result->Details << " \n"; - } - } - return Result; -} -ol_result_t olGetPlatformWithCodeLoc(uint32_t NumEntries, - ol_platform_handle_t *Platforms, - ol_code_location_t *CodeLocation) { - currentCodeLocation() = CodeLocation; - ol_result_t Result = olGetPlatform(NumEntries, Platforms); - - currentCodeLocation() = nullptr; - return Result; -} - -/////////////////////////////////////////////////////////////////////////////// -ol_impl_result_t olGetPlatformCount_val(uint32_t *NumPlatforms) { - if (true /*enableParameterValidation*/) { - if (NULL == NumPlatforms) { - return OL_ERRC_INVALID_NULL_POINTER; - } - } - - return olGetPlatformCount_impl(NumPlatforms); -} -OL_APIEXPORT ol_result_t OL_APICALL olGetPlatformCount(uint32_t *NumPlatforms) { - if (offloadConfig().TracingEnabled) { - std::cout << "---> olGetPlatformCount"; - } - - ol_result_t Result = 
olGetPlatformCount_val(NumPlatforms); - - if (offloadConfig().TracingEnabled) { - ol_get_platform_count_params_t Params = {&NumPlatforms}; - std::cout << "(" << &Params << ")"; - std::cout << "-> " << Result << "\n"; - if (Result && Result->Details) { - std::cout << " *Error Details* " << Result->Details << " \n"; - } - } - return Result; -} -ol_result_t olGetPlatformCountWithCodeLoc(uint32_t *NumPlatforms, - ol_code_location_t *CodeLocation) { - currentCodeLocation() = CodeLocation; - ol_result_t Result = olGetPlatformCount(NumPlatforms); - - currentCodeLocation() = nullptr; - return Result; -} - -/////////////////////////////////////////////////////////////////////////////// -ol_impl_result_t olGetPlatformInfo_val(ol_platform_handle_t Platform, - ol_platform_info_t PropName, - size_t PropSize, void *PropValue) { - if (true /*enableParameterValidation*/) { - if (PropSize == 0) { - return OL_ERRC_INVALID_SIZE; - } - - if (NULL == Platform) { - return OL_ERRC_INVALID_NULL_HANDLE; - } - - if (NULL == PropValue) { - return OL_ERRC_INVALID_NULL_POINTER; - } - } - - return olGetPlatformInfo_impl(Platform, PropName, PropSize, PropValue); -} -OL_APIEXPORT ol_result_t OL_APICALL -olGetPlatformInfo(ol_platform_handle_t Platform, ol_platform_info_t PropName, - size_t PropSize, void *PropValue) { - if (offloadConfig().TracingEnabled) { - std::cout << "---> olGetPlatformInfo"; - } - - ol_result_t Result = - olGetPlatformInfo_val(Platform, PropName, PropSize, PropValue); - - if (offloadConfig().TracingEnabled) { - ol_get_platform_info_params_t Params = {&Platform, &PropName, &PropSize, - &PropValue}; - std::cout << "(" << &Params << ")"; - std::cout << "-> " << Result << "\n"; - if (Result && Result->Details) { - std::cout << " *Error Details* " << Result->Details << " \n"; - } - } - return Result; -} -ol_result_t olGetPlatformInfoWithCodeLoc(ol_platform_handle_t Platform, - ol_platform_info_t PropName, - size_t PropSize, void *PropValue, - ol_code_location_t *CodeLocation) { - currentCodeLocation() = CodeLocation; - ol_result_t Result = - olGetPlatformInfo(Platform, PropName, PropSize, PropValue); - - currentCodeLocation() = nullptr; - return Result; -} - -/////////////////////////////////////////////////////////////////////////////// -ol_impl_result_t olGetPlatformInfoSize_val(ol_platform_handle_t Platform, - ol_platform_info_t PropName, - size_t *PropSizeRet) { - if (true /*enableParameterValidation*/) { - if (NULL == Platform) { - return OL_ERRC_INVALID_NULL_HANDLE; - } - - if (NULL == PropSizeRet) { - return OL_ERRC_INVALID_NULL_POINTER; - } - } - - return olGetPlatformInfoSize_impl(Platform, PropName, PropSizeRet); -} -OL_APIEXPORT ol_result_t OL_APICALL -olGetPlatformInfoSize(ol_platform_handle_t Platform, - ol_platform_info_t PropName, size_t *PropSizeRet) { - if (offloadConfig().TracingEnabled) { - std::cout << "---> olGetPlatformInfoSize"; - } - - ol_result_t Result = - olGetPlatformInfoSize_val(Platform, PropName, PropSizeRet); - - if (offloadConfig().TracingEnabled) { - ol_get_platform_info_size_params_t Params = {&Platform, &PropName, - &PropSizeRet}; - std::cout << "(" << &Params << ")"; - std::cout << "-> " << Result << "\n"; - if (Result && Result->Details) { - std::cout << " *Error Details* " << Result->Details << " \n"; - } - } - return Result; -} -ol_result_t olGetPlatformInfoSizeWithCodeLoc(ol_platform_handle_t Platform, - ol_platform_info_t PropName, - size_t *PropSizeRet, - ol_code_location_t *CodeLocation) { - currentCodeLocation() = CodeLocation; - ol_result_t Result = 
olGetPlatformInfoSize(Platform, PropName, PropSizeRet); - - currentCodeLocation() = nullptr; - return Result; -} - -/////////////////////////////////////////////////////////////////////////////// -ol_impl_result_t olGetDeviceCount_val(ol_platform_handle_t Platform, - uint32_t *NumDevices) { - if (true /*enableParameterValidation*/) { - if (NULL == Platform) { - return OL_ERRC_INVALID_NULL_HANDLE; - } - - if (NULL == NumDevices) { - return OL_ERRC_INVALID_NULL_POINTER; - } - } - - return olGetDeviceCount_impl(Platform, NumDevices); -} -OL_APIEXPORT ol_result_t OL_APICALL -olGetDeviceCount(ol_platform_handle_t Platform, uint32_t *NumDevices) { - if (offloadConfig().TracingEnabled) { - std::cout << "---> olGetDeviceCount"; - } - - ol_result_t Result = olGetDeviceCount_val(Platform, NumDevices); - - if (offloadConfig().TracingEnabled) { - ol_get_device_count_params_t Params = {&Platform, &NumDevices}; - std::cout << "(" << &Params << ")"; - std::cout << "-> " << Result << "\n"; - if (Result && Result->Details) { - std::cout << " *Error Details* " << Result->Details << " \n"; - } - } - return Result; -} -ol_result_t olGetDeviceCountWithCodeLoc(ol_platform_handle_t Platform, - uint32_t *NumDevices, - ol_code_location_t *CodeLocation) { - currentCodeLocation() = CodeLocation; - ol_result_t Result = olGetDeviceCount(Platform, NumDevices); - - currentCodeLocation() = nullptr; - return Result; -} - -/////////////////////////////////////////////////////////////////////////////// -ol_impl_result_t olGetDevice_val(ol_platform_handle_t Platform, - uint32_t NumEntries, - ol_device_handle_t *Devices) { - if (true /*enableParameterValidation*/) { - if (NumEntries == 0) { - return OL_ERRC_INVALID_SIZE; - } - - if (NULL == Platform) { - return OL_ERRC_INVALID_NULL_HANDLE; - } - - if (NULL == Devices) { - return OL_ERRC_INVALID_NULL_POINTER; - } - } - - return olGetDevice_impl(Platform, NumEntries, Devices); -} -OL_APIEXPORT ol_result_t OL_APICALL olGetDevice(ol_platform_handle_t Platform, - uint32_t NumEntries, - ol_device_handle_t *Devices) { - if (offloadConfig().TracingEnabled) { - std::cout << "---> olGetDevice"; - } - - ol_result_t Result = olGetDevice_val(Platform, NumEntries, Devices); - - if (offloadConfig().TracingEnabled) { - ol_get_device_params_t Params = {&Platform, &NumEntries, &Devices}; - std::cout << "(" << &Params << ")"; - std::cout << "-> " << Result << "\n"; - if (Result && Result->Details) { - std::cout << " *Error Details* " << Result->Details << " \n"; - } - } - return Result; -} -ol_result_t olGetDeviceWithCodeLoc(ol_platform_handle_t Platform, - uint32_t NumEntries, - ol_device_handle_t *Devices, - ol_code_location_t *CodeLocation) { - currentCodeLocation() = CodeLocation; - ol_result_t Result = olGetDevice(Platform, NumEntries, Devices); - - currentCodeLocation() = nullptr; - return Result; -} - -/////////////////////////////////////////////////////////////////////////////// -ol_impl_result_t olGetDeviceInfo_val(ol_device_handle_t Device, - ol_device_info_t PropName, size_t PropSize, - void *PropValue) { - if (true /*enableParameterValidation*/) { - if (PropSize == 0) { - return OL_ERRC_INVALID_SIZE; - } - - if (NULL == Device) { - return OL_ERRC_INVALID_NULL_HANDLE; - } - - if (NULL == PropValue) { - return OL_ERRC_INVALID_NULL_POINTER; - } - } - - return olGetDeviceInfo_impl(Device, PropName, PropSize, PropValue); -} -OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfo(ol_device_handle_t Device, - ol_device_info_t PropName, - size_t PropSize, - void *PropValue) { - if 
(offloadConfig().TracingEnabled) { - std::cout << "---> olGetDeviceInfo"; - } - - ol_result_t Result = - olGetDeviceInfo_val(Device, PropName, PropSize, PropValue); - - if (offloadConfig().TracingEnabled) { - ol_get_device_info_params_t Params = {&Device, &PropName, &PropSize, - &PropValue}; - std::cout << "(" << &Params << ")"; - std::cout << "-> " << Result << "\n"; - if (Result && Result->Details) { - std::cout << " *Error Details* " << Result->Details << " \n"; - } - } - return Result; -} -ol_result_t olGetDeviceInfoWithCodeLoc(ol_device_handle_t Device, - ol_device_info_t PropName, - size_t PropSize, void *PropValue, - ol_code_location_t *CodeLocation) { - currentCodeLocation() = CodeLocation; - ol_result_t Result = olGetDeviceInfo(Device, PropName, PropSize, PropValue); - - currentCodeLocation() = nullptr; - return Result; -} - -/////////////////////////////////////////////////////////////////////////////// -ol_impl_result_t olGetDeviceInfoSize_val(ol_device_handle_t Device, - ol_device_info_t PropName, - size_t *PropSizeRet) { - if (true /*enableParameterValidation*/) { - if (NULL == Device) { - return OL_ERRC_INVALID_NULL_HANDLE; - } - - if (NULL == PropSizeRet) { - return OL_ERRC_INVALID_NULL_POINTER; - } - } - - return olGetDeviceInfoSize_impl(Device, PropName, PropSizeRet); -} -OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfoSize( - ol_device_handle_t Device, ol_device_info_t PropName, size_t *PropSizeRet) { - if (offloadConfig().TracingEnabled) { - std::cout << "---> olGetDeviceInfoSize"; - } - - ol_result_t Result = olGetDeviceInfoSize_val(Device, PropName, PropSizeRet); - - if (offloadConfig().TracingEnabled) { - ol_get_device_info_size_params_t Params = {&Device, &PropName, - &PropSizeRet}; - std::cout << "(" << &Params << ")"; - std::cout << "-> " << Result << "\n"; - if (Result && Result->Details) { - std::cout << " *Error Details* " << Result->Details << " \n"; - } - } - return Result; -} -ol_result_t olGetDeviceInfoSizeWithCodeLoc(ol_device_handle_t Device, - ol_device_info_t PropName, - size_t *PropSizeRet, - ol_code_location_t *CodeLocation) { - currentCodeLocation() = CodeLocation; - ol_result_t Result = olGetDeviceInfoSize(Device, PropName, PropSizeRet); - - currentCodeLocation() = nullptr; - return Result; -} diff --git a/offload/liboffload/include/generated/OffloadFuncs.inc b/offload/liboffload/include/generated/OffloadFuncs.inc deleted file mode 100644 index 48115493c790f..0000000000000 --- a/offload/liboffload/include/generated/OffloadFuncs.inc +++ /dev/null @@ -1,34 +0,0 @@ -//===- Auto-generated file, part of the LLVM/Offload project --------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
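Everything in the generated entry points above follows one shape: a public entry point that optionally prints a trace line, a *_val wrapper holding the NULL and size checks derived from the API description, and a hand-written *_impl that does the real work. Below is a minimal standalone sketch of that layering with a toy doThing function; the result enum, config singleton, and function names are invented for illustration and are not part of liboffload.

#include <cstdint>
#include <iostream>

struct Config { bool TracingEnabled = true; };
static Config &config() { static Config C; return C; }

enum Result { Success = 0, InvalidNullPointer = 1 };

// Hand-written implementation, analogous to an *_impl function.
static Result doThing_impl(uint32_t *Out) { *Out = 42; return Success; }

// Generated-style validation wrapper, analogous to an *_val function.
static Result doThing_val(uint32_t *Out) {
  if (Out == nullptr)
    return InvalidNullPointer;
  return doThing_impl(Out);
}

// Generated-style public entry point with optional tracing.
static Result doThing(uint32_t *Out) {
  if (config().TracingEnabled)
    std::cout << "---> doThing";
  Result R = doThing_val(Out);
  if (config().TracingEnabled)
    std::cout << " -> " << R << "\n";
  return R;
}

int main() {
  uint32_t V = 0;
  return doThing(&V) == Success && V == 42 ? 0 : 1;
}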
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef OFFLOAD_FUNC -#error Please define the macro OFFLOAD_FUNC(Function) -#endif - -OFFLOAD_FUNC(olInit) -OFFLOAD_FUNC(olShutDown) -OFFLOAD_FUNC(olGetPlatform) -OFFLOAD_FUNC(olGetPlatformCount) -OFFLOAD_FUNC(olGetPlatformInfo) -OFFLOAD_FUNC(olGetPlatformInfoSize) -OFFLOAD_FUNC(olGetDeviceCount) -OFFLOAD_FUNC(olGetDevice) -OFFLOAD_FUNC(olGetDeviceInfo) -OFFLOAD_FUNC(olGetDeviceInfoSize) -OFFLOAD_FUNC(olInitWithCodeLoc) -OFFLOAD_FUNC(olShutDownWithCodeLoc) -OFFLOAD_FUNC(olGetPlatformWithCodeLoc) -OFFLOAD_FUNC(olGetPlatformCountWithCodeLoc) -OFFLOAD_FUNC(olGetPlatformInfoWithCodeLoc) -OFFLOAD_FUNC(olGetPlatformInfoSizeWithCodeLoc) -OFFLOAD_FUNC(olGetDeviceCountWithCodeLoc) -OFFLOAD_FUNC(olGetDeviceWithCodeLoc) -OFFLOAD_FUNC(olGetDeviceInfoWithCodeLoc) -OFFLOAD_FUNC(olGetDeviceInfoSizeWithCodeLoc) - -#undef OFFLOAD_FUNC diff --git a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc deleted file mode 100644 index 5b26b2653a05d..0000000000000 --- a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc +++ /dev/null @@ -1,38 +0,0 @@ -//===- Auto-generated file, part of the LLVM/Offload project --------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -ol_impl_result_t olInit_impl(); - -ol_impl_result_t olShutDown_impl(); - -ol_impl_result_t olGetPlatform_impl(uint32_t NumEntries, - ol_platform_handle_t *Platforms); - -ol_impl_result_t olGetPlatformCount_impl(uint32_t *NumPlatforms); - -ol_impl_result_t olGetPlatformInfo_impl(ol_platform_handle_t Platform, - ol_platform_info_t PropName, - size_t PropSize, void *PropValue); - -ol_impl_result_t olGetPlatformInfoSize_impl(ol_platform_handle_t Platform, - ol_platform_info_t PropName, - size_t *PropSizeRet); - -ol_impl_result_t olGetDeviceCount_impl(ol_platform_handle_t Platform, - uint32_t *NumDevices); - -ol_impl_result_t olGetDevice_impl(ol_platform_handle_t Platform, - uint32_t NumEntries, - ol_device_handle_t *Devices); - -ol_impl_result_t olGetDeviceInfo_impl(ol_device_handle_t Device, - ol_device_info_t PropName, - size_t PropSize, void *PropValue); - -ol_impl_result_t olGetDeviceInfoSize_impl(ol_device_handle_t Device, - ol_device_info_t PropName, - size_t *PropSizeRet); diff --git a/offload/liboffload/include/generated/OffloadPrint.hpp b/offload/liboffload/include/generated/OffloadPrint.hpp deleted file mode 100644 index 8981bb054a4cb..0000000000000 --- a/offload/liboffload/include/generated/OffloadPrint.hpp +++ /dev/null @@ -1,428 +0,0 @@ -//===- Auto-generated file, part of the LLVM/Offload project --------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// Auto-generated file, do not manually edit. 
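The OFFLOAD_FUNC(...) entries above form an X-macro list: a consumer defines OFFLOAD_FUNC before including the file and receives one expansion per entry point. A self-contained sketch of the idiom, with the list written inline instead of pulled from OffloadFuncs.inc:

#include <iostream>

// Consumer defines OFFLOAD_FUNC, then pulls in the generated list. Here the
// list is inlined instead of included from OffloadFuncs.inc.
#define OFFLOAD_FUNC(Name) #Name,
static const char *FuncNames[] = {
    OFFLOAD_FUNC(olInit)
    OFFLOAD_FUNC(olShutDown)
    OFFLOAD_FUNC(olGetPlatformCount)
};
#undef OFFLOAD_FUNC

int main() {
  // A dynamic loader could look up these names before resolving symbols.
  for (const char *Name : FuncNames)
    std::cout << Name << "\n";
  return 0;
}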
- -#pragma once - -#include -#include - -template -inline ol_result_t printPtr(std::ostream &os, const T *ptr); -template -inline void printTagged(std::ostream &os, const void *ptr, T value, - size_t size); -template struct is_handle : std::false_type {}; -template <> struct is_handle : std::true_type {}; -template <> struct is_handle : std::true_type {}; -template <> struct is_handle : std::true_type {}; -template inline constexpr bool is_handle_v = is_handle::value; - -inline std::ostream &operator<<(std::ostream &os, enum ol_errc_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ol_platform_info_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ol_platform_backend_t value); -inline std::ostream &operator<<(std::ostream &os, enum ol_device_type_t value); -inline std::ostream &operator<<(std::ostream &os, enum ol_device_info_t value); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ol_errc_t type -/// @returns std::ostream & -inline std::ostream &operator<<(std::ostream &os, enum ol_errc_t value) { - switch (value) { - case OL_ERRC_SUCCESS: - os << "OL_ERRC_SUCCESS"; - break; - case OL_ERRC_INVALID_VALUE: - os << "OL_ERRC_INVALID_VALUE"; - break; - case OL_ERRC_INVALID_PLATFORM: - os << "OL_ERRC_INVALID_PLATFORM"; - break; - case OL_ERRC_DEVICE_NOT_FOUND: - os << "OL_ERRC_DEVICE_NOT_FOUND"; - break; - case OL_ERRC_INVALID_DEVICE: - os << "OL_ERRC_INVALID_DEVICE"; - break; - case OL_ERRC_DEVICE_LOST: - os << "OL_ERRC_DEVICE_LOST"; - break; - case OL_ERRC_UNINITIALIZED: - os << "OL_ERRC_UNINITIALIZED"; - break; - case OL_ERRC_OUT_OF_RESOURCES: - os << "OL_ERRC_OUT_OF_RESOURCES"; - break; - case OL_ERRC_UNSUPPORTED_VERSION: - os << "OL_ERRC_UNSUPPORTED_VERSION"; - break; - case OL_ERRC_UNSUPPORTED_FEATURE: - os << "OL_ERRC_UNSUPPORTED_FEATURE"; - break; - case OL_ERRC_INVALID_ARGUMENT: - os << "OL_ERRC_INVALID_ARGUMENT"; - break; - case OL_ERRC_INVALID_NULL_HANDLE: - os << "OL_ERRC_INVALID_NULL_HANDLE"; - break; - case OL_ERRC_INVALID_NULL_POINTER: - os << "OL_ERRC_INVALID_NULL_POINTER"; - break; - case OL_ERRC_INVALID_SIZE: - os << "OL_ERRC_INVALID_SIZE"; - break; - case OL_ERRC_INVALID_ENUMERATION: - os << "OL_ERRC_INVALID_ENUMERATION"; - break; - case OL_ERRC_UNSUPPORTED_ENUMERATION: - os << "OL_ERRC_UNSUPPORTED_ENUMERATION"; - break; - case OL_ERRC_UNKNOWN: - os << "OL_ERRC_UNKNOWN"; - break; - default: - os << "unknown enumerator"; - break; - } - return os; -} - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ol_platform_info_t type -/// @returns std::ostream & -inline std::ostream &operator<<(std::ostream &os, - enum ol_platform_info_t value) { - switch (value) { - case OL_PLATFORM_INFO_NAME: - os << "OL_PLATFORM_INFO_NAME"; - break; - case OL_PLATFORM_INFO_VENDOR_NAME: - os << "OL_PLATFORM_INFO_VENDOR_NAME"; - break; - case OL_PLATFORM_INFO_VERSION: - os << "OL_PLATFORM_INFO_VERSION"; - break; - case OL_PLATFORM_INFO_BACKEND: - os << "OL_PLATFORM_INFO_BACKEND"; - break; - default: - os << "unknown enumerator"; - break; - } - return os; -} - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print type-tagged ol_platform_info_t enum value -/// @returns std::ostream & -template <> -inline void printTagged(std::ostream &os, const void *ptr, - ol_platform_info_t value, size_t size) { - if (ptr == NULL) { - printPtr(os, ptr); - return; - } - - switch (value) { - case 
OL_PLATFORM_INFO_NAME: { - printPtr(os, (const char *)ptr); - break; - } - case OL_PLATFORM_INFO_VENDOR_NAME: { - printPtr(os, (const char *)ptr); - break; - } - case OL_PLATFORM_INFO_VERSION: { - printPtr(os, (const char *)ptr); - break; - } - case OL_PLATFORM_INFO_BACKEND: { - const ol_platform_backend_t *const tptr = - (const ol_platform_backend_t *const)ptr; - os << (const void *)tptr << " ("; - os << *tptr; - os << ")"; - break; - } - default: - os << "unknown enumerator"; - break; - } -} -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ol_platform_backend_t type -/// @returns std::ostream & -inline std::ostream &operator<<(std::ostream &os, - enum ol_platform_backend_t value) { - switch (value) { - case OL_PLATFORM_BACKEND_UNKNOWN: - os << "OL_PLATFORM_BACKEND_UNKNOWN"; - break; - case OL_PLATFORM_BACKEND_CUDA: - os << "OL_PLATFORM_BACKEND_CUDA"; - break; - case OL_PLATFORM_BACKEND_AMDGPU: - os << "OL_PLATFORM_BACKEND_AMDGPU"; - break; - default: - os << "unknown enumerator"; - break; - } - return os; -} - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ol_device_type_t type -/// @returns std::ostream & -inline std::ostream &operator<<(std::ostream &os, enum ol_device_type_t value) { - switch (value) { - case OL_DEVICE_TYPE_DEFAULT: - os << "OL_DEVICE_TYPE_DEFAULT"; - break; - case OL_DEVICE_TYPE_ALL: - os << "OL_DEVICE_TYPE_ALL"; - break; - case OL_DEVICE_TYPE_GPU: - os << "OL_DEVICE_TYPE_GPU"; - break; - case OL_DEVICE_TYPE_CPU: - os << "OL_DEVICE_TYPE_CPU"; - break; - default: - os << "unknown enumerator"; - break; - } - return os; -} - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ol_device_info_t type -/// @returns std::ostream & -inline std::ostream &operator<<(std::ostream &os, enum ol_device_info_t value) { - switch (value) { - case OL_DEVICE_INFO_TYPE: - os << "OL_DEVICE_INFO_TYPE"; - break; - case OL_DEVICE_INFO_PLATFORM: - os << "OL_DEVICE_INFO_PLATFORM"; - break; - case OL_DEVICE_INFO_NAME: - os << "OL_DEVICE_INFO_NAME"; - break; - case OL_DEVICE_INFO_VENDOR: - os << "OL_DEVICE_INFO_VENDOR"; - break; - case OL_DEVICE_INFO_DRIVER_VERSION: - os << "OL_DEVICE_INFO_DRIVER_VERSION"; - break; - default: - os << "unknown enumerator"; - break; - } - return os; -} - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print type-tagged ol_device_info_t enum value -/// @returns std::ostream & -template <> -inline void printTagged(std::ostream &os, const void *ptr, - ol_device_info_t value, size_t size) { - if (ptr == NULL) { - printPtr(os, ptr); - return; - } - - switch (value) { - case OL_DEVICE_INFO_TYPE: { - const ol_device_type_t *const tptr = (const ol_device_type_t *const)ptr; - os << (const void *)tptr << " ("; - os << *tptr; - os << ")"; - break; - } - case OL_DEVICE_INFO_PLATFORM: { - const ol_platform_handle_t *const tptr = - (const ol_platform_handle_t *const)ptr; - os << (const void *)tptr << " ("; - os << *tptr; - os << ")"; - break; - } - case OL_DEVICE_INFO_NAME: { - printPtr(os, (const char *)ptr); - break; - } - case OL_DEVICE_INFO_VENDOR: { - printPtr(os, (const char *)ptr); - break; - } - case OL_DEVICE_INFO_DRIVER_VERSION: { - printPtr(os, (const char *)ptr); - break; - } - default: - os << "unknown enumerator"; - break; - } -} - -inline std::ostream &operator<<(std::ostream &os, - const ol_error_struct_t *Err) { - if 
(Err == nullptr) { - os << "OL_SUCCESS"; - } else { - os << Err->Code; - } - return os; -} - -inline std::ostream &operator<<(std::ostream &os, - const struct ol_get_platform_params_t *params) { - os << ".NumEntries = "; - os << *params->pNumEntries; - os << ", "; - os << ".Platforms = "; - os << "{"; - for (size_t i = 0; i < *params->pNumEntries; i++) { - if (i > 0) { - os << ", "; - } - printPtr(os, (*params->pPlatforms)[i]); - } - os << "}"; - return os; -} - -inline std::ostream & -operator<<(std::ostream &os, - const struct ol_get_platform_count_params_t *params) { - os << ".NumPlatforms = "; - printPtr(os, *params->pNumPlatforms); - return os; -} - -inline std::ostream & -operator<<(std::ostream &os, - const struct ol_get_platform_info_params_t *params) { - os << ".Platform = "; - printPtr(os, *params->pPlatform); - os << ", "; - os << ".PropName = "; - os << *params->pPropName; - os << ", "; - os << ".PropSize = "; - os << *params->pPropSize; - os << ", "; - os << ".PropValue = "; - printTagged(os, *params->pPropValue, *params->pPropName, *params->pPropSize); - return os; -} - -inline std::ostream & -operator<<(std::ostream &os, - const struct ol_get_platform_info_size_params_t *params) { - os << ".Platform = "; - printPtr(os, *params->pPlatform); - os << ", "; - os << ".PropName = "; - os << *params->pPropName; - os << ", "; - os << ".PropSizeRet = "; - printPtr(os, *params->pPropSizeRet); - return os; -} - -inline std::ostream & -operator<<(std::ostream &os, - const struct ol_get_device_count_params_t *params) { - os << ".Platform = "; - printPtr(os, *params->pPlatform); - os << ", "; - os << ".NumDevices = "; - printPtr(os, *params->pNumDevices); - return os; -} - -inline std::ostream &operator<<(std::ostream &os, - const struct ol_get_device_params_t *params) { - os << ".Platform = "; - printPtr(os, *params->pPlatform); - os << ", "; - os << ".NumEntries = "; - os << *params->pNumEntries; - os << ", "; - os << ".Devices = "; - os << "{"; - for (size_t i = 0; i < *params->pNumEntries; i++) { - if (i > 0) { - os << ", "; - } - printPtr(os, (*params->pDevices)[i]); - } - os << "}"; - return os; -} - -inline std::ostream & -operator<<(std::ostream &os, const struct ol_get_device_info_params_t *params) { - os << ".Device = "; - printPtr(os, *params->pDevice); - os << ", "; - os << ".PropName = "; - os << *params->pPropName; - os << ", "; - os << ".PropSize = "; - os << *params->pPropSize; - os << ", "; - os << ".PropValue = "; - printTagged(os, *params->pPropValue, *params->pPropName, *params->pPropSize); - return os; -} - -inline std::ostream & -operator<<(std::ostream &os, - const struct ol_get_device_info_size_params_t *params) { - os << ".Device = "; - printPtr(os, *params->pDevice); - os << ", "; - os << ".PropName = "; - os << *params->pPropName; - os << ", "; - os << ".PropSizeRet = "; - printPtr(os, *params->pPropSizeRet); - return os; -} - -/////////////////////////////////////////////////////////////////////////////// -// @brief Print pointer value -template -inline ol_result_t printPtr(std::ostream &os, const T *ptr) { - if (ptr == nullptr) { - os << "nullptr"; - } else if constexpr (std::is_pointer_v) { - os << (const void *)(ptr) << " ("; - printPtr(os, *ptr); - os << ")"; - } else if constexpr (std::is_void_v || is_handle_v) { - os << (const void *)ptr; - } else if constexpr (std::is_same_v, char>) { - os << (const void *)(ptr) << " ("; - os << ptr; - os << ")"; - } else { - os << (const void *)(ptr) << " ("; - os << *ptr; - os << ")"; - } - - return OL_SUCCESS; -} diff 
--git a/offload/liboffload/src/Helpers.hpp b/offload/liboffload/src/Helpers.hpp
deleted file mode 100644
index d003d30252462..0000000000000
--- a/offload/liboffload/src/Helpers.hpp
+++ /dev/null
@@ -1,95 +0,0 @@
-//===- helpers.hpp- GetInfo return helpers for the new LLVM/Offload API ---===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// The getInfo*/ReturnHelper facilities provide shortcut way of writing return
-// data + size for the various getInfo APIs. Based on the equivalent
-// implementations in Unified Runtime.
-//
-//===----------------------------------------------------------------------===//
-
-#include "OffloadAPI.h"
-
-#include <cstring>
-
-template <typename T, typename Assign>
-ol_errc_t getInfoImpl(size_t ParamValueSize, void *ParamValue,
-                      size_t *ParamValueSizeRet, T Value, size_t ValueSize,
-                      Assign &&AssignFunc) {
-  if (!ParamValue && !ParamValueSizeRet) {
-    return OL_ERRC_INVALID_NULL_POINTER;
-  }
-
-  if (ParamValue != nullptr) {
-    if (ParamValueSize < ValueSize) {
-      return OL_ERRC_INVALID_SIZE;
-    }
-    AssignFunc(ParamValue, Value, ValueSize);
-  }
-
-  if (ParamValueSizeRet != nullptr) {
-    *ParamValueSizeRet = ValueSize;
-  }
-
-  return OL_ERRC_SUCCESS;
-}
-
-template <typename T>
-ol_errc_t getInfo(size_t ParamValueSize, void *ParamValue,
-                  size_t *ParamValueSizeRet, T Value) {
-  auto Assignment = [](void *ParamValue, T Value, size_t) {
-    *static_cast<T *>(ParamValue) = Value;
-  };
-
-  return getInfoImpl(ParamValueSize, ParamValue, ParamValueSizeRet, Value,
-                     sizeof(T), Assignment);
-}
-
-template <typename T>
-ol_errc_t getInfoArray(size_t array_length, size_t ParamValueSize,
-                       void *ParamValue, size_t *ParamValueSizeRet,
-                       const T *Value) {
-  return getInfoImpl(ParamValueSize, ParamValue, ParamValueSizeRet, Value,
-                     array_length * sizeof(T), memcpy);
-}
-
-template <>
-inline ol_errc_t getInfo<const char *>(size_t ParamValueSize, void *ParamValue,
-                                       size_t *ParamValueSizeRet,
-                                       const char *Value) {
-  return getInfoArray(strlen(Value) + 1, ParamValueSize, ParamValue,
-                      ParamValueSizeRet, Value);
-}
-
-class ReturnHelper {
-public:
-  ReturnHelper(size_t ParamValueSize, void *ParamValue,
-               size_t *ParamValueSizeRet)
-      : ParamValueSize(ParamValueSize), ParamValue(ParamValue),
-        ParamValueSizeRet(ParamValueSizeRet) {}
-
-  // A version where in/out info size is represented by a single pointer
-  // to a value which is updated on return
-  ReturnHelper(size_t *ParamValueSize, void *ParamValue)
-      : ParamValueSize(*ParamValueSize), ParamValue(ParamValue),
-        ParamValueSizeRet(ParamValueSize) {}
-
-  // Scalar return Value
-  template <typename T> ol_errc_t operator()(const T &t) {
-    return getInfo(ParamValueSize, ParamValue, ParamValueSizeRet, t);
-  }
-
-  // Array return Value
-  template <typename T> ol_errc_t operator()(const T *t, size_t s) {
-    return getInfoArray(s, ParamValueSize, ParamValue, ParamValueSizeRet, t);
-  }
-
-protected:
-  size_t ParamValueSize;
-  void *ParamValue;
-  size_t *ParamValueSizeRet;
-};
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
deleted file mode 100644
index 457f1053f1634..0000000000000
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ /dev/null
@@ -1,247 +0,0 @@
-//===- ol_impl.cpp - Implementation of the new LLVM/Offload API ------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
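The helpers above exist so that each *GetInfo/*GetInfoSize query can be answered with a single call that honors the size-query-then-fetch protocol. A condensed standalone sketch of that protocol from the caller's side; the error enum and getName function are invented stand-ins, not liboffload API:

#include <cstring>
#include <iostream>
#include <string>

enum Err { Ok = 0, BadSize = 1, BadPointer = 2 };

// Two-call protocol: pass a null buffer to query the size, then call again
// with a buffer of at least that size to receive the value.
static Err getName(size_t Size, void *Value, size_t *SizeRet) {
  const char *Name = "ExamplePlatform";
  size_t Needed = std::strlen(Name) + 1;
  if (!Value && !SizeRet)
    return BadPointer;
  if (SizeRet)
    *SizeRet = Needed;
  if (Value) {
    if (Size < Needed)
      return BadSize;
    std::memcpy(Value, Name, Needed);
  }
  return Ok;
}

int main() {
  size_t Needed = 0;
  getName(0, nullptr, &Needed);         // first call: how many bytes?
  std::string Buf(Needed, '\0');
  getName(Needed, Buf.data(), nullptr); // second call: fill the buffer
  std::cout << Buf.c_str() << "\n";
  return 0;
}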
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This contains the definitions of the new LLVM/Offload API entry points. See -// new-api/API/README.md for more information. -// -//===----------------------------------------------------------------------===// - -#include "OffloadImpl.hpp" -#include "Helpers.hpp" -#include "PluginManager.h" -#include "llvm/Support/FormatVariadic.h" -#include - -#include - -using namespace llvm; -using namespace llvm::omp::target::plugin; - -// Handle type definitions. Ideally these would be 1:1 with the plugins -struct ol_device_handle_t_ { - int DeviceNum; - GenericDeviceTy &Device; - ol_platform_handle_t Platform; -}; - -struct ol_platform_handle_t_ { - std::unique_ptr Plugin; - std::vector Devices; -}; - -using PlatformVecT = SmallVector; -PlatformVecT &Platforms() { - static PlatformVecT Platforms; - return Platforms; -} - -// TODO: Some plugins expect to be linked into libomptarget which defines these -// symbols to implement ompt callbacks. The least invasive workaround here is to -// define them in libLLVMOffload as false/null so they are never used. In future -// it would be better to allow the plugins to implement callbacks without -// pulling in details from libomptarget. -#ifdef OMPT_SUPPORT -namespace llvm::omp::target { -namespace ompt { -bool Initialized = false; -ompt_get_callback_t lookupCallbackByCode = nullptr; -ompt_function_lookup_t lookupCallbackByName = nullptr; -} // namespace ompt -} // namespace llvm::omp::target -#endif - -// Every plugin exports this method to create an instance of the plugin type. -#define PLUGIN_TARGET(Name) extern "C" GenericPluginTy *createPlugin_##Name(); -#include "Shared/Targets.def" - -void initPlugins() { - // Attempt to create an instance of each supported plugin. 
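The PLUGIN_TARGET macro is expanded once above to declare a createPlugin_<Name> factory per backend and expanded again in the initialization code that follows to instantiate them. A standalone sketch of the double-expansion idiom, with a hypothetical two-entry stand-in for Shared/Targets.def and a simplified Plugin type:

#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct Plugin { std::string Name; };

// Stand-in for Shared/Targets.def, which lists one PLUGIN_TARGET(...) per
// configured backend.
#define TARGETS_DEF PLUGIN_TARGET(cuda) PLUGIN_TARGET(amdgpu)

// First expansion: declare and define one factory per target.
#define PLUGIN_TARGET(Name) static Plugin *createPlugin_##Name() { return new Plugin{#Name}; }
TARGETS_DEF
#undef PLUGIN_TARGET

// Second expansion: call each factory and collect the results.
static std::vector<std::unique_ptr<Plugin>> createAllPlugins() {
  std::vector<std::unique_ptr<Plugin>> Plugins;
#define PLUGIN_TARGET(Name) Plugins.emplace_back(createPlugin_##Name());
  TARGETS_DEF
#undef PLUGIN_TARGET
  return Plugins;
}

int main() {
  for (auto &P : createAllPlugins())
    std::cout << P->Name << "\n";
  return 0;
}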
-#define PLUGIN_TARGET(Name) \ - do { \ - Platforms().emplace_back(ol_platform_handle_t_{ \ - std::unique_ptr(createPlugin_##Name()), {}}); \ - } while (false); -#include "Shared/Targets.def" - - // Preemptively initialize all devices in the plugin so we can just return - // them from deviceGet - for (auto &Platform : Platforms()) { - auto Err = Platform.Plugin->init(); - [[maybe_unused]] std::string InfoMsg = toString(std::move(Err)); - for (auto DevNum = 0; DevNum < Platform.Plugin->number_of_devices(); - DevNum++) { - if (Platform.Plugin->init_device(DevNum) == OFFLOAD_SUCCESS) { - Platform.Devices.emplace_back(ol_device_handle_t_{ - DevNum, Platform.Plugin->getDevice(DevNum), &Platform}); - } - } - } - - offloadConfig().TracingEnabled = std::getenv("OFFLOAD_TRACE"); -} - -// TODO: We can properly reference count here and manage the resources in a more -// clever way -ol_impl_result_t olInit_impl() { - static std::once_flag InitFlag; - std::call_once(InitFlag, initPlugins); - - return OL_SUCCESS; -} -ol_impl_result_t olShutDown_impl() { return OL_SUCCESS; } - -ol_impl_result_t olGetPlatformCount_impl(uint32_t *NumPlatforms) { - *NumPlatforms = Platforms().size(); - return OL_SUCCESS; -} - -ol_impl_result_t olGetPlatform_impl(uint32_t NumEntries, - ol_platform_handle_t *PlatformsOut) { - if (NumEntries > Platforms().size()) { - return {OL_ERRC_INVALID_SIZE, - std::string{formatv("{0} platform(s) available but {1} requested.", - Platforms().size(), NumEntries)}}; - } - - for (uint32_t PlatformIndex = 0; PlatformIndex < NumEntries; - PlatformIndex++) { - PlatformsOut[PlatformIndex] = &(Platforms())[PlatformIndex]; - } - - return OL_SUCCESS; -} - -ol_impl_result_t olGetPlatformInfoImplDetail(ol_platform_handle_t Platform, - ol_platform_info_t PropName, - size_t PropSize, void *PropValue, - size_t *PropSizeRet) { - ReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); - - switch (PropName) { - case OL_PLATFORM_INFO_NAME: - return ReturnValue(Platform->Plugin->getName()); - case OL_PLATFORM_INFO_VENDOR_NAME: - // TODO: Implement this - return ReturnValue("Unknown platform vendor"); - case OL_PLATFORM_INFO_VERSION: { - return ReturnValue(formatv("v{0}.{1}.{2}", OL_VERSION_MAJOR, - OL_VERSION_MINOR, OL_VERSION_PATCH) - .str() - .c_str()); - } - case OL_PLATFORM_INFO_BACKEND: { - auto PluginName = Platform->Plugin->getName(); - if (PluginName == StringRef("CUDA")) { - return ReturnValue(OL_PLATFORM_BACKEND_CUDA); - } else if (PluginName == StringRef("AMDGPU")) { - return ReturnValue(OL_PLATFORM_BACKEND_AMDGPU); - } else { - return ReturnValue(OL_PLATFORM_BACKEND_UNKNOWN); - } - } - default: - return OL_ERRC_INVALID_ENUMERATION; - } - - return OL_SUCCESS; -} - -ol_impl_result_t olGetPlatformInfo_impl(ol_platform_handle_t Platform, - ol_platform_info_t PropName, - size_t PropSize, void *PropValue) { - return olGetPlatformInfoImplDetail(Platform, PropName, PropSize, PropValue, - nullptr); -} - -ol_impl_result_t olGetPlatformInfoSize_impl(ol_platform_handle_t Platform, - ol_platform_info_t PropName, - size_t *PropSizeRet) { - return olGetPlatformInfoImplDetail(Platform, PropName, 0, nullptr, - PropSizeRet); -} - -ol_impl_result_t olGetDeviceCount_impl(ol_platform_handle_t Platform, - uint32_t *pNumDevices) { - *pNumDevices = static_cast(Platform->Devices.size()); - - return OL_SUCCESS; -} - -ol_impl_result_t olGetDevice_impl(ol_platform_handle_t Platform, - uint32_t NumEntries, - ol_device_handle_t *Devices) { - if (NumEntries > Platform->Devices.size()) { - return OL_ERRC_INVALID_SIZE; - } - 
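olGetPlatform_impl above reports failures as an error code paired with a formatted detail string rather than a bare code. A minimal standalone sketch of such a result type, using std::string concatenation in place of llvm::formatv; the ImplResult type and getPlatforms function are illustrative only:

#include <cstdint>
#include <iostream>
#include <optional>
#include <string>

enum class Errc { Success, InvalidSize };

// Error code plus optional human-readable detail, in the spirit of the
// implementation result type used above.
struct ImplResult {
  Errc Code = Errc::Success;
  std::optional<std::string> Details;
};

static ImplResult getPlatforms(uint32_t Available, uint32_t Requested) {
  if (Requested > Available)
    return {Errc::InvalidSize,
            std::to_string(Available) + " platform(s) available but " +
                std::to_string(Requested) + " requested."};
  return {};
}

int main() {
  ImplResult R = getPlatforms(1, 3);
  if (R.Code != Errc::Success && R.Details)
    std::cout << "*Error Details* " << *R.Details << "\n";
  return 0;
}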
- for (uint32_t DeviceIndex = 0; DeviceIndex < NumEntries; DeviceIndex++) { - Devices[DeviceIndex] = &(Platform->Devices[DeviceIndex]); - } - - return OL_SUCCESS; -} - -ol_impl_result_t olGetDeviceInfoImplDetail(ol_device_handle_t Device, - ol_device_info_t PropName, - size_t PropSize, void *PropValue, - size_t *PropSizeRet) { - - ReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); - - InfoQueueTy DevInfo; - if (auto Err = Device->Device.obtainInfoImpl(DevInfo)) - return OL_ERRC_OUT_OF_RESOURCES; - - // Find the info if it exists under any of the given names - auto GetInfo = [&DevInfo](std::vector Names) { - for (auto Name : Names) { - auto InfoKeyMatches = [&](const InfoQueueTy::InfoQueueEntryTy &Info) { - return Info.Key == Name; - }; - auto Item = std::find_if(DevInfo.getQueue().begin(), - DevInfo.getQueue().end(), InfoKeyMatches); - - if (Item != std::end(DevInfo.getQueue())) { - return Item->Value; - } - } - - return std::string(""); - }; - - switch (PropName) { - case OL_DEVICE_INFO_PLATFORM: - return ReturnValue(Device->Platform); - case OL_DEVICE_INFO_TYPE: - return ReturnValue(OL_DEVICE_TYPE_GPU); - case OL_DEVICE_INFO_NAME: - return ReturnValue(GetInfo({"Device Name"}).c_str()); - case OL_DEVICE_INFO_VENDOR: - return ReturnValue(GetInfo({"Vendor Name"}).c_str()); - case OL_DEVICE_INFO_DRIVER_VERSION: - return ReturnValue( - GetInfo({"CUDA Driver Version", "HSA Runtime Version"}).c_str()); - default: - return OL_ERRC_INVALID_ENUMERATION; - } - - return OL_SUCCESS; -} - -ol_impl_result_t olGetDeviceInfo_impl(ol_device_handle_t Device, - ol_device_info_t PropName, - size_t PropSize, void *PropValue) { - return olGetDeviceInfoImplDetail(Device, PropName, PropSize, PropValue, - nullptr); -} - -ol_impl_result_t olGetDeviceInfoSize_impl(ol_device_handle_t Device, - ol_device_info_t PropName, - size_t *PropSizeRet) { - return olGetDeviceInfoImplDetail(Device, PropName, 0, nullptr, PropSizeRet); -} diff --git a/offload/liboffload/src/OffloadLib.cpp b/offload/liboffload/src/OffloadLib.cpp deleted file mode 100644 index 37876713212c9..0000000000000 --- a/offload/liboffload/src/OffloadLib.cpp +++ /dev/null @@ -1,44 +0,0 @@ -//===- offload_lib.cpp - Entry points for the new LLVM/Offload API --------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file pulls in the tablegen'd API entry point functions. -// -//===----------------------------------------------------------------------===// - -#include "OffloadImpl.hpp" -#include -#include - -#include - -llvm::StringSet<> &errorStrs() { - static llvm::StringSet<> ErrorStrs; - return ErrorStrs; -} - -ErrSetT &errors() { - static ErrSetT Errors{}; - return Errors; -} - -ol_code_location_t *¤tCodeLocation() { - thread_local ol_code_location_t *CodeLoc = nullptr; - return CodeLoc; -} - -OffloadConfig &offloadConfig() { - static OffloadConfig Config{}; - return Config; -} - -// Pull in the declarations for the implementation funtions. The actual entry -// points in this file wrap these. -#include "OffloadImplFuncDecls.inc" - -// Pull in the tablegen'd entry point definitions. 
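OffloadLib.cpp above stores the caller-supplied code location in a thread_local slot so each *WithCodeLoc variant can set it, forward to the ordinary entry point, and clear it again, all without changing signatures. A standalone sketch of that pattern with invented CodeLocation and doThing names:

#include <iostream>

struct CodeLocation { const char *File; int Line; };

// One slot per thread; wrappers set it before dispatching and clear it after.
static CodeLocation *&currentCodeLocation() {
  thread_local CodeLocation *Loc = nullptr;
  return Loc;
}

static int doThing() {
  if (CodeLocation *Loc = currentCodeLocation())
    std::cout << "called from " << Loc->File << ":" << Loc->Line << "\n";
  return 0;
}

static int doThingWithCodeLoc(CodeLocation *Loc) {
  currentCodeLocation() = Loc;
  int Result = doThing();
  currentCodeLocation() = nullptr;
  return Result;
}

int main() {
  CodeLocation Loc{"app.cpp", 17};
  return doThingWithCodeLoc(&Loc);
}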
-#include "OffloadEntryPoints.inc" diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index 63e2f80302c30..97540d5a3e2b3 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -124,7 +124,6 @@ enum InfoLevelKind { InfoLevel1 = 1, InfoLevel2, InfoLevel3 }; /// we use the level to determine the indentation of the key-value property at /// printing time. See the enum InfoLevelKind for the list of accepted levels. class InfoQueueTy { -public: struct InfoQueueEntryTy { std::string Key; std::string Value; @@ -132,7 +131,6 @@ class InfoQueueTy { uint64_t Level; }; -private: std::deque Queue; public: @@ -155,8 +153,6 @@ class InfoQueueTy { Queue.push_back({Key, Value, Units, L}); } - const std::deque &getQueue() const { return Queue; } - /// Print all info entries added to the queue. void print() const { // We print four spances for each level. diff --git a/offload/test/lit.cfg b/offload/test/lit.cfg index 658ae5f9653ba..2f1ef3e98d817 100644 --- a/offload/test/lit.cfg +++ b/offload/test/lit.cfg @@ -66,7 +66,7 @@ def evaluate_bool_env(env): config.name = 'libomptarget :: ' + config.libomptarget_current_target # suffixes: A list of file extensions to treat as test files. -config.suffixes = ['.c', '.cpp', '.cc', '.f90', '.cu', '.td'] +config.suffixes = ['.c', '.cpp', '.cc', '.f90', '.cu'] # excludes: A list of directories to exclude from the testuites. config.excludes = ['Inputs'] @@ -418,4 +418,3 @@ config.substitutions.append(("%flags", config.test_flags)) config.substitutions.append(("%not", config.libomptarget_not)) config.substitutions.append(("%offload-device-info", config.offload_device_info)) -config.substitutions.append(("%offload-tblgen", config.offload_tblgen)) diff --git a/offload/test/lit.site.cfg.in b/offload/test/lit.site.cfg.in index ce3f6abf50a13..a1cb5acc38a40 100644 --- a/offload/test/lit.site.cfg.in +++ b/offload/test/lit.site.cfg.in @@ -28,6 +28,5 @@ config.libomptarget_debug = @LIBOMPTARGET_DEBUG@ config.has_libomptarget_ompt = @LIBOMPTARGET_OMPT_SUPPORT@ config.libomptarget_has_libc = @LIBOMPTARGET_GPU_LIBC_SUPPORT@ config.libomptarget_test_pgo = @LIBOMPTARGET_TEST_GPU_PGO@ -config.offload_tblgen = "@OFFLOAD_TBLGEN_EXECUTABLE@" # Let the main config do the real work. 
lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg") diff --git a/offload/test/tools/offload-tblgen/default_returns.td b/offload/test/tools/offload-tblgen/default_returns.td deleted file mode 100644 index 995e24abf707d..0000000000000 --- a/offload/test/tools/offload-tblgen/default_returns.td +++ /dev/null @@ -1,40 +0,0 @@ -// RUN: %offload-tblgen -gen-api -I %S/../../../liboffload/API %s | %fcheck-generic --check-prefix=CHECK-API -// RUN: %offload-tblgen -gen-entry-points -I %S/../../../liboffload/API %s | %fcheck-generic --check-prefix=CHECK-VALIDATION - -// Check implicit returns are included in documentation and the validation -// wrappers where applicable - -include "APIDefs.td" - -def : Handle { - let name = "ol_foo_handle_t"; - let desc = "Example handle type"; -} - -def : Function { - let name = "FunctionA"; - let desc = "Function A description"; - let details = [ "Function A detailed information" ]; - let params = [ - Param<"uint32_t", "ParamValue", "A plain value parameter">, - Param<"ol_foo_handle_t", "ParamHandle", "A handle parameter">, - Param<"uint32_t*", "ParamPointer", "A pointer parameter">, - Param<"uint32_t*", "ParamPointerOpt", "An optional pointer parameter", PARAM_OUT_OPTIONAL> - ]; - let returns = []; -} - -// CHECK-API: /// @returns -// CHECK-API: OL_RESULT_SUCCESS -// CHECK-API: OL_ERRC_INVALID_NULL_HANDLE -// CHECK-API-NEXT: `NULL == ParamHandle` -// CHECK-API: OL_ERRC_INVALID_NULL_POINTER -// CHECK-API-NEXT: `NULL == ParamPointer` -// CHECK-API-NOT: `NULL == ParamPointerOpt` - -// CHECK-VALIDATION: FunctionA_val -// CHECK-VALIDATION: if (NULL == ParamHandle) -// CHECK-VALIDATION-NEXT: return OL_ERRC_INVALID_NULL_HANDLE; -// CHECK-VALIDATION: if (NULL == ParamPointer) -// CHECK-VALIDATION-NEXT: return OL_ERRC_INVALID_NULL_POINTER; -// CHECK-VALIDATION-NOT: if (NULL == ParamPointerOpt) diff --git a/offload/test/tools/offload-tblgen/entry_points.td b/offload/test/tools/offload-tblgen/entry_points.td deleted file mode 100644 index a66ddb9279920..0000000000000 --- a/offload/test/tools/offload-tblgen/entry_points.td +++ /dev/null @@ -1,37 +0,0 @@ -// RUN: %offload-tblgen -gen-entry-points -I %S/../../../liboffload/API %s | %fcheck-generic - -// Check entry point wrapper functions are generated correctly - -include "APIDefs.td" - -def : Function { - let name = "FunctionA"; - let desc = "Function A description"; - let details = [ "Function A detailed information" ]; - let params = [ - Param<"uint32_t", "ParamA", "Parameter A description">, - Param<"uint32_t*", "ParamB", "Parameter B description">, - ]; - let returns = [ - Return<"OL_ERRC_INVALID_VALUE", ["When a value is invalid"]> - ]; -} - - -// The validation function should call the implementation function -// CHECK: FunctionA_val -// CHECK: return FunctionA_impl(ParamA, ParamB); - -// CHECK: ol_result_t{{.*}} FunctionA( - -// The entry point should print tracing output if enabled -// CHECK: if (offloadConfig().TracingEnabled) { -// CHECK-NEXT: "---> FunctionA"; - -// CHECK: Result = FunctionA_val(ParamA, ParamB); - -// Tracing should construct a param struct for printing -// CHECK: if (offloadConfig().TracingEnabled) { -// CHECK: function_a_params_t Params = {&ParamA, &ParamB}; - -// CHECK: return Result; diff --git a/offload/test/tools/offload-tblgen/functions_basic.td b/offload/test/tools/offload-tblgen/functions_basic.td deleted file mode 100644 index dec93577b57e9..0000000000000 --- a/offload/test/tools/offload-tblgen/functions_basic.td +++ /dev/null @@ -1,39 +0,0 @@ -// RUN: %offload-tblgen 
-gen-api -I %S/../../../liboffload/API %s | %fcheck-generic --check-prefix=CHECK-API -// RUN: %offload-tblgen -gen-exports -I %S/../../../liboffload/API %s | %fcheck-generic --check-prefix=CHECK-EXPORTS -// RUN: %offload-tblgen -gen-func-names -I %S/../../../liboffload/API %s | %fcheck-generic --check-prefix=CHECK-FUNC-MACRO - -// Check basic support for API functions - -include "APIDefs.td" - -def : Function { - let name = "FunctionA"; - let desc = "Function A description"; - let details = [ "Function A detailed information" ]; - let params = [ - Param<"uint32_t", "ParamA", "Parameter A description">, - Param<"uint32_t*", "ParamB", "Parameter B description">, - ]; - let returns = [ - Return<"OL_ERRC_INVALID_VALUE", ["When a value is invalid"]> - ]; -} - -// CHECK-API: /// @brief Function A description -// CHECK-API: /// @details -// CHECK-API-NEXT: Function A detailed information -// CHECK-API: /// @returns -// CHECK-API: OL_ERRC_INVALID_VALUE -// CHECK-API-NEXT: When a value is invalid - -// CHECK-API: ol_result_t -// CHECK-API-SAME: FunctionA - -// CHECK-API: // Parameter A description -// CHECK-API-NEXT: uint32_t ParamA -// CHECK-API: // Parameter B description -// CHECK-API-NEXT: uint32_t* ParamB - -// CHECK-EXPORTS: FunctionA - -// CHECK-FUNC-MACRO: OFFLOAD_FUNC(FunctionA) diff --git a/offload/test/tools/offload-tblgen/functions_code_loc.td b/offload/test/tools/offload-tblgen/functions_code_loc.td deleted file mode 100644 index aec20129343f5..0000000000000 --- a/offload/test/tools/offload-tblgen/functions_code_loc.td +++ /dev/null @@ -1,26 +0,0 @@ -// RUN: %offload-tblgen -gen-api -I %S/../../../liboffload/API %s | %fcheck-generic --check-prefix=CHECK-API -// RUN: %offload-tblgen -gen-exports -I %S/../../../liboffload/API %s | %fcheck-generic --check-prefix=CHECK-EXPORTS -// RUN: %offload-tblgen -gen-func-names -I %S/../../../liboffload/API %s | %fcheck-generic --check-prefix=CHECK-FUNC-MACRO - -// Check that the function variant with code location information is generated -// and is otherwise the same as the regular function - -include "APIDefs.td" - -def : Function { - let name = "FunctionA"; - let desc = "Function A description"; - let details = [ "Function A detailed information" ]; - let params = [ - Param<"uint32_t", "ParamA", "Parameter A description">, - Param<"uint32_t*", "ParamB", "Parameter B description">, - ]; - let returns = [ - Return<"OL_ERRC_INVALID_VALUE", ["When a value is invalid"]> - ]; -} - -// CHECK-API-DAG: ol_result_t{{.*}} FunctionA -// CHECK-API-DAG: ol_result_t{{.*}} FunctionAWithCodeLoc -// CHECK-EXPORTS: FunctionAWithCodeLoc -// CHECK-FUNC-MACRO: OFFLOAD_FUNC(FunctionAWithCodeLoc) diff --git a/offload/test/tools/offload-tblgen/functions_ranged_param.td b/offload/test/tools/offload-tblgen/functions_ranged_param.td deleted file mode 100644 index 21a84d8a70334..0000000000000 --- a/offload/test/tools/offload-tblgen/functions_ranged_param.td +++ /dev/null @@ -1,36 +0,0 @@ -// RUN: %offload-tblgen -gen-print-header -I %S/../../../liboffload/API %s | %fcheck-generic - -// Check that ranged function parameters are implemented correctly. These -// are pointers to an array of an arbitrary size. Their size is described as a -// range between two values. This is typically between 0 and a parameter such -// as NumItems. The range information helps the printing code print the entire -// range of the output rather than just the pointer or the first element. 
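A standalone sketch of the loop such a ranged parameter produces in the generated print code, printing every element of the pointer-plus-count output rather than just the pointer; the params struct and handle alias are invented for illustration:

#include <cstddef>
#include <iostream>

using some_handle_t = void *;

// Mirrors a generated *_params_t: one pointer per original parameter.
struct FunctionAParams {
  size_t *pOutCount;
  some_handle_t **pOutPtr;
};

static std::ostream &operator<<(std::ostream &OS, const FunctionAParams &P) {
  OS << ".OutCount = " << *P.pOutCount << ", .OutPtr = {";
  // Print the whole range [0, OutCount), not just the first element.
  for (size_t I = 0; I < *P.pOutCount; ++I) {
    if (I > 0)
      OS << ", ";
    OS << (*P.pOutPtr)[I];
  }
  return OS << "}";
}

int main() {
  int A = 0, B = 0;
  some_handle_t Handles[] = {&A, &B};
  size_t Count = 2;
  some_handle_t *HandlePtr = Handles;
  FunctionAParams Params{&Count, &HandlePtr};
  std::cout << "(" << Params << ")\n";
  return 0;
}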
- -include "APIDefs.td" - -def : Handle { - let name = "some_handle_t"; - let desc = "An example handle type"; -} - -def : Function { - let name = "FunctionA"; - let desc = "Function A description"; - let details = [ "Function A detailed information" ]; - let params = [ - Param<"size_t", "OutCount", "the number of things to write out", PARAM_IN>, - RangedParam<"some_handle_t*", "OutPtr", "pointer to the output things.", PARAM_OUT, - Range<"0", "OutCount">> - ]; - let returns = []; -} - -// CHECK: inline std::ostream &operator<<(std::ostream &os, const struct function_a_params_t *params) { -// CHECK: os << ".OutPtr = "; -// CHECK: for (size_t i = 0; i < *params->pOutCount; i++) { -// CHECK: if (i > 0) { -// CHECK: os << ", "; -// CHECK: } -// CHECK: printPtr(os, (*params->pOutPtr)[i]); -// CHECK: } -// CHECK: os << "}"; diff --git a/offload/test/tools/offload-tblgen/print_enum.td b/offload/test/tools/offload-tblgen/print_enum.td deleted file mode 100644 index 0b5506009bec5..0000000000000 --- a/offload/test/tools/offload-tblgen/print_enum.td +++ /dev/null @@ -1,34 +0,0 @@ -// RUN: %offload-tblgen -gen-print-header -I %S/../../../liboffload/API %s | %fcheck-generic - -// Check that print helpers are created for enums - -include "APIDefs.td" - -def : Enum { - let name = "my_enum_t"; - let desc = "An example enum"; - let etors =[ - Etor<"VALUE_ONE", "The first enum value">, - Etor<"VALUE_TWO", "The second enum value">, - Etor<"VALUE_THREE", "The third enum value">, - Etor<"VALUE_FOUR", "The fourth enum value">, - ]; -} - -// CHECK: inline std::ostream &operator<<(std::ostream &os, enum my_enum_t value) -// CHECK: switch (value) { -// CHECK: case MY_ENUM_VALUE_ONE: -// CHECK: os << "MY_ENUM_VALUE_ONE"; -// CHECK: break; -// CHECK: case MY_ENUM_VALUE_TWO: -// CHECK: os << "MY_ENUM_VALUE_TWO"; -// CHECK: break; -// CHECK: case MY_ENUM_VALUE_THREE: -// CHECK: os << "MY_ENUM_VALUE_THREE"; -// CHECK: break; -// CHECK: case MY_ENUM_VALUE_FOUR: -// CHECK: os << "MY_ENUM_VALUE_FOUR"; -// CHECK: break; -// CHECK: default: -// CHECK: os << "unknown enumerator"; -// CHECK: break; diff --git a/offload/test/tools/offload-tblgen/print_function.td b/offload/test/tools/offload-tblgen/print_function.td deleted file mode 100644 index 3f4944df65941..0000000000000 --- a/offload/test/tools/offload-tblgen/print_function.td +++ /dev/null @@ -1,38 +0,0 @@ -// RUN: %offload-tblgen -gen-print-header -I %S/../../../liboffload/API %s | %fcheck-generic --check-prefix=CHECK-PRINT -// RUN: %offload-tblgen -gen-api -I %S/../../../liboffload/API %s | %fcheck-generic --check-prefix=CHECK-API - -// Check that print helpers are created for functions - -include "APIDefs.td" - -def : Handle { - let name = "ol_foo_handle_t"; - let desc = "Example handle type"; -} - -def : Function { - let name = "FunctionA"; - let desc = "Function A description"; - let details = [ "Function A detailed information" ]; - let params = [ - Param<"uint32_t", "ParamValue", "A plain value parameter">, - Param<"ol_foo_handle_t", "ParamHandle", "A handle parameter">, - Param<"uint32_t*", "ParamPointer", "A pointer parameter">, - ]; - let returns = []; -} - -// CHECK-API: typedef struct function_a_params_t { -// CHECK-API-NEXT: uint32_t* pParamValue; -// CHECK-API-NEXT: ol_foo_handle_t* pParamHandle; -// CHECK-API-NEXT: uint32_t** pParamPointer; - -// CHECK-PRINT: inline std::ostream &operator<<(std::ostream &os, const struct function_a_params_t *params) -// CHECK-PRINT: os << ".ParamValue = "; -// CHECK-PRINT: os << *params->pParamValue; -// CHECK-PRINT: os 
<< ", "; -// CHECK-PRINT: os << ".ParamHandle = "; -// CHECK-PRINT: printPtr(os, *params->pParamHandle); -// CHECK-PRINT: os << ", "; -// CHECK-PRINT: os << ".ParamPointer = "; -// CHECK-PRINT: printPtr(os, *params->pParamPointer); diff --git a/offload/test/tools/offload-tblgen/type_tagged_enum.td b/offload/test/tools/offload-tblgen/type_tagged_enum.td deleted file mode 100644 index 49e91e43bb6ef..0000000000000 --- a/offload/test/tools/offload-tblgen/type_tagged_enum.td +++ /dev/null @@ -1,76 +0,0 @@ -// RUN: %offload-tblgen -gen-api -I %S/../../../liboffload/API %s | %fcheck-generic --check-prefix=CHECK-API -// RUN: %offload-tblgen -gen-print-header -I %S/../../../liboffload/API %s | %fcheck-generic --check-prefix=CHECK-PRINT - -// Check that type-tagged enumerators are implemented correctly. They enable -// functions to return data of an arbitrary type and size via a void*, using -// the value of an enum parameter to indicate which type is being returned. -// This allows, for example, for a single olGetDeviceInfo function, rather -// than requiring a separate entry point for every possible query. - -include "APIDefs.td" - -def : Handle { - let name = "some_handle_t"; - let desc = "An example handle type"; -} - -def : Enum { - let name = "my_type_tagged_enum_t"; - let desc = "Example type tagged enum"; - let is_typed = 1; - let etors = [ - TaggedEtor<"VALUE_ONE", "uint32_t", "Value one.">, - TaggedEtor<"VALUE_TWO", "char[]", "Value two.">, - TaggedEtor<"VALUE_THREE", "some_handle_t", "Value three."> - ]; -} - -// Check the tagged types appear in the comments -// CHECK-API: typedef enum my_type_tagged_enum_t { -// CHECK-API-NEXT: [uint32_t] Value one. -// CHECK-API-NEXT: MY_TYPE_TAGGED_ENUM_VALUE_ONE = 0, -// CHECK-API-NEXT: [char[]] Value two. -// CHECK-API-NEXT: MY_TYPE_TAGGED_ENUM_VALUE_TWO = 1, -// CHECK-API-NEXT: [some_handle_t] Value three. -// CHECK-API-NEXT: MY_TYPE_TAGGED_ENUM_VALUE_THREE = 2, - -def : Function { - let name = "FunctionA"; - let desc = "Function A description"; - let details = [ "Function A detailed information" ]; - let params = [ - Param<"my_type_tagged_enum_t", "PropName", "type of the info to retrieve", PARAM_IN>, - Param<"size_t", "PropSize", "the number of bytes pointed to by PropValue.", PARAM_IN>, - TypeTaggedParam<"void*", "PropValue", "array of bytes holding the info. 
" - "If PropSize is not equal to or greater to the real number of bytes needed to return the info " - "then the OL_ERRC_INVALID_SIZE error is returned and PropValue is not used.", PARAM_OUT, - TypeInfo<"PropName" , "PropSize">> - ]; - let returns = []; -} - -// Check that a tagged enum print function definition is generated -// CHECK-PRINT: void printTagged(std::ostream &os, const void *ptr, my_type_tagged_enum_t value, size_t size) { -// CHECK-PRINT: case MY_TYPE_TAGGED_ENUM_VALUE_ONE: { -// CHECK-PRINT: const uint32_t * const tptr = (const uint32_t * const)ptr; -// CHECK-PRINT: os << (const void *)tptr << " ("; -// CHECK-PRINT: os << *tptr; -// CHECK-PRINT: os << ")"; -// CHECK-PRINT: break; -// CHECK-PRINT: } -// CHECK-PRINT: case MY_TYPE_TAGGED_ENUM_VALUE_TWO: { -// CHECK-PRINT: printPtr(os, (const char*) ptr); -// CHECK-PRINT: break; -// CHECK-PRINT: } -// CHECK-PRINT: case MY_TYPE_TAGGED_ENUM_VALUE_THREE: { -// CHECK-PRINT: const some_handle_t * const tptr = (const some_handle_t * const)ptr; -// CHECK-PRINT: os << (const void *)tptr << " ("; -// CHECK-PRINT: os << *tptr; -// CHECK-PRINT: os << ")"; -// CHECK-PRINT: break; -// CHECK-PRINT: } - -// Check that the tagged type information is used when printing function parameters -// CHECK-PRINT: std::ostream &operator<<(std::ostream &os, const struct function_a_params_t *params) { -// CHECK-PRINT: os << ".PropValue = " -// CHECK-PRINT-NEXT: printTagged(os, *params->pPropValue, *params->pPropName, *params->pPropSize); diff --git a/offload/tools/offload-tblgen/APIGen.cpp b/offload/tools/offload-tblgen/APIGen.cpp deleted file mode 100644 index 97a2464f7a75c..0000000000000 --- a/offload/tools/offload-tblgen/APIGen.cpp +++ /dev/null @@ -1,229 +0,0 @@ -//===- offload-tblgen/APIGen.cpp - Tablegen backend for Offload header ----===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This is a Tablegen backend that produces the contents of the Offload API -// header. The generated comments are Doxygen compatible. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/StringExtras.h" -#include "llvm/Support/FormatVariadic.h" -#include "llvm/TableGen/Record.h" -#include "llvm/TableGen/TableGenBackend.h" - -#include "GenCommon.hpp" -#include "RecordTypes.hpp" - -using namespace llvm; -using namespace offload::tblgen; - -// Produce a possibly multi-line comment from the input string -static std::string MakeComment(StringRef in) { - std::string out = ""; - size_t LineStart = 0; - size_t LineBreak = 0; - while (LineBreak < in.size()) { - LineBreak = in.find_first_of("\n", LineStart); - if (LineBreak - LineStart <= 1) { - break; - } - out += std::string("/// ") + - in.substr(LineStart, LineBreak - LineStart).str() + "\n"; - LineStart = LineBreak + 1; - } - - return out; -} - -static void ProcessHandle(const HandleRec &H, raw_ostream &OS) { - OS << CommentsHeader; - OS << formatv("/// @brief {0}\n", H.getDesc()); - OS << formatv("typedef struct {0}_ *{0};\n", H.getName()); -} - -static void ProcessTypedef(const TypedefRec &T, raw_ostream &OS) { - OS << CommentsHeader; - OS << formatv("/// @brief {0}\n", T.getDesc()); - OS << formatv("typedef {0} {1};\n", T.getValue(), T.getName()); -} - -static void ProcessMacro(const MacroRec &M, raw_ostream &OS) { - OS << CommentsHeader; - OS << formatv("#ifndef {0}\n", M.getName()); - if (auto Condition = M.getCondition()) { - OS << formatv("#if {0}\n", *Condition); - } - OS << "/// @brief " << M.getDesc() << "\n"; - OS << formatv("#define {0} {1}\n", M.getNameWithArgs(), M.getValue()); - if (auto AltValue = M.getAltValue()) { - OS << "#else\n"; - OS << formatv("#define {0} {1}\n", M.getNameWithArgs(), *AltValue); - } - if (auto Condition = M.getCondition()) { - OS << formatv("#endif // {0}\n", *Condition); - } - OS << formatv("#endif // {0}\n", M.getName()); -} - -static void ProcessFunction(const FunctionRec &F, raw_ostream &OS) { - OS << CommentsHeader; - OS << formatv("/// @brief {0}\n", F.getDesc()); - OS << CommentsBreak; - - OS << "/// @details\n"; - for (auto &Detail : F.getDetails()) { - OS << formatv("/// - {0}\n", Detail); - } - OS << CommentsBreak; - - // Emit analogue remarks - auto Analogues = F.getAnalogues(); - if (!Analogues.empty()) { - OS << "/// @remarks\n/// _Analogues_\n"; - for (auto &Analogue : Analogues) { - OS << formatv("/// - **{0}**\n", Analogue); - } - OS << CommentsBreak; - } - - OS << "/// @returns\n"; - auto Returns = F.getReturns(); - for (auto &Ret : Returns) { - OS << formatv("/// - ::{0}\n", Ret.getValue()); - auto RetConditions = Ret.getConditions(); - for (auto &RetCondition : RetConditions) { - OS << formatv("/// + {0}\n", RetCondition); - } - } - - OS << formatv("{0}_APIEXPORT {1}_result_t {0}_APICALL ", PrefixUpper, - PrefixLower); - OS << F.getName(); - OS << "(\n"; - auto Params = F.getParams(); - for (auto &Param : Params) { - OS << MakeParamComment(Param) << "\n"; - OS << " " << Param.getType() << " " << Param.getName(); - if (Param != Params.back()) { - OS << ",\n"; - } else { - OS << "\n"; - } - } - OS << ");\n\n"; -} - -static void ProcessEnum(const EnumRec &Enum, raw_ostream &OS) { - OS << CommentsHeader; - OS << formatv("/// @brief {0}\n", Enum.getDesc()); - OS << formatv("typedef enum {0} {{\n", Enum.getName()); - - uint32_t EtorVal = 0; - for (const auto &EnumVal : Enum.getValues()) { - if (Enum.isTyped()) { - OS << MakeComment( - formatv("[{0}] {1}", EnumVal.getTaggedType(), EnumVal.getDesc()) - .str()); - } else { - OS << MakeComment(EnumVal.getDesc()); - } - OS 
<< formatv(TAB_1 "{0}_{1} = {2},\n", Enum.getEnumValNamePrefix(), - EnumVal.getName(), EtorVal++); - } - - // Add force uint32 val - OS << formatv(TAB_1 "/// @cond\n" TAB_1 - "{0}_FORCE_UINT32 = 0x7fffffff\n" TAB_1 - "/// @endcond\n\n", - Enum.getEnumValNamePrefix()); - - OS << formatv("} {0};\n", Enum.getName()); -} - -static void ProcessStruct(const StructRec &Struct, raw_ostream &OS) { - OS << CommentsHeader; - OS << formatv("/// @brief {0}\n", Struct.getDesc()); - OS << formatv("typedef struct {0} {{\n", Struct.getName()); - - for (const auto &Member : Struct.getMembers()) { - OS << formatv(TAB_1 "{0} {1}; {2}", Member.getType(), Member.getName(), - MakeComment(Member.getDesc())); - } - - OS << formatv("} {0};\n\n", Struct.getName()); -} - -static void ProcessFuncParamStruct(const FunctionRec &Func, raw_ostream &OS) { - if (Func.getParams().size() == 0) { - return; - } - - auto FuncParamStructBegin = R"( -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for {0} -/// @details Each entry is a pointer to the parameter passed to the function; -typedef struct {1} {{ -)"; - - OS << formatv(FuncParamStructBegin, Func.getName(), - Func.getParamStructName()); - for (const auto &Param : Func.getParams()) { - OS << TAB_1 << Param.getType() << "* p" << Param.getName() << ";\n"; - } - OS << formatv("} {0};\n", Func.getParamStructName()); -} - -static void ProcessFuncWithCodeLocVariant(const FunctionRec &Func, - raw_ostream &OS) { - - auto FuncWithCodeLocBegin = R"( -/////////////////////////////////////////////////////////////////////////////// -/// @brief Variant of {0} that also sets source code location information -/// @details See also ::{0} -OL_APIEXPORT ol_result_t OL_APICALL {0}WithCodeLoc( -)"; - OS << formatv(FuncWithCodeLocBegin, Func.getName()); - auto Params = Func.getParams(); - for (auto &Param : Params) { - OS << " " << Param.getType() << " " << Param.getName(); - OS << ",\n"; - } - OS << "ol_code_location_t *CodeLocation);\n\n"; -} - -void EmitOffloadAPI(const RecordKeeper &Records, raw_ostream &OS) { - OS << GenericHeader; - OS << FileHeader; - // Generate main API definitions - for (auto *R : Records.getAllDerivedDefinitions("APIObject")) { - if (R->isSubClassOf("Macro")) { - ProcessMacro(MacroRec{R}, OS); - } else if (R->isSubClassOf("Typedef")) { - ProcessTypedef(TypedefRec{R}, OS); - } else if (R->isSubClassOf("Handle")) { - ProcessHandle(HandleRec{R}, OS); - } else if (R->isSubClassOf("Function")) { - ProcessFunction(FunctionRec{R}, OS); - } else if (R->isSubClassOf("Enum")) { - ProcessEnum(EnumRec{R}, OS); - } else if (R->isSubClassOf("Struct")) { - ProcessStruct(StructRec{R}, OS); - } - } - - // Generate auxiliary definitions (func param structs etc) - for (auto *R : Records.getAllDerivedDefinitions("Function")) { - ProcessFuncParamStruct(FunctionRec{R}, OS); - } - - for (auto *R : Records.getAllDerivedDefinitions("Function")) { - ProcessFuncWithCodeLocVariant(FunctionRec{R}, OS); - } - - OS << FileFooter; -} diff --git a/offload/tools/offload-tblgen/CMakeLists.txt b/offload/tools/offload-tblgen/CMakeLists.txt deleted file mode 100644 index 52986cbbaa918..0000000000000 --- a/offload/tools/offload-tblgen/CMakeLists.txt +++ /dev/null @@ -1,24 +0,0 @@ -##===----------------------------------------------------------------------===## -# -# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. 
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -##===----------------------------------------------------------------------===## -include(TableGen) - -add_tablegen(offload-tblgen OFFLOAD - EXPORT OFFLOAD - APIGen.cpp - EntryPointGen.cpp - FuncsGen.cpp - GenCommon.hpp - Generators.hpp - offload-tblgen.cpp - PrintGen.cpp - RecordTypes.hpp - ) - -set(OFFLOAD_TABLEGEN_EXE "${OFFLOAD_TABLEGEN_EXE}" CACHE INTERNAL "") -set(OFFLOAD_TABLEGEN_TARGET "${OFFLOAD_TABLEGEN_TARGET}" CACHE INTERNAL "") - diff --git a/offload/tools/offload-tblgen/EntryPointGen.cpp b/offload/tools/offload-tblgen/EntryPointGen.cpp deleted file mode 100644 index 990ff96a3121d..0000000000000 --- a/offload/tools/offload-tblgen/EntryPointGen.cpp +++ /dev/null @@ -1,138 +0,0 @@ -//===- offload-tblgen/EntryPointGen.cpp - Tablegen backend for Offload ----===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This is a Tablegen backend that produces the actual entry points for the -// Offload API. It serves as a place to integrate functionality like tracing -// and validation before dispatching to the actual implementations. -//===----------------------------------------------------------------------===// - -#include "llvm/Support/FormatVariadic.h" -#include "llvm/TableGen/Record.h" - -#include "GenCommon.hpp" -#include "RecordTypes.hpp" - -using namespace llvm; -using namespace offload::tblgen; - -static void EmitValidationFunc(const FunctionRec &F, raw_ostream &OS) { - OS << CommentsHeader; - // Emit preamble - OS << formatv("{0}_impl_result_t {1}_val(\n ", PrefixLower, F.getName()); - // Emit arguments - std::string ParamNameList = ""; - for (auto &Param : F.getParams()) { - OS << Param.getType() << " " << Param.getName(); - if (Param != F.getParams().back()) { - OS << ", "; - } - ParamNameList += Param.getName().str() + ", "; - } - OS << ") {\n"; - - OS << TAB_1 "if (true /*enableParameterValidation*/) {\n"; - // Emit validation checks - for (const auto &Return : F.getReturns()) { - for (auto &Condition : Return.getConditions()) { - if (Condition.starts_with("`") && Condition.ends_with("`")) { - auto ConditionString = Condition.substr(1, Condition.size() - 2); - OS << formatv(TAB_2 "if ({0}) {{\n", ConditionString); - OS << formatv(TAB_3 "return {0};\n", Return.getValue()); - OS << TAB_2 "}\n\n"; - } - } - } - OS << TAB_1 "}\n\n"; - - // Perform actual function call to the implementation - ParamNameList = ParamNameList.substr(0, ParamNameList.size() - 2); - OS << formatv(TAB_1 "return {0}_impl({1});\n\n", F.getName(), ParamNameList); - OS << "}\n"; -} - -static void EmitEntryPointFunc(const FunctionRec &F, raw_ostream &OS) { - // Emit preamble - OS << formatv("{1}_APIEXPORT {0}_result_t {1}_APICALL {2}(\n ", PrefixLower, - PrefixUpper, F.getName()); - // Emit arguments - std::string ParamNameList = ""; - for (auto &Param : F.getParams()) { - OS << Param.getType() << " " << Param.getName(); - if (Param != F.getParams().back()) { - OS << ", "; - } - ParamNameList += Param.getName().str() + ", "; - } - OS << ") {\n"; - - // Emit pre-call prints - OS << TAB_1 "if (offloadConfig().TracingEnabled) {\n"; - OS << formatv(TAB_2 "std::cout << \"---> {0}\";\n", F.getName()); - OS << TAB_1 "}\n\n"; - - // Perform actual function call to the validation wrapper - ParamNameList = 
ParamNameList.substr(0, ParamNameList.size() - 2); - OS << formatv(TAB_1 "{0}_result_t Result = {1}_val({2});\n\n", PrefixLower, - F.getName(), ParamNameList); - - // Emit post-call prints - OS << TAB_1 "if (offloadConfig().TracingEnabled) {\n"; - if (F.getParams().size() > 0) { - OS << formatv(TAB_2 "{0} Params = {{", F.getParamStructName()); - for (const auto &Param : F.getParams()) { - OS << "&" << Param.getName(); - if (Param != F.getParams().back()) { - OS << ", "; - } - } - OS << formatv("};\n"); - OS << TAB_2 "std::cout << \"(\" << &Params << \")\";\n"; - } else { - OS << TAB_2 "std::cout << \"()\";\n"; - } - OS << TAB_2 "std::cout << \"-> \" << Result << \"\\n\";\n"; - OS << TAB_2 "if (Result && Result->Details) {\n"; - OS << TAB_3 "std::cout << \" *Error Details* \" << Result->Details " - "<< \" \\n\";\n"; - OS << TAB_2 "}\n"; - OS << TAB_1 "}\n"; - - OS << TAB_1 "return Result;\n"; - OS << "}\n"; -} - -static void EmitCodeLocWrapper(const FunctionRec &F, raw_ostream &OS) { - // Emit preamble - OS << formatv("{0}_result_t {1}WithCodeLoc(\n ", PrefixLower, F.getName()); - // Emit arguments - std::string ParamNameList = ""; - for (auto &Param : F.getParams()) { - OS << Param.getType() << " " << Param.getName() << ", "; - ParamNameList += Param.getName().str(); - if (Param != F.getParams().back()) { - ParamNameList += ", "; - } - } - OS << "ol_code_location_t *CodeLocation"; - OS << ") {\n"; - OS << TAB_1 "currentCodeLocation() = CodeLocation;\n"; - OS << formatv(TAB_1 "{0}_result_t Result = {1}({2});\n\n", PrefixLower, - F.getName(), ParamNameList); - OS << TAB_1 "currentCodeLocation() = nullptr;\n"; - OS << TAB_1 "return Result;\n"; - OS << "}\n"; -} - -void EmitOffloadEntryPoints(const RecordKeeper &Records, raw_ostream &OS) { - OS << GenericHeader; - for (auto *R : Records.getAllDerivedDefinitions("Function")) { - EmitValidationFunc(FunctionRec{R}, OS); - EmitEntryPointFunc(FunctionRec{R}, OS); - EmitCodeLocWrapper(FunctionRec{R}, OS); - } -} diff --git a/offload/tools/offload-tblgen/FuncsGen.cpp b/offload/tools/offload-tblgen/FuncsGen.cpp deleted file mode 100644 index 3238652176198..0000000000000 --- a/offload/tools/offload-tblgen/FuncsGen.cpp +++ /dev/null @@ -1,74 +0,0 @@ -//===- offload-tblgen/APIGen.cpp - Tablegen backend for Offload functions -===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This is a Tablegen backend that handles generation of various small files -// pertaining to the API functions. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/FormatVariadic.h" -#include "llvm/TableGen/Record.h" - -#include "GenCommon.hpp" -#include "RecordTypes.hpp" - -using namespace llvm; -using namespace offload::tblgen; - -// Emit a list of just the API function names -void EmitOffloadFuncNames(const RecordKeeper &Records, raw_ostream &OS) { - OS << GenericHeader; - OS << R"( -#ifndef OFFLOAD_FUNC -#error Please define the macro OFFLOAD_FUNC(Function) -#endif - -)"; - for (auto *R : Records.getAllDerivedDefinitions("Function")) { - FunctionRec FR{R}; - OS << formatv("OFFLOAD_FUNC({0})", FR.getName()) << "\n"; - } - for (auto *R : Records.getAllDerivedDefinitions("Function")) { - FunctionRec FR{R}; - OS << formatv("OFFLOAD_FUNC({0}WithCodeLoc)", FR.getName()) << "\n"; - } - - OS << "\n#undef OFFLOAD_FUNC\n"; -} - -void EmitOffloadExports(const RecordKeeper &Records, raw_ostream &OS) { - OS << "VERS1.0 {\n"; - OS << TAB_1 "global:\n"; - - for (auto *R : Records.getAllDerivedDefinitions("Function")) { - OS << formatv(TAB_2 "{0};\n", FunctionRec(R).getName()); - } - for (auto *R : Records.getAllDerivedDefinitions("Function")) { - OS << formatv(TAB_2 "{0}WithCodeLoc;\n", FunctionRec(R).getName()); - } - OS << TAB_1 "local:\n"; - OS << TAB_2 "*;\n"; - OS << "};\n"; -} - -// Emit declarations for every implementation function -void EmitOffloadImplFuncDecls(const RecordKeeper &Records, raw_ostream &OS) { - OS << GenericHeader; - for (auto *R : Records.getAllDerivedDefinitions("Function")) { - FunctionRec F{R}; - OS << formatv("{0}_impl_result_t {1}_impl(", PrefixLower, F.getName()); - auto Params = F.getParams(); - for (auto &Param : Params) { - OS << Param.getType() << " " << Param.getName(); - if (Param != Params.back()) { - OS << ", "; - } - } - OS << ");\n\n"; - } -} diff --git a/offload/tools/offload-tblgen/GenCommon.hpp b/offload/tools/offload-tblgen/GenCommon.hpp deleted file mode 100644 index db432e9958b5d..0000000000000 --- a/offload/tools/offload-tblgen/GenCommon.hpp +++ /dev/null @@ -1,67 +0,0 @@ -//===- offload-tblgen/GenCommon.cpp - Common defs for Offload generators --===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "RecordTypes.hpp" -#include "llvm/Support/FormatVariadic.h" - -// Having inline bits of tabbed code is hard to read, provide some definitions -// so we can keep things tidier -#define TAB_1 " " -#define TAB_2 " " -#define TAB_3 " " -#define TAB_4 " " -#define TAB_5 " " - -constexpr auto GenericHeader = - R"(//===- Auto-generated file, part of the LLVM/Offload project --------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -)"; - -constexpr auto FileHeader = R"( -// Auto-generated file, do not manually edit. 
- -#pragma once - -#include -#include - -#if defined(__cplusplus) -extern "C" { -#endif - -)"; - -constexpr auto FileFooter = R"( -#if defined(__cplusplus) -} // extern "C" -#endif - -)"; - -constexpr auto CommentsHeader = R"( -/////////////////////////////////////////////////////////////////////////////// -)"; - -constexpr auto CommentsBreak = "///\n"; - -constexpr auto PrefixLower = "ol"; -constexpr auto PrefixUpper = "OL"; - -inline std::string -MakeParamComment(const llvm::offload::tblgen::ParamRec &Param) { - return llvm::formatv("// {0}{1}{2} {3}", (Param.isIn() ? "[in]" : ""), - (Param.isOut() ? "[out]" : ""), - (Param.isOpt() ? "[optional]" : ""), Param.getDesc()); -} diff --git a/offload/tools/offload-tblgen/Generators.hpp b/offload/tools/offload-tblgen/Generators.hpp deleted file mode 100644 index 8b6104c5cd9c6..0000000000000 --- a/offload/tools/offload-tblgen/Generators.hpp +++ /dev/null @@ -1,23 +0,0 @@ -//===- offload-tblgen/Generators.hpp - Offload generator declarations -----===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "llvm/TableGen/Record.h" - -void EmitOffloadAPI(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitOffloadFuncNames(const llvm::RecordKeeper &Records, - llvm::raw_ostream &OS); -void EmitOffloadImplFuncDecls(const llvm::RecordKeeper &Records, - llvm::raw_ostream &OS); -void EmitOffloadEntryPoints(const llvm::RecordKeeper &Records, - llvm::raw_ostream &OS); -void EmitOffloadPrintHeader(const llvm::RecordKeeper &Records, - llvm::raw_ostream &OS); -void EmitOffloadExports(const llvm::RecordKeeper &Records, - llvm::raw_ostream &OS); diff --git a/offload/tools/offload-tblgen/PrintGen.cpp b/offload/tools/offload-tblgen/PrintGen.cpp deleted file mode 100644 index 2a7c63c3dfd1f..0000000000000 --- a/offload/tools/offload-tblgen/PrintGen.cpp +++ /dev/null @@ -1,226 +0,0 @@ -//===- offload-tblgen/APIGen.cpp - Tablegen backend for Offload printing --===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This is a Tablegen backend that produces print functions for the Offload API -// entry point functions. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/FormatVariadic.h" -#include "llvm/TableGen/Record.h" - -#include "GenCommon.hpp" -#include "RecordTypes.hpp" - -using namespace llvm; -using namespace offload::tblgen; - -constexpr auto PrintEnumHeader = - R"(/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the {0} type -/// @returns std::ostream & -)"; - -constexpr auto PrintTaggedEnumHeader = - R"(/////////////////////////////////////////////////////////////////////////////// -/// @brief Print type-tagged {0} enum value -/// @returns std::ostream & -)"; - -static void ProcessEnum(const EnumRec &Enum, raw_ostream &OS) { - OS << formatv(PrintEnumHeader, Enum.getName()); - OS << formatv( - "inline std::ostream &operator<<(std::ostream &os, enum {0} value) " - "{{\n" TAB_1 "switch (value) {{\n", - Enum.getName()); - - for (const auto &Val : Enum.getValues()) { - auto Name = Enum.getEnumValNamePrefix() + "_" + Val.getName(); - OS << formatv(TAB_1 "case {0}:\n", Name); - OS << formatv(TAB_2 "os << \"{0}\";\n", Name); - OS << formatv(TAB_2 "break;\n"); - } - - OS << TAB_1 "default:\n" TAB_2 "os << \"unknown enumerator\";\n" TAB_2 - "break;\n" TAB_1 "}\n" TAB_1 "return os;\n}\n\n"; - - if (!Enum.isTyped()) { - return; - } - - OS << formatv(PrintTaggedEnumHeader, Enum.getName()); - - OS << formatv(R"""(template <> -inline void printTagged(std::ostream &os, const void *ptr, {0} value, size_t size) {{ - if (ptr == NULL) {{ - printPtr(os, ptr); - return; - } - - switch (value) {{ -)""", - Enum.getName()); - - for (const auto &Val : Enum.getValues()) { - auto Name = Enum.getEnumValNamePrefix() + "_" + Val.getName(); - auto Type = Val.getTaggedType(); - OS << formatv(TAB_1 "case {0}: {{\n", Name); - // Special case for strings - if (Type == "char[]") { - OS << formatv(TAB_2 "printPtr(os, (const char*) ptr);\n"); - } else { - OS << formatv(TAB_2 "const {0} * const tptr = (const {0} * const)ptr;\n", - Type); - // TODO: Handle other cases here - OS << TAB_2 "os << (const void *)tptr << \" (\";\n"; - if (Type.ends_with("*")) { - OS << TAB_2 "os << printPtr(os, tptr);\n"; - } else { - OS << TAB_2 "os << *tptr;\n"; - } - OS << TAB_2 "os << \")\";\n"; - } - OS << formatv(TAB_2 "break;\n" TAB_1 "}\n"); - } - - OS << TAB_1 "default:\n" TAB_2 "os << \"unknown enumerator\";\n" TAB_2 - "break;\n" TAB_1 "}\n"; - - OS << "}\n"; -} - -static void EmitResultPrint(raw_ostream &OS) { - OS << R""( -inline std::ostream &operator<<(std::ostream &os, - const ol_error_struct_t *Err) { - if (Err == nullptr) { - os << "OL_SUCCESS"; - } else { - os << Err->Code; - } - return os; -} -)""; -} - -static void EmitFunctionParamStructPrint(const FunctionRec &Func, - raw_ostream &OS) { - if (Func.getParams().size() == 0) { - return; - } - - OS << formatv(R"( -inline std::ostream &operator<<(std::ostream &os, const struct {0} *params) {{ -)", - Func.getParamStructName()); - - for (const auto &Param : Func.getParams()) { - OS << formatv(TAB_1 "os << \".{0} = \";\n", Param.getName()); - if (auto Range = Param.getRange()) { - OS << formatv(TAB_1 "os << \"{{\";\n"); - OS << formatv(TAB_1 "for (size_t i = {0}; i < *params->p{1}; i++) {{\n", - Range->first, Range->second); - OS << TAB_2 "if (i > 0) {\n"; - OS << TAB_3 " os << \", \";\n"; - OS << TAB_2 "}\n"; - OS << formatv(TAB_2 "printPtr(os, (*params->p{0})[i]);\n", - Param.getName()); - OS << formatv(TAB_1 "}\n"); - OS << formatv(TAB_1 "os << \"}\";\n"); - } else if 
(auto TypeInfo = Param.getTypeInfo()) { - OS << formatv( - TAB_1 - "printTagged(os, *params->p{0}, *params->p{1}, *params->p{2});\n", - Param.getName(), TypeInfo->first, TypeInfo->second); - } else if (Param.isPointerType() || Param.isHandleType()) { - OS << formatv(TAB_1 "printPtr(os, *params->p{0});\n", Param.getName()); - } else { - OS << formatv(TAB_1 "os << *params->p{0};\n", Param.getName()); - } - if (Param != Func.getParams().back()) { - OS << TAB_1 "os << \", \";\n"; - } - } - - OS << TAB_1 "return os;\n}\n"; -} - -void EmitOffloadPrintHeader(const RecordKeeper &Records, raw_ostream &OS) { - OS << GenericHeader; - OS << R"""( -// Auto-generated file, do not manually edit. - -#pragma once - -#include -#include - - -template inline ol_result_t printPtr(std::ostream &os, const T *ptr); -template inline void printTagged(std::ostream &os, const void *ptr, T value, size_t size); -)"""; - - // ========== - OS << "template struct is_handle : std::false_type {};\n"; - for (auto *R : Records.getAllDerivedDefinitions("Handle")) { - HandleRec H{R}; - OS << formatv("template <> struct is_handle<{0}> : std::true_type {{};\n", - H.getName()); - } - OS << "template inline constexpr bool is_handle_v = " - "is_handle::value;\n"; - // ========= - - // Forward declare the operator<< overloads so their implementations can - // use each other. - OS << "\n"; - for (auto *R : Records.getAllDerivedDefinitions("Enum")) { - OS << formatv( - "inline std::ostream &operator<<(std::ostream &os, enum {0} value);\n", - EnumRec{R}.getName()); - } - OS << "\n"; - - // Create definitions - for (auto *R : Records.getAllDerivedDefinitions("Enum")) { - EnumRec E{R}; - ProcessEnum(E, OS); - } - EmitResultPrint(OS); - - // Emit print functions for the function param structs - for (auto *R : Records.getAllDerivedDefinitions("Function")) { - EmitFunctionParamStructPrint(FunctionRec{R}, OS); - } - - OS << R"""( -/////////////////////////////////////////////////////////////////////////////// -// @brief Print pointer value -template inline ol_result_t printPtr(std::ostream &os, const T *ptr) { - if (ptr == nullptr) { - os << "nullptr"; - } else if constexpr (std::is_pointer_v) { - os << (const void *)(ptr) << " ("; - printPtr(os, *ptr); - os << ")"; - } else if constexpr (std::is_void_v || is_handle_v) { - os << (const void *)ptr; - } else if constexpr (std::is_same_v, char>) { - os << (const void *)(ptr) << " ("; - os << ptr; - os << ")"; - } else { - os << (const void *)(ptr) << " ("; - os << *ptr; - os << ")"; - } - - return OL_SUCCESS; -} - )"""; -} diff --git a/offload/tools/offload-tblgen/RecordTypes.hpp b/offload/tools/offload-tblgen/RecordTypes.hpp deleted file mode 100644 index 0bf3256c525d9..0000000000000 --- a/offload/tools/offload-tblgen/RecordTypes.hpp +++ /dev/null @@ -1,227 +0,0 @@ -//===- offload-tblgen/RecordTypes.cpp - Offload record type wrappers -----===-// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include - -#include "llvm/TableGen/Record.h" - -namespace llvm { -namespace offload { -namespace tblgen { - -class HandleRec { -public: - explicit HandleRec(const Record *rec) : rec(rec) {} - StringRef getName() const { return rec->getValueAsString("name"); } - StringRef getDesc() const { return rec->getValueAsString("desc"); } - -private: - const Record *rec; -}; - -class MacroRec { -public: - explicit MacroRec(const Record *rec) : rec(rec) { - auto Name = rec->getValueAsString("name"); - auto OpenBrace = Name.find_first_of("("); - nameWithoutArgs = Name.substr(0, OpenBrace); - } - StringRef getName() const { return nameWithoutArgs; } - StringRef getNameWithArgs() const { return rec->getValueAsString("name"); } - StringRef getDesc() const { return rec->getValueAsString("desc"); } - - std::optional getCondition() const { - return rec->getValueAsOptionalString("condition"); - } - StringRef getValue() const { return rec->getValueAsString("value"); } - std::optional getAltValue() const { - return rec->getValueAsOptionalString("alt_value"); - } - -private: - const Record *rec; - std::string nameWithoutArgs; -}; - -class TypedefRec { -public: - explicit TypedefRec(const Record *rec) : rec(rec) {} - StringRef getName() const { return rec->getValueAsString("name"); } - StringRef getDesc() const { return rec->getValueAsString("desc"); } - StringRef getValue() const { return rec->getValueAsString("value"); } - -private: - const Record *rec; -}; - -class EnumValueRec { -public: - explicit EnumValueRec(const Record *rec) : rec(rec) {} - std::string getName() const { return rec->getValueAsString("name").upper(); } - StringRef getDesc() const { return rec->getValueAsString("desc"); } - StringRef getTaggedType() const { - return rec->getValueAsString("tagged_type"); - } - -private: - const Record *rec; -}; - -class EnumRec { -public: - explicit EnumRec(const Record *rec) : rec(rec) { - for (const auto *Val : rec->getValueAsListOfDefs("etors")) { - vals.emplace_back(EnumValueRec{Val}); - } - } - StringRef getName() const { return rec->getValueAsString("name"); } - StringRef getDesc() const { return rec->getValueAsString("desc"); } - const std::vector &getValues() const { return vals; } - - std::string getEnumValNamePrefix() const { - return StringRef(getName().str().substr(0, getName().str().length() - 2)) - .upper(); - } - - bool isTyped() const { return rec->getValueAsBit("is_typed"); } - -private: - const Record *rec; - std::vector vals; -}; - -class StructMemberRec { -public: - explicit StructMemberRec(const Record *rec) : rec(rec) {} - StringRef getType() const { return rec->getValueAsString("type"); } - StringRef getName() const { return rec->getValueAsString("name"); } - StringRef getDesc() const { return rec->getValueAsString("desc"); } - -private: - const Record *rec; -}; - -class StructRec { -public: - explicit StructRec(const Record *rec) : rec(rec) { - for (auto *Member : rec->getValueAsListOfDefs("all_members")) { - members.emplace_back(StructMemberRec(Member)); - } - } - StringRef getName() const { return rec->getValueAsString("name"); } - StringRef getDesc() const { return rec->getValueAsString("desc"); } - std::optional getBaseClass() const { - return rec->getValueAsOptionalString("base_class"); - } - const std::vector &getMembers() const { return members; } - -private: - const Record *rec; - std::vector members; -}; - -class 
ParamRec { -public: - explicit ParamRec(const Record *rec) : rec(rec) { - flags = rec->getValueAsBitsInit("flags"); - auto *Range = rec->getValueAsDef("range"); - auto RangeBegin = Range->getValueAsString("begin"); - auto RangeEnd = Range->getValueAsString("end"); - if (RangeBegin != "" && RangeEnd != "") { - range = {RangeBegin, RangeEnd}; - } else { - range = std::nullopt; - } - - auto *TypeInfo = rec->getValueAsDef("type_info"); - auto TypeInfoEnum = TypeInfo->getValueAsString("enum"); - auto TypeInfoSize = TypeInfo->getValueAsString("size"); - if (TypeInfoEnum != "" && TypeInfoSize != "") { - typeinfo = {TypeInfoEnum, TypeInfoSize}; - } else { - typeinfo = std::nullopt; - } - } - StringRef getName() const { return rec->getValueAsString("name"); } - StringRef getType() const { return rec->getValueAsString("type"); } - bool isPointerType() const { return getType().ends_with('*'); } - bool isHandleType() const { return getType().ends_with("_handle_t"); } - StringRef getDesc() const { return rec->getValueAsString("desc"); } - bool isIn() const { return dyn_cast(flags->getBit(0))->getValue(); } - bool isOut() const { return dyn_cast(flags->getBit(1))->getValue(); } - bool isOpt() const { return dyn_cast(flags->getBit(2))->getValue(); } - - const Record *getRec() const { return rec; } - std::optional> getRange() const { - return range; - } - - std::optional> getTypeInfo() const { - return typeinfo; - } - - // Needed to check whether we're at the back of a vector of params - bool operator!=(const ParamRec &p) const { return rec != p.getRec(); } - -private: - const Record *rec; - const BitsInit *flags; - std::optional> range; - std::optional> typeinfo; -}; - -class ReturnRec { -public: - ReturnRec(const Record *rec) : rec(rec) {} - StringRef getValue() const { return rec->getValueAsString("value"); } - std::vector getConditions() const { - return rec->getValueAsListOfStrings("conditions"); - } - -private: - const Record *rec; -}; - -class FunctionRec { -public: - FunctionRec(const Record *rec) : rec(rec) { - for (auto &Ret : rec->getValueAsListOfDefs("all_returns")) - rets.emplace_back(Ret); - for (auto &Param : rec->getValueAsListOfDefs("params")) - params.emplace_back(Param); - } - - std::string getParamStructName() const { - return llvm::formatv("{0}_params_t", - llvm::convertToSnakeFromCamelCase(getName())); - } - - StringRef getName() const { return rec->getValueAsString("name"); } - StringRef getClass() const { return rec->getValueAsString("api_class"); } - const std::vector &getReturns() const { return rets; } - const std::vector &getParams() const { return params; } - StringRef getDesc() const { return rec->getValueAsString("desc"); } - std::vector getDetails() const { - return rec->getValueAsListOfStrings("details"); - } - std::vector getAnalogues() const { - return rec->getValueAsListOfStrings("analogues"); - } - -private: - std::vector rets; - std::vector params; - - const Record *rec; -}; - -} // namespace tblgen -} // namespace offload -} // namespace llvm diff --git a/offload/tools/offload-tblgen/offload-tblgen.cpp b/offload/tools/offload-tblgen/offload-tblgen.cpp deleted file mode 100644 index 1912abf5265c7..0000000000000 --- a/offload/tools/offload-tblgen/offload-tblgen.cpp +++ /dev/null @@ -1,101 +0,0 @@ -//===- offload-tblgen/offload-tblgen.cpp ----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This is a Tablegen tool that produces source files for the Offload project. -// See offload/API/README.md for more information. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/InitLLVM.h" -#include "llvm/TableGen/Main.h" -#include "llvm/TableGen/Record.h" - -#include "Generators.hpp" - -namespace llvm { -namespace offload { -namespace tblgen { - -enum ActionType { - PrintRecords, - DumpJSON, - GenAPI, - GenFuncNames, - GenImplFuncDecls, - GenEntryPoints, - GenPrintHeader, - GenExports -}; - -namespace { -cl::opt Action( - cl::desc("Action to perform:"), - cl::values( - clEnumValN(PrintRecords, "print-records", - "Print all records to stdout (default)"), - clEnumValN(DumpJSON, "dump-json", - "Dump all records as machine-readable JSON"), - clEnumValN(GenAPI, "gen-api", "Generate Offload API header contents"), - clEnumValN(GenFuncNames, "gen-func-names", - "Generate a list of all Offload API function names"), - clEnumValN( - GenImplFuncDecls, "gen-impl-func-decls", - "Generate declarations for Offload API implementation functions"), - clEnumValN(GenEntryPoints, "gen-entry-points", - "Generate Offload API wrapper function definitions"), - clEnumValN(GenPrintHeader, "gen-print-header", - "Generate Offload API print header"), - clEnumValN(GenExports, "gen-exports", - "Generate export file for the Offload library"))); -} - -static bool OffloadTableGenMain(raw_ostream &OS, const RecordKeeper &Records) { - switch (Action) { - case PrintRecords: - OS << Records; - break; - case DumpJSON: - EmitJSON(Records, OS); - break; - case GenAPI: - EmitOffloadAPI(Records, OS); - break; - case GenFuncNames: - EmitOffloadFuncNames(Records, OS); - break; - case GenImplFuncDecls: - EmitOffloadImplFuncDecls(Records, OS); - break; - case GenEntryPoints: - EmitOffloadEntryPoints(Records, OS); - break; - case GenPrintHeader: - EmitOffloadPrintHeader(Records, OS); - break; - case GenExports: - EmitOffloadExports(Records, OS); - break; - } - - return false; -} - -int OffloadTblgenMain(int argc, char **argv) { - InitLLVM y(argc, argv); - cl::ParseCommandLineOptions(argc, argv); - return TableGenMain(argv[0], &OffloadTableGenMain); -} -} // namespace tblgen -} // namespace offload -} // namespace llvm - -using namespace llvm; -using namespace offload::tblgen; - -int main(int argc, char **argv) { return OffloadTblgenMain(argc, argv); } diff --git a/offload/unittests/CMakeLists.txt b/offload/unittests/CMakeLists.txt index 25ac4b2fa3675..73c87b708d25f 100644 --- a/offload/unittests/CMakeLists.txt +++ b/offload/unittests/CMakeLists.txt @@ -5,5 +5,4 @@ function(add_libompt_unittest test_dirname) add_unittest(LibomptUnitTests ${test_dirname} ${ARGN}) endfunction() -# add_subdirectory(Plugins) -add_subdirectory(OffloadAPI) +add_subdirectory(Plugins) diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt deleted file mode 100644 index 033ee2b6ec746..0000000000000 --- a/offload/unittests/OffloadAPI/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -set(PLUGINS_TEST_COMMON LLVMOffload) -set(PLUGINS_TEST_INCLUDE ${LIBOMPTARGET_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/common) - -add_libompt_unittest("offload.unittests" - ${CMAKE_CURRENT_SOURCE_DIR}/common/Environment.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/platform/olGetPlatform.cpp - 
${CMAKE_CURRENT_SOURCE_DIR}/platform/olGetPlatformCount.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/platform/olGetPlatformInfo.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/platform/olGetPlatformInfoSize.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/device/olGetDevice.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/device/olGetDeviceCount.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/device/olGetDeviceInfo.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/device/olGetDeviceInfoSize.cpp) -add_dependencies("offload.unittests" ${PLUGINS_TEST_COMMON}) -target_link_libraries("offload.unittests" PRIVATE ${PLUGINS_TEST_COMMON}) -target_include_directories("offload.unittests" PRIVATE ${PLUGINS_TEST_INCLUDE}) diff --git a/offload/unittests/OffloadAPI/common/Environment.cpp b/offload/unittests/OffloadAPI/common/Environment.cpp deleted file mode 100644 index f07a66cda2189..0000000000000 --- a/offload/unittests/OffloadAPI/common/Environment.cpp +++ /dev/null @@ -1,96 +0,0 @@ -//===------- Offload API tests - gtest environment ------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "Environment.hpp" -#include "Fixtures.hpp" -#include "llvm/Support/CommandLine.h" -#include - -using namespace llvm; - -// Wrapper so we don't have to constantly init and shutdown Offload in every -// test, while having sensible lifetime for the platform environment -struct OffloadInitWrapper { - OffloadInitWrapper() { olInit(); } - ~OffloadInitWrapper() { olShutDown(); } -}; -static OffloadInitWrapper Wrapper{}; - -static cl::opt - SelectedPlatform("platform", cl::desc("Only test the specified platform"), - cl::value_desc("platform")); - -std::ostream &operator<<(std::ostream &Out, - const ol_platform_handle_t &Platform) { - size_t Size; - olGetPlatformInfoSize(Platform, OL_PLATFORM_INFO_NAME, &Size); - std::vector Name(Size); - olGetPlatformInfo(Platform, OL_PLATFORM_INFO_NAME, Size, Name.data()); - Out << Name.data(); - return Out; -} - -std::ostream &operator<<(std::ostream &Out, - const std::vector &Platforms) { - for (auto Platform : Platforms) { - Out << "\n * \"" << Platform << "\""; - } - return Out; -} - -const std::vector &TestEnvironment::getPlatforms() { - static std::vector Platforms{}; - - if (Platforms.empty()) { - uint32_t PlatformCount = 0; - olGetPlatformCount(&PlatformCount); - if (PlatformCount > 0) { - Platforms.resize(PlatformCount); - olGetPlatform(PlatformCount, Platforms.data()); - } - } - - return Platforms; -} - -// Get a single platform, which may be selected by the user. -ol_platform_handle_t TestEnvironment::getPlatform() { - static ol_platform_handle_t Platform = nullptr; - const auto &Platforms = getPlatforms(); - - if (!Platform) { - if (SelectedPlatform != "") { - for (const auto CandidatePlatform : Platforms) { - std::stringstream PlatformName; - PlatformName << CandidatePlatform; - if (SelectedPlatform == PlatformName.str()) { - Platform = CandidatePlatform; - return Platform; - } - } - std::cout << "No platform found with the name \"" << SelectedPlatform - << "\". Choose from:" << Platforms << "\n"; - std::exit(1); - } else { - // Pick a single platform. We prefer one that has available devices, but - // just pick the first initially in case none have any devices. 
- Platform = Platforms[0]; - for (auto CandidatePlatform : Platforms) { - uint32_t NumDevices = 0; - if (olGetDeviceCount(CandidatePlatform, &NumDevices) == OL_SUCCESS) { - if (NumDevices > 0) { - Platform = CandidatePlatform; - break; - } - } - } - } - } - - return Platform; -} diff --git a/offload/unittests/OffloadAPI/common/Environment.hpp b/offload/unittests/OffloadAPI/common/Environment.hpp deleted file mode 100644 index 6dba2381eb0b7..0000000000000 --- a/offload/unittests/OffloadAPI/common/Environment.hpp +++ /dev/null @@ -1,17 +0,0 @@ -//===------- Offload API tests - gtest environment ------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include -#include - -namespace TestEnvironment { -const std::vector &getPlatforms(); -ol_platform_handle_t getPlatform(); -} // namespace TestEnvironment diff --git a/offload/unittests/OffloadAPI/common/Fixtures.hpp b/offload/unittests/OffloadAPI/common/Fixtures.hpp deleted file mode 100644 index 410a435dee1b5..0000000000000 --- a/offload/unittests/OffloadAPI/common/Fixtures.hpp +++ /dev/null @@ -1,64 +0,0 @@ -//===------- Offload API tests - gtest fixtures --==-----------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include -#include -#include - -#include "Environment.hpp" - -#pragma once - -#ifndef ASSERT_SUCCESS -#define ASSERT_SUCCESS(ACTUAL) ASSERT_EQ(OL_SUCCESS, ACTUAL) -#endif - -// TODO: rework this so the EXPECTED/ACTUAL results are readable -#ifndef ASSERT_ERROR -#define ASSERT_ERROR(EXPECTED, ACTUAL) \ - do { \ - ol_result_t Res = ACTUAL; \ - ASSERT_TRUE(Res && (Res->Code == EXPECTED)); \ - } while (0) -#endif - -#define RETURN_ON_FATAL_FAILURE(...) \ - __VA_ARGS__; \ - if (this->HasFatalFailure() || this->IsSkipped()) { \ - return; \ - } \ - (void)0 - -struct offloadTest : ::testing::Test { - // No special behavior now, but just in case we need to override it in future -}; - -struct offloadPlatformTest : offloadTest { - void SetUp() override { - RETURN_ON_FATAL_FAILURE(offloadTest::SetUp()); - - Platform = TestEnvironment::getPlatform(); - ASSERT_NE(Platform, nullptr); - } - - ol_platform_handle_t Platform; -}; - -struct offloadDeviceTest : offloadPlatformTest { - void SetUp() override { - RETURN_ON_FATAL_FAILURE(offloadPlatformTest::SetUp()); - - uint32_t NumDevices; - ASSERT_SUCCESS(olGetDeviceCount(Platform, &NumDevices)); - if (NumDevices == 0) - GTEST_SKIP() << "No available devices on this platform."; - ASSERT_SUCCESS(olGetDevice(Platform, 1, &Device)); - } - - ol_device_handle_t Device; -}; diff --git a/offload/unittests/OffloadAPI/device/olDeviceInfo.hpp b/offload/unittests/OffloadAPI/device/olDeviceInfo.hpp deleted file mode 100644 index 06915258da384..0000000000000 --- a/offload/unittests/OffloadAPI/device/olDeviceInfo.hpp +++ /dev/null @@ -1,21 +0,0 @@ -//===------- Offload API tests - Helpers for device info query testing ----===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include -#include - -// TODO: We could autogenerate these -inline std::vector DeviceQueries = { - OL_DEVICE_INFO_TYPE, OL_DEVICE_INFO_PLATFORM, OL_DEVICE_INFO_NAME, - OL_DEVICE_INFO_VENDOR, OL_DEVICE_INFO_DRIVER_VERSION}; - -inline std::unordered_map DeviceInfoSizeMap = { - {OL_DEVICE_INFO_TYPE, sizeof(ol_device_type_t)}, - {OL_DEVICE_INFO_PLATFORM, sizeof(ol_platform_handle_t)}, -}; diff --git a/offload/unittests/OffloadAPI/device/olGetDevice.cpp b/offload/unittests/OffloadAPI/device/olGetDevice.cpp deleted file mode 100644 index 68d4682dd3351..0000000000000 --- a/offload/unittests/OffloadAPI/device/olGetDevice.cpp +++ /dev/null @@ -1,39 +0,0 @@ -//===------- Offload API tests - olGetDevice -------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "../common/Fixtures.hpp" -#include -#include - -using olGetDeviceTest = offloadPlatformTest; - -TEST_F(olGetDeviceTest, Success) { - uint32_t Count = 0; - ASSERT_SUCCESS(olGetDeviceCount(Platform, &Count)); - if (Count == 0) - GTEST_SKIP() << "No available devices on this platform."; - - std::vector Devices(Count); - ASSERT_SUCCESS(olGetDevice(Platform, Count, Devices.data())); - for (auto Device : Devices) { - ASSERT_NE(nullptr, Device); - } -} - -TEST_F(olGetDeviceTest, SuccessSubsetOfDevices) { - uint32_t Count; - ASSERT_SUCCESS(olGetDeviceCount(Platform, &Count)); - if (Count < 2) - GTEST_SKIP() << "Only one device is available on this platform."; - - std::vector Devices(Count - 1); - ASSERT_SUCCESS(olGetDevice(Platform, Count - 1, Devices.data())); - for (auto Device : Devices) { - ASSERT_NE(nullptr, Device); - } -} diff --git a/offload/unittests/OffloadAPI/device/olGetDeviceCount.cpp b/offload/unittests/OffloadAPI/device/olGetDeviceCount.cpp deleted file mode 100644 index ef377d671bf60..0000000000000 --- a/offload/unittests/OffloadAPI/device/olGetDeviceCount.cpp +++ /dev/null @@ -1,28 +0,0 @@ -//===------- Offload API tests - olGetDeviceCount --------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "../common/Fixtures.hpp" -#include -#include - -using olGetDeviceCountTest = offloadPlatformTest; - -TEST_F(olGetDeviceCountTest, Success) { - uint32_t Count = 0; - ASSERT_SUCCESS(olGetDeviceCount(Platform, &Count)); -} - -TEST_F(olGetDeviceCountTest, InvalidNullPlatform) { - uint32_t Count = 0; - ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, olGetDeviceCount(nullptr, &Count)); -} - -TEST_F(olGetDeviceCountTest, InvalidNullPointer) { - ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, - olGetDeviceCount(Platform, nullptr)); -} diff --git a/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp b/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp deleted file mode 100644 index c936802fb1e4d..0000000000000 --- a/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp +++ /dev/null @@ -1,76 +0,0 @@ -//===------- Offload API tests - olGetDeviceInfo ---------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "../common/Fixtures.hpp" -#include "olDeviceInfo.hpp" -#include -#include - -struct olGetDeviceInfoTest : offloadDeviceTest, - ::testing::WithParamInterface { - - void SetUp() override { RETURN_ON_FATAL_FAILURE(offloadDeviceTest::SetUp()); } -}; - -INSTANTIATE_TEST_SUITE_P( - , olGetDeviceInfoTest, ::testing::ValuesIn(DeviceQueries), - [](const ::testing::TestParamInfo &info) { - std::stringstream ss; - ss << info.param; - return ss.str(); - }); - -TEST_P(olGetDeviceInfoTest, Success) { - ol_device_info_t InfoType = GetParam(); - size_t Size = 0; - - ASSERT_SUCCESS(olGetDeviceInfoSize(Device, InfoType, &Size)); - - std::vector InfoData(Size); - ASSERT_SUCCESS(olGetDeviceInfo(Device, InfoType, Size, InfoData.data())); - - if (InfoType == OL_DEVICE_INFO_PLATFORM) { - auto *ReturnedPlatform = - reinterpret_cast(InfoData.data()); - ASSERT_EQ(Platform, *ReturnedPlatform); - } -} - -TEST_F(olGetDeviceInfoTest, InvalidNullHandleDevice) { - ol_device_type_t DeviceType; - ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, - olGetDeviceInfo(nullptr, OL_DEVICE_INFO_TYPE, - sizeof(ol_device_type_t), &DeviceType)); -} - -TEST_F(olGetDeviceInfoTest, InvalidEnumerationInfoType) { - ol_device_type_t DeviceType; - ASSERT_ERROR(OL_ERRC_INVALID_ENUMERATION, - olGetDeviceInfo(Device, OL_DEVICE_INFO_FORCE_UINT32, - sizeof(ol_device_type_t), &DeviceType)); -} - -TEST_F(olGetDeviceInfoTest, InvalidSizePropSize) { - ol_device_type_t DeviceType; - ASSERT_ERROR(OL_ERRC_INVALID_SIZE, - olGetDeviceInfo(Device, OL_DEVICE_INFO_TYPE, 0, &DeviceType)); -} - -TEST_F(olGetDeviceInfoTest, InvalidSizePropSizeSmall) { - ol_device_type_t DeviceType; - ASSERT_ERROR(OL_ERRC_INVALID_SIZE, - olGetDeviceInfo(Device, OL_DEVICE_INFO_TYPE, - sizeof(DeviceType) - 1, &DeviceType)); -} - -TEST_F(olGetDeviceInfoTest, InvalidNullPointerPropValue) { - ol_device_type_t DeviceType; - ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, - olGetDeviceInfo(Device, OL_DEVICE_INFO_TYPE, sizeof(DeviceType), - nullptr)); -} diff --git a/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp b/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp deleted file mode 100644 index 9e792d1c3e25e..0000000000000 --- 
a/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp +++ /dev/null @@ -1,58 +0,0 @@ -//===------- Offload API tests - olGetDeviceInfoSize -----------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include - -#include "../common/Fixtures.hpp" -#include "olDeviceInfo.hpp" - -struct olGetDeviceInfoSizeTest - : offloadDeviceTest, - ::testing::WithParamInterface { - - void SetUp() override { RETURN_ON_FATAL_FAILURE(offloadDeviceTest::SetUp()); } -}; - -// TODO: We could autogenerate the list of enum values -INSTANTIATE_TEST_SUITE_P( - , olGetDeviceInfoSizeTest, ::testing::ValuesIn(DeviceQueries), - [](const ::testing::TestParamInfo &info) { - std::stringstream ss; - ss << info.param; - return ss.str(); - }); - -TEST_P(olGetDeviceInfoSizeTest, Success) { - ol_device_info_t InfoType = GetParam(); - size_t Size = 0; - - ASSERT_SUCCESS(olGetDeviceInfoSize(Device, InfoType, &Size)); - auto ExpectedSize = DeviceInfoSizeMap.find(InfoType); - if (ExpectedSize != DeviceInfoSizeMap.end()) { - ASSERT_EQ(Size, ExpectedSize->second); - } else { - ASSERT_NE(Size, 0lu); - } -} - -TEST_F(olGetDeviceInfoSizeTest, InvalidNullHandle) { - size_t Size = 0; - ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, - olGetDeviceInfoSize(nullptr, OL_DEVICE_INFO_TYPE, &Size)); -} - -TEST_F(olGetDeviceInfoSizeTest, InvalidDeviceInfoEnumeration) { - size_t Size = 0; - ASSERT_ERROR(OL_ERRC_INVALID_ENUMERATION, - olGetDeviceInfoSize(Device, OL_DEVICE_INFO_FORCE_UINT32, &Size)); -} - -TEST_F(olGetDeviceInfoSizeTest, InvalidNullPointer) { - ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, - olGetDeviceInfoSize(Device, OL_DEVICE_INFO_TYPE, nullptr)); -} diff --git a/offload/unittests/OffloadAPI/platform/olGetPlatform.cpp b/offload/unittests/OffloadAPI/platform/olGetPlatform.cpp deleted file mode 100644 index 4a2f9e8ac7741..0000000000000 --- a/offload/unittests/OffloadAPI/platform/olGetPlatform.cpp +++ /dev/null @@ -1,28 +0,0 @@ -//===------- Offload API tests - olGetPlatform -----------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "../common/Fixtures.hpp" -#include -#include - -using olGetPlatformTest = offloadTest; - -TEST_F(olGetPlatformTest, Success) { - uint32_t PlatformCount; - ASSERT_SUCCESS(olGetPlatformCount(&PlatformCount)); - std::vector Platforms(PlatformCount); - ASSERT_SUCCESS(olGetPlatform(PlatformCount, Platforms.data())); -} - -TEST_F(olGetPlatformTest, InvalidNumEntries) { - uint32_t PlatformCount; - ASSERT_SUCCESS(olGetPlatformCount(&PlatformCount)); - std::vector Platforms(PlatformCount); - ASSERT_ERROR(OL_ERRC_INVALID_SIZE, - olGetPlatform(PlatformCount + 1, Platforms.data())); -} diff --git a/offload/unittests/OffloadAPI/platform/olGetPlatformCount.cpp b/offload/unittests/OffloadAPI/platform/olGetPlatformCount.cpp deleted file mode 100644 index 15b4b6abcd70d..0000000000000 --- a/offload/unittests/OffloadAPI/platform/olGetPlatformCount.cpp +++ /dev/null @@ -1,22 +0,0 @@ -//===------- Offload API tests - olGetPlatformCount ------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "../common/Fixtures.hpp" -#include -#include - -using olGetPlatformCountTest = offloadTest; - -TEST_F(olGetPlatformCountTest, Success) { - uint32_t PlatformCount; - ASSERT_SUCCESS(olGetPlatformCount(&PlatformCount)); -} - -TEST_F(olGetPlatformCountTest, InvalidNullPointer) { - ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, olGetPlatformCount(nullptr)); -} diff --git a/offload/unittests/OffloadAPI/platform/olGetPlatformInfo.cpp b/offload/unittests/OffloadAPI/platform/olGetPlatformInfo.cpp deleted file mode 100644 index c646bdc50b7da..0000000000000 --- a/offload/unittests/OffloadAPI/platform/olGetPlatformInfo.cpp +++ /dev/null @@ -1,76 +0,0 @@ -//===------- Offload API tests - olGetPlatformInfo -------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include - -#include "../common/Fixtures.hpp" -#include "olPlatformInfo.hpp" - -struct olGetPlatformInfoTest - : offloadPlatformTest, - ::testing::WithParamInterface {}; - -INSTANTIATE_TEST_SUITE_P( - olGetPlatformInfo, olGetPlatformInfoTest, - ::testing::ValuesIn(PlatformQueries), - [](const ::testing::TestParamInfo &info) { - std::stringstream ss; - ss << info.param; - return ss.str(); - }); - -TEST_P(olGetPlatformInfoTest, Success) { - size_t Size = 0; - ol_platform_info_t InfoType = GetParam(); - - ASSERT_SUCCESS(olGetPlatformInfoSize(Platform, InfoType, &Size)); - std::vector InfoData(Size); - ASSERT_SUCCESS(olGetPlatformInfo(Platform, InfoType, Size, InfoData.data())); - - // Info types with a dynamic size are all char[] so we can verify the returned - // string is the expected size. 
- auto ExpectedSize = PlatformInfoSizeMap.find(InfoType); - if (ExpectedSize == PlatformInfoSizeMap.end()) { - ASSERT_EQ(Size, strlen(InfoData.data()) + 1); - } -} - -TEST_F(olGetPlatformInfoTest, InvalidNullHandle) { - ol_platform_backend_t Backend; - ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, - olGetPlatformInfo(nullptr, OL_PLATFORM_INFO_BACKEND, - sizeof(Backend), &Backend)); -} - -TEST_F(olGetPlatformInfoTest, InvalidPlatformInfoEnumeration) { - ol_platform_backend_t Backend; - ASSERT_ERROR(OL_ERRC_INVALID_ENUMERATION, - olGetPlatformInfo(Platform, OL_PLATFORM_INFO_FORCE_UINT32, - sizeof(Backend), &Backend)); -} - -TEST_F(olGetPlatformInfoTest, InvalidSizeZero) { - ol_platform_backend_t Backend; - ASSERT_ERROR( - OL_ERRC_INVALID_SIZE, - olGetPlatformInfo(Platform, OL_PLATFORM_INFO_BACKEND, 0, &Backend)); -} - -TEST_F(olGetPlatformInfoTest, InvalidSizeSmall) { - ol_platform_backend_t Backend; - ASSERT_ERROR(OL_ERRC_INVALID_SIZE, - olGetPlatformInfo(Platform, OL_PLATFORM_INFO_BACKEND, - sizeof(Backend) - 1, &Backend)); -} - -TEST_F(olGetPlatformInfoTest, InvalidNullPointerPropValue) { - ol_platform_backend_t Backend; - ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, - olGetPlatformInfo(Platform, OL_PLATFORM_INFO_BACKEND, - sizeof(Backend), nullptr)); -} diff --git a/offload/unittests/OffloadAPI/platform/olGetPlatformInfoSize.cpp b/offload/unittests/OffloadAPI/platform/olGetPlatformInfoSize.cpp deleted file mode 100644 index 7c9274082e8e4..0000000000000 --- a/offload/unittests/OffloadAPI/platform/olGetPlatformInfoSize.cpp +++ /dev/null @@ -1,57 +0,0 @@ -//===------- Offload API tests - olGetPlatformInfoSize ---------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include - -#include "../common/Fixtures.hpp" -#include "olPlatformInfo.hpp" - -struct olGetPlatformInfoSizeTest - : offloadPlatformTest, - ::testing::WithParamInterface {}; - -INSTANTIATE_TEST_SUITE_P( - olGetPlatformInfoSize, olGetPlatformInfoSizeTest, - ::testing::ValuesIn(PlatformQueries), - [](const ::testing::TestParamInfo &info) { - std::stringstream ss; - ss << info.param; - return ss.str(); - }); - -TEST_P(olGetPlatformInfoSizeTest, Success) { - size_t Size = 0; - ol_platform_info_t InfoType = GetParam(); - - ASSERT_SUCCESS(olGetPlatformInfoSize(Platform, InfoType, &Size)); - auto ExpectedSize = PlatformInfoSizeMap.find(InfoType); - if (ExpectedSize != PlatformInfoSizeMap.end()) { - ASSERT_EQ(Size, ExpectedSize->second); - } else { - ASSERT_NE(Size, 0lu); - } -} - -TEST_F(olGetPlatformInfoSizeTest, InvalidNullHandle) { - size_t Size = 0; - ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, - olGetPlatformInfoSize(nullptr, OL_PLATFORM_INFO_BACKEND, &Size)); -} - -TEST_F(olGetPlatformInfoSizeTest, InvalidPlatformInfoEnumeration) { - size_t Size = 0; - ASSERT_ERROR( - OL_ERRC_INVALID_ENUMERATION, - olGetPlatformInfoSize(Platform, OL_PLATFORM_INFO_FORCE_UINT32, &Size)); -} - -TEST_F(olGetPlatformInfoSizeTest, InvalidNullPointer) { - ASSERT_ERROR( - OL_ERRC_INVALID_NULL_POINTER, - olGetPlatformInfoSize(Platform, OL_PLATFORM_INFO_BACKEND, nullptr)); -} diff --git a/offload/unittests/OffloadAPI/platform/olPlatformInfo.hpp b/offload/unittests/OffloadAPI/platform/olPlatformInfo.hpp deleted file mode 100644 index d49cdb90d321a..0000000000000 --- a/offload/unittests/OffloadAPI/platform/olPlatformInfo.hpp +++ /dev/null @@ -1,20 +0,0 @@ -//===------- Offload API tests - Helpers for platform info query testing --===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include - -// TODO: We could autogenerate these - -inline std::vector PlatformQueries = { - OL_PLATFORM_INFO_NAME, OL_PLATFORM_INFO_VENDOR_NAME, - OL_PLATFORM_INFO_VERSION, OL_PLATFORM_INFO_BACKEND}; - -inline std::unordered_map PlatformInfoSizeMap = { - {OL_PLATFORM_INFO_BACKEND, sizeof(ol_platform_backend_t)}, -}; From d8b5af45040431c44c1766aa505a6edb786ef81b Mon Sep 17 00:00:00 2001 From: Shubham Sandeep Rastogi Date: Tue, 3 Dec 2024 12:50:13 -0800 Subject: [PATCH 126/191] Revert "Reland "Add a pass to collect dropped var stats for MIR" (#117044)" This reverts commit 249755cedb17ffa707253edcef1a388f807caa35. Broke https://lab.llvm.org/buildbot/#/builders/160/builds/9420 Note: This is test shard 99 of 154. [==========] Running 2 tests from 2 test suites. [----------] Global test environment set-up. 
[----------] 1 test from DroppedVariableStatsMIR [ RUN ] DroppedVariableStatsMIR.InlinedAt -- exit: -11 --- .../llvm/CodeGen/DroppedVariableStats.h | 48 +- .../llvm/CodeGen/MachineFunctionPass.h | 2 - llvm/lib/CodeGen/DroppedVariableStats.cpp | 63 +- llvm/lib/CodeGen/MachineFunctionPass.cpp | 15 +- llvm/unittests/CodeGen/CMakeLists.txt | 1 - .../CodeGen/DroppedVariableStatsMIRTest.cpp | 1067 ----------------- 6 files changed, 3 insertions(+), 1193 deletions(-) delete mode 100644 llvm/unittests/CodeGen/DroppedVariableStatsMIRTest.cpp diff --git a/llvm/include/llvm/CodeGen/DroppedVariableStats.h b/llvm/include/llvm/CodeGen/DroppedVariableStats.h index f6050c68c91aa..371d775b02e87 100644 --- a/llvm/include/llvm/CodeGen/DroppedVariableStats.h +++ b/llvm/include/llvm/CodeGen/DroppedVariableStats.h @@ -7,7 +7,7 @@ ///===---------------------------------------------------------------------===// /// \file /// Dropped Variable Statistics for Debug Information. Reports any number -/// of #dbg_values or DBG_VALUEs that get dropped due to an optimization pass. +/// of #dbg_value that get dropped due to an optimization pass. /// ///===---------------------------------------------------------------------===// @@ -221,52 +221,6 @@ class DroppedVariableStatsIR : public DroppedVariableStats { } }; -/// A class to collect and print dropped debug information due to MIR -/// optimization passes. After every MIR pass is run, it will print how many -/// #DBG_VALUEs were dropped due to that pass. -class DroppedVariableStatsMIR : public DroppedVariableStats { -public: - DroppedVariableStatsMIR() : llvm::DroppedVariableStats(false) {} - - void runBeforePass(StringRef PassID, MachineFunction *MF) { - if (PassID == "Debug Variable Analysis") - return; - setup(); - return runOnMachineFunction(MF, true); - } - - void runAfterPass(StringRef PassID, MachineFunction *MF) { - if (PassID == "Debug Variable Analysis") - return; - runOnMachineFunction(MF, false); - calculateDroppedVarStatsOnMachineFunction(MF, PassID, MF->getName().str()); - cleanup(); - } - -private: - const MachineFunction *MFunc; - /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or - /// after a pass has run to facilitate dropped variable calculation for an - /// llvm::MachineFunction. - void runOnMachineFunction(const MachineFunction *MF, bool Before); - /// Iterate over all Instructions in a MachineFunction and report any dropped - /// debug information. - void calculateDroppedVarStatsOnMachineFunction(const MachineFunction *MF, - StringRef PassID, - StringRef FuncOrModName); - /// Override base class method to run on an llvm::MachineFunction - /// specifically. - virtual void - visitEveryInstruction(unsigned &DroppedCount, - DenseMap &InlinedAtsMap, - VarID Var) override; - /// Override base class method to run on DBG_VALUEs specifically. 
- virtual void visitEveryDebugRecord( - DenseSet &VarIDSet, - DenseMap> &InlinedAtsMap, - StringRef FuncName, bool Before) override; -}; - } // namespace llvm #endif diff --git a/llvm/include/llvm/CodeGen/MachineFunctionPass.h b/llvm/include/llvm/CodeGen/MachineFunctionPass.h index d82b593497ffc..caaf22c2139e3 100644 --- a/llvm/include/llvm/CodeGen/MachineFunctionPass.h +++ b/llvm/include/llvm/CodeGen/MachineFunctionPass.h @@ -18,7 +18,6 @@ #ifndef LLVM_CODEGEN_MACHINEFUNCTIONPASS_H #define LLVM_CODEGEN_MACHINEFUNCTIONPASS_H -#include "llvm/CodeGen/DroppedVariableStats.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/Pass.h" @@ -68,7 +67,6 @@ class MachineFunctionPass : public FunctionPass { MachineFunctionProperties RequiredProperties; MachineFunctionProperties SetProperties; MachineFunctionProperties ClearedProperties; - DroppedVariableStatsMIR DroppedVarStatsMF; /// createPrinterPass - Get a machine function printer pass. Pass *createPrinterPass(raw_ostream &O, diff --git a/llvm/lib/CodeGen/DroppedVariableStats.cpp b/llvm/lib/CodeGen/DroppedVariableStats.cpp index 71f91292160f5..122fcad1293f1 100644 --- a/llvm/lib/CodeGen/DroppedVariableStats.cpp +++ b/llvm/lib/CodeGen/DroppedVariableStats.cpp @@ -7,7 +7,7 @@ ///===---------------------------------------------------------------------===// /// \file /// Dropped Variable Statistics for Debug Information. Reports any number -/// of #dbg_values or DBG_VALUEs that get dropped due to an optimization pass. +/// of #dbg_value that get dropped due to an optimization pass. /// ///===---------------------------------------------------------------------===// @@ -192,64 +192,3 @@ void DroppedVariableStatsIR::visitEveryDebugRecord( } } } - -void DroppedVariableStatsMIR::runOnMachineFunction(const MachineFunction *MF, - bool Before) { - auto &DebugVariables = DebugVariablesStack.back()[&MF->getFunction()]; - auto FuncName = MF->getName(); - MFunc = MF; - run(DebugVariables, FuncName, Before); -} - -void DroppedVariableStatsMIR::calculateDroppedVarStatsOnMachineFunction( - const MachineFunction *MF, StringRef PassID, StringRef FuncOrModName) { - MFunc = MF; - StringRef FuncName = MF->getName(); - const Function *Func = &MF->getFunction(); - DebugVariables &DbgVariables = DebugVariablesStack.back()[Func]; - calculateDroppedStatsAndPrint(DbgVariables, FuncName, PassID, FuncOrModName, - "MachineFunction", Func); -} - -void DroppedVariableStatsMIR::visitEveryInstruction( - unsigned &DroppedCount, DenseMap &InlinedAtsMap, - VarID Var) { - unsigned PrevDroppedCount = DroppedCount; - const DIScope *DbgValScope = std::get<0>(Var); - for (const auto &MBB : *MFunc) { - for (const auto &MI : MBB) { - if (!MI.isDebugInstr()) { - auto *DbgLoc = MI.getDebugLoc().get(); - if (!DbgLoc) - continue; - - auto *Scope = DbgLoc->getScope(); - if (updateDroppedCount(DbgLoc, Scope, DbgValScope, InlinedAtsMap, Var, - DroppedCount)) - break; - } - } - if (PrevDroppedCount != DroppedCount) { - PrevDroppedCount = DroppedCount; - break; - } - } -} - -void DroppedVariableStatsMIR::visitEveryDebugRecord( - DenseSet &VarIDSet, - DenseMap> &InlinedAtsMap, - StringRef FuncName, bool Before) { - for (const auto &MBB : *MFunc) { - for (const auto &MI : MBB) { - if (MI.isDebugValueLike()) { - auto *DbgVar = MI.getDebugVariable(); - if (!DbgVar) - continue; - auto DbgLoc = MI.getDebugLoc(); - populateVarIDSetAndInlinedMap(DbgVar, DbgLoc, VarIDSet, InlinedAtsMap, - FuncName, Before); - } - } - } -} diff --git a/llvm/lib/CodeGen/MachineFunctionPass.cpp 
b/llvm/lib/CodeGen/MachineFunctionPass.cpp index e803811643f87..62ac3e32d24d9 100644 --- a/llvm/lib/CodeGen/MachineFunctionPass.cpp +++ b/llvm/lib/CodeGen/MachineFunctionPass.cpp @@ -32,11 +32,6 @@ using namespace llvm; using namespace ore; -static cl::opt DroppedVarStatsMIR( - "dropped-variable-stats-mir", cl::Hidden, - cl::desc("Dump dropped debug variables stats for MIR passes"), - cl::init(false)); - Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O, const std::string &Banner) const { return createMachineFunctionPrinterPass(O, Banner); @@ -96,15 +91,7 @@ bool MachineFunctionPass::runOnFunction(Function &F) { MFProps.reset(ClearedProperties); - bool RV; - if (DroppedVarStatsMIR) { - auto PassName = getPassName(); - DroppedVarStatsMF.runBeforePass(PassName, &MF); - RV = runOnMachineFunction(MF); - DroppedVarStatsMF.runAfterPass(PassName, &MF); - } else { - RV = runOnMachineFunction(MF); - } + bool RV = runOnMachineFunction(MF); if (ShouldEmitSizeRemarks) { // We wanted size remarks. Check if there was a change to the number of diff --git a/llvm/unittests/CodeGen/CMakeLists.txt b/llvm/unittests/CodeGen/CMakeLists.txt index 50ef1bb5b7af2..807fd1a9b7b56 100644 --- a/llvm/unittests/CodeGen/CMakeLists.txt +++ b/llvm/unittests/CodeGen/CMakeLists.txt @@ -28,7 +28,6 @@ add_llvm_unittest(CodeGenTests DIEHashTest.cpp DIETest.cpp DroppedVariableStatsIRTest.cpp - DroppedVariableStatsMIRTest.cpp DwarfStringPoolEntryRefTest.cpp InstrRefLDVTest.cpp LowLevelTypeTest.cpp diff --git a/llvm/unittests/CodeGen/DroppedVariableStatsMIRTest.cpp b/llvm/unittests/CodeGen/DroppedVariableStatsMIRTest.cpp deleted file mode 100644 index b26a89c7adcba..0000000000000 --- a/llvm/unittests/CodeGen/DroppedVariableStatsMIRTest.cpp +++ /dev/null @@ -1,1067 +0,0 @@ -//===- unittests/IR/DroppedVariableStatsTest.cpp - TimePassesHandler tests -//----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/AsmParser/Parser.h" -#include "llvm/CodeGen/MIRParser/MIRParser.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/IR/Module.h" -#include "llvm/MC/TargetRegistry.h" -#include "llvm/Pass.h" -#include "llvm/Passes/StandardInstrumentations.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Target/TargetMachine.h" -#include "gtest/gtest.h" -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace llvm; - -namespace { - -std::unique_ptr -createTargetMachine(std::string TT, StringRef CPU, StringRef FS) { - std::string Error; - const Target *T = TargetRegistry::lookupTarget(TT, Error); - if (!T) - return nullptr; - TargetOptions Options; - return std::unique_ptr( - static_cast(T->createTargetMachine( - TT, CPU, FS, Options, std::nullopt, std::nullopt))); -} - -std::unique_ptr parseMIR(const TargetMachine &TM, StringRef MIRCode, - MachineModuleInfo &MMI, LLVMContext *Context) { - SMDiagnostic Diagnostic; - std::unique_ptr M; - std::unique_ptr MBuffer = MemoryBuffer::getMemBuffer(MIRCode); - auto MIR = createMIRParser(std::move(MBuffer), *Context); - if (!MIR) - return nullptr; - - std::unique_ptr Mod = MIR->parseIRModule(); - if (!Mod) - return nullptr; - - Mod->setDataLayout(TM.createDataLayout()); - - if (MIR->parseMachineFunctions(*Mod, MMI)) { - M.reset(); - return nullptr; - } - return Mod; -} -// This test ensures that if a DBG_VALUE and an instruction that exists in the -// same scope as that DBG_VALUE are both deleted as a result of an optimization -// pass, debug information is considered not dropped. -TEST(DroppedVariableStatsMIR, BothDeleted) { - InitializeAllTargetInfos(); - InitializeAllTargets(); - InitializeAllTargetMCs(); - PassInstrumentationCallbacks PIC; - PassInstrumentation PI(&PIC); - - LLVMContext C; - - const char *MIR = - R"( ---- | - ; ModuleID = '/tmp/test.ll' - source_filename = "/tmp/test.ll" - target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" - - define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 { - entry: - #dbg_value(i32 %x, !10, !DIExpression(), !11) - %add = add nsw i32 %x, 1, !dbg !12 - ret i32 0 - } - - !llvm.dbg.cu = !{!0} - !llvm.module.flags = !{!2} - !llvm.ident = !{!3} - - !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") - !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/") - !2 = !{i32 2, !"Debug Info Version", i32 3} - !3 = !{!"clang"} - !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) - !5 = !DIFile(filename: "/tmp/code.cpp", directory: "") - !6 = !DISubroutineType(types: !7) - !7 = !{!8, !8} - !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) - !9 = !{!10} - !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8) - !11 = !DILocation(line: 0, scope: !4) - !12 = !DILocation(line: 2, column: 11, scope: !4) - -... 
---- -name: _Z3fooi -alignment: 4 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -hasWinCFI: false -noPhis: false -isSSA: true -noVRegs: false -hasFakeUses: false -callsEHReturn: false -callsUnwindInit: false -hasEHCatchret: false -hasEHScopes: false -hasEHFunclets: false -isOutlined: false -debugInstrRef: false -failsVerification: false -tracksDebugUserValues: false -registers: - - { id: 0, class: _, preferred-register: '', flags: [ ] } - - { id: 1, class: _, preferred-register: '', flags: [ ] } - - { id: 2, class: _, preferred-register: '', flags: [ ] } - - { id: 3, class: _, preferred-register: '', flags: [ ] } -liveins: - - { reg: '$w0', virtual-reg: '' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 1 - adjustsStack: false - hasCalls: false - stackProtector: '' - functionContext: '' - maxCallFrameSize: 4294967295 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - hasTailCall: false - isCalleeSavedInfoValid: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: [] -stack: [] -entry_values: [] -callSites: [] -debugValueSubstitutions: [] -constants: [] -machineFunctionInfo: {} -body: | - bb.1.entry: - liveins: $w0 - - %0:_(s32) = COPY $w0 - %1:_(s32) = G_CONSTANT i32 1 - %3:_(s32) = G_CONSTANT i32 0 - DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 - %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 - $w0 = COPY %3(s32) - RET_ReallyLR implicit $w0 - )"; - auto TM = createTargetMachine(Triple::normalize("aarch64--"), "", ""); - MachineModuleInfo MMI(TM.get()); - std::unique_ptr M = parseMIR(*TM, MIR, MMI, &C); - ASSERT_TRUE(M); - - DroppedVariableStatsMIR Stats; - auto *MF = MMI.getMachineFunction(*M->getFunction("_Z3fooi")); - Stats.runBeforePass("Test", MF); - - // This loop simulates an IR pass that drops debug information. - for (auto &MBB : *MF) { - for (auto &MI : MBB) { - if (MI.isDebugValueLike()) { - MI.eraseFromParent(); - break; - } - } - for (auto &MI : MBB) { - auto *DbgLoc = MI.getDebugLoc().get(); - if (DbgLoc) { - MI.eraseFromParent(); - break; - } - } - break; - } - - Stats.runAfterPass("Test", MF); - ASSERT_EQ(Stats.getPassDroppedVariables(), false); -} - -// This test ensures that if a DBG_VALUE is dropped after an optimization pass, -// but an instruction that shares the same scope as the DBG_VALUE still exists, -// debug information is conisdered dropped. 
-TEST(DroppedVariableStatsMIR, DbgValLost) { - InitializeAllTargetInfos(); - InitializeAllTargets(); - InitializeAllTargetMCs(); - PassInstrumentationCallbacks PIC; - PassInstrumentation PI(&PIC); - - LLVMContext C; - - const char *MIR = - R"( ---- | - ; ModuleID = '/tmp/test.ll' - source_filename = "/tmp/test.ll" - target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" - - define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 { - entry: - #dbg_value(i32 %x, !10, !DIExpression(), !11) - %add = add nsw i32 %x, 1, !dbg !12 - ret i32 0 - } - - !llvm.dbg.cu = !{!0} - !llvm.module.flags = !{!2} - !llvm.ident = !{!3} - - !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") - !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/") - !2 = !{i32 2, !"Debug Info Version", i32 3} - !3 = !{!"clang"} - !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) - !5 = !DIFile(filename: "/tmp/code.cpp", directory: "") - !6 = !DISubroutineType(types: !7) - !7 = !{!8, !8} - !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) - !9 = !{!10} - !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8) - !11 = !DILocation(line: 0, scope: !4) - !12 = !DILocation(line: 2, column: 11, scope: !4) - -... ---- -name: _Z3fooi -alignment: 4 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -hasWinCFI: false -noPhis: false -isSSA: true -noVRegs: false -hasFakeUses: false -callsEHReturn: false -callsUnwindInit: false -hasEHCatchret: false -hasEHScopes: false -hasEHFunclets: false -isOutlined: false -debugInstrRef: false -failsVerification: false -tracksDebugUserValues: false -registers: - - { id: 0, class: _, preferred-register: '', flags: [ ] } - - { id: 1, class: _, preferred-register: '', flags: [ ] } - - { id: 2, class: _, preferred-register: '', flags: [ ] } - - { id: 3, class: _, preferred-register: '', flags: [ ] } -liveins: - - { reg: '$w0', virtual-reg: '' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 1 - adjustsStack: false - hasCalls: false - stackProtector: '' - functionContext: '' - maxCallFrameSize: 4294967295 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - hasTailCall: false - isCalleeSavedInfoValid: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: [] -stack: [] -entry_values: [] -callSites: [] -debugValueSubstitutions: [] -constants: [] -machineFunctionInfo: {} -body: | - bb.1.entry: - liveins: $w0 - - %0:_(s32) = COPY $w0 - %1:_(s32) = G_CONSTANT i32 1 - %3:_(s32) = G_CONSTANT i32 0 - DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 - %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 - $w0 = COPY %3(s32) - RET_ReallyLR implicit $w0 - )"; - auto TM = createTargetMachine(Triple::normalize("aarch64--"), "", ""); - MachineModuleInfo MMI(TM.get()); - std::unique_ptr M = parseMIR(*TM, MIR, MMI, &C); - ASSERT_TRUE(M); - - DroppedVariableStatsMIR Stats; - auto *MF = 
MMI.getMachineFunction(*M->getFunction("_Z3fooi")); - Stats.runBeforePass("Test", MF); - - // This loop simulates an IR pass that drops debug information. - for (auto &MBB : *MF) { - for (auto &MI : MBB) { - if (MI.isDebugValueLike()) { - MI.eraseFromParent(); - break; - } - } - break; - } - - Stats.runAfterPass("Test", MF); - ASSERT_EQ(Stats.getPassDroppedVariables(), true); -} - -// This test ensures that if a #dbg_value is dropped after an optimization pass, -// but an instruction that has an unrelated scope as the #dbg_value still -// exists, debug information is conisdered not dropped. -TEST(DroppedVariableStatsMIR, UnrelatedScopes) { - InitializeAllTargetInfos(); - InitializeAllTargets(); - InitializeAllTargetMCs(); - PassInstrumentationCallbacks PIC; - PassInstrumentation PI(&PIC); - - LLVMContext C; - - const char *MIR = - R"( ---- | - ; ModuleID = '/tmp/test.ll' - source_filename = "/tmp/test.ll" - target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" - - define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 { - entry: - #dbg_value(i32 %x, !10, !DIExpression(), !11) - %add = add nsw i32 %x, 1, !dbg !12 - ret i32 0 - } - - !llvm.dbg.cu = !{!0} - !llvm.module.flags = !{!2} - !llvm.ident = !{!3} - - !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") - !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/") - !2 = !{i32 2, !"Debug Info Version", i32 3} - !3 = !{!"clang"} - !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) - !5 = !DIFile(filename: "/tmp/code.cpp", directory: "") - !6 = !DISubroutineType(types: !7) - !7 = !{!8, !8} - !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) - !9 = !{!10} - !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8) - !11 = !DILocation(line: 0, scope: !4) - !12 = !DILocation(line: 2, column: 11, scope: !13) - !13 = distinct !DISubprogram(name: "bar", linkageName: "_Z3bari", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) - -... 
---- -name: _Z3fooi -alignment: 4 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -hasWinCFI: false -noPhis: false -isSSA: true -noVRegs: false -hasFakeUses: false -callsEHReturn: false -callsUnwindInit: false -hasEHCatchret: false -hasEHScopes: false -hasEHFunclets: false -isOutlined: false -debugInstrRef: false -failsVerification: false -tracksDebugUserValues: false -registers: - - { id: 0, class: _, preferred-register: '', flags: [ ] } - - { id: 1, class: _, preferred-register: '', flags: [ ] } - - { id: 2, class: _, preferred-register: '', flags: [ ] } - - { id: 3, class: _, preferred-register: '', flags: [ ] } -liveins: - - { reg: '$w0', virtual-reg: '' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 1 - adjustsStack: false - hasCalls: false - stackProtector: '' - functionContext: '' - maxCallFrameSize: 4294967295 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - hasTailCall: false - isCalleeSavedInfoValid: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: [] -stack: [] -entry_values: [] -callSites: [] -debugValueSubstitutions: [] -constants: [] -machineFunctionInfo: {} -body: | - bb.1.entry: - liveins: $w0 - - %0:_(s32) = COPY $w0 - %1:_(s32) = G_CONSTANT i32 1 - %3:_(s32) = G_CONSTANT i32 0 - DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 - %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 - $w0 = COPY %3(s32) - RET_ReallyLR implicit $w0 - )"; - auto TM = createTargetMachine(Triple::normalize("aarch64--"), "", ""); - MachineModuleInfo MMI(TM.get()); - std::unique_ptr M = parseMIR(*TM, MIR, MMI, &C); - ASSERT_TRUE(M); - - DroppedVariableStatsMIR Stats; - auto *MF = MMI.getMachineFunction(*M->getFunction("_Z3fooi")); - Stats.runBeforePass("Test", MF); - - // This loop simulates an IR pass that drops debug information. - for (auto &MBB : *MF) { - for (auto &MI : MBB) { - if (MI.isDebugValueLike()) { - MI.eraseFromParent(); - break; - } - } - break; - } - - Stats.runAfterPass("Test", MF); - ASSERT_EQ(Stats.getPassDroppedVariables(), false); -} - -// This test ensures that if a #dbg_value is dropped after an optimization pass, -// but an instruction that has a scope which is a child of the #dbg_value scope -// still exists, debug information is conisdered dropped. 
-TEST(DroppedVariableStatsMIR, ChildScopes) { - InitializeAllTargetInfos(); - InitializeAllTargets(); - InitializeAllTargetMCs(); - PassInstrumentationCallbacks PIC; - PassInstrumentation PI(&PIC); - - LLVMContext C; - - const char *MIR = - R"( ---- | - ; ModuleID = '/tmp/test.ll' - source_filename = "/tmp/test.ll" - target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" - - define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 { - entry: - #dbg_value(i32 %x, !10, !DIExpression(), !11) - %add = add nsw i32 %x, 1, !dbg !12 - ret i32 0 - } - - !llvm.dbg.cu = !{!0} - !llvm.module.flags = !{!2} - !llvm.ident = !{!3} - - !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") - !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/") - !2 = !{i32 2, !"Debug Info Version", i32 3} - !3 = !{!"clang"} - !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) - !5 = !DIFile(filename: "/tmp/code.cpp", directory: "") - !6 = !DISubroutineType(types: !7) - !7 = !{!8, !8} - !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) - !9 = !{!10} - !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8) - !11 = !DILocation(line: 0, scope: !4) - !12 = !DILocation(line: 2, column: 11, scope: !13) - !13 = distinct !DILexicalBlock(scope: !4, file: !5, line: 10, column: 28) - -... ---- -name: _Z3fooi -alignment: 4 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -hasWinCFI: false -noPhis: false -isSSA: true -noVRegs: false -hasFakeUses: false -callsEHReturn: false -callsUnwindInit: false -hasEHCatchret: false -hasEHScopes: false -hasEHFunclets: false -isOutlined: false -debugInstrRef: false -failsVerification: false -tracksDebugUserValues: false -registers: - - { id: 0, class: _, preferred-register: '', flags: [ ] } - - { id: 1, class: _, preferred-register: '', flags: [ ] } - - { id: 2, class: _, preferred-register: '', flags: [ ] } - - { id: 3, class: _, preferred-register: '', flags: [ ] } -liveins: - - { reg: '$w0', virtual-reg: '' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 1 - adjustsStack: false - hasCalls: false - stackProtector: '' - functionContext: '' - maxCallFrameSize: 4294967295 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - hasTailCall: false - isCalleeSavedInfoValid: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: [] -stack: [] -entry_values: [] -callSites: [] -debugValueSubstitutions: [] -constants: [] -machineFunctionInfo: {} -body: | - bb.1.entry: - liveins: $w0 - - %0:_(s32) = COPY $w0 - %1:_(s32) = G_CONSTANT i32 1 - %3:_(s32) = G_CONSTANT i32 0 - DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 - %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 - $w0 = COPY %3(s32) - RET_ReallyLR implicit $w0 - )"; - auto TM = createTargetMachine(Triple::normalize("aarch64--"), "", ""); - MachineModuleInfo MMI(TM.get()); - std::unique_ptr M = parseMIR(*TM, MIR, MMI, &C); - 
ASSERT_TRUE(M); - - DroppedVariableStatsMIR Stats; - auto *MF = MMI.getMachineFunction(*M->getFunction("_Z3fooi")); - Stats.runBeforePass("Test", MF); - - // This loop simulates an IR pass that drops debug information. - for (auto &MBB : *MF) { - for (auto &MI : MBB) { - if (MI.isDebugValueLike()) { - MI.eraseFromParent(); - break; - } - } - break; - } - - Stats.runAfterPass("Test", MF); - ASSERT_EQ(Stats.getPassDroppedVariables(), true); -} - -// This test ensures that if a DBG_VALUE is dropped after an optimization pass, -// but an instruction that has a scope which is a child of the DBG_VALUE scope -// still exists, and the DBG_VALUE is inlined at another location, debug -// information is conisdered not dropped. -TEST(DroppedVariableStatsMIR, InlinedAt) { - InitializeAllTargetInfos(); - InitializeAllTargets(); - InitializeAllTargetMCs(); - PassInstrumentationCallbacks PIC; - PassInstrumentation PI(&PIC); - - LLVMContext C; - - const char *MIR = - R"( ---- | - ; ModuleID = '/tmp/test.ll' - source_filename = "/tmp/test.ll" - target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" - - define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 { - entry: - #dbg_value(i32 %x, !10, !DIExpression(), !11) - %add = add nsw i32 %x, 1, !dbg !12 - ret i32 0 - } - - !llvm.dbg.cu = !{!0} - !llvm.module.flags = !{!2} - !llvm.ident = !{!3} - - !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") - !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/") - !2 = !{i32 2, !"Debug Info Version", i32 3} - !3 = !{!"clang"} - !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) - !5 = !DIFile(filename: "/tmp/code.cpp", directory: "") - !6 = !DISubroutineType(types: !7) - !7 = !{!8, !8} - !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) - !9 = !{!10} - !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8) - !11 = !DILocation(line: 0, scope: !4, inlinedAt: !14) - !12 = !DILocation(line: 2, column: 11, scope: !13) - !13 = distinct !DILexicalBlock(scope: !4, file: !5, line: 10, column: 28) - !14 = !DILocation(line: 3, column: 2, scope: !4) - -... 
---- -name: _Z3fooi -alignment: 4 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -hasWinCFI: false -noPhis: false -isSSA: true -noVRegs: false -hasFakeUses: false -callsEHReturn: false -callsUnwindInit: false -hasEHCatchret: false -hasEHScopes: false -hasEHFunclets: false -isOutlined: false -debugInstrRef: false -failsVerification: false -tracksDebugUserValues: false -registers: - - { id: 0, class: _, preferred-register: '', flags: [ ] } - - { id: 1, class: _, preferred-register: '', flags: [ ] } - - { id: 2, class: _, preferred-register: '', flags: [ ] } - - { id: 3, class: _, preferred-register: '', flags: [ ] } -liveins: - - { reg: '$w0', virtual-reg: '' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 1 - adjustsStack: false - hasCalls: false - stackProtector: '' - functionContext: '' - maxCallFrameSize: 4294967295 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - hasTailCall: false - isCalleeSavedInfoValid: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: [] -stack: [] -entry_values: [] -callSites: [] -debugValueSubstitutions: [] -constants: [] -machineFunctionInfo: {} -body: | - bb.1.entry: - liveins: $w0 - - %0:_(s32) = COPY $w0 - %1:_(s32) = G_CONSTANT i32 1 - %3:_(s32) = G_CONSTANT i32 0 - DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 - %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 - $w0 = COPY %3(s32) - RET_ReallyLR implicit $w0 - )"; - auto TM = createTargetMachine(Triple::normalize("aarch64--"), "", ""); - MachineModuleInfo MMI(TM.get()); - std::unique_ptr M = parseMIR(*TM, MIR, MMI, &C); - ASSERT_TRUE(M); - - DroppedVariableStatsMIR Stats; - auto *MF = MMI.getMachineFunction(*M->getFunction("_Z3fooi")); - Stats.runBeforePass("Test", MF); - - // This loop simulates an IR pass that drops debug information. - for (auto &MBB : *MF) { - for (auto &MI : MBB) { - if (MI.isDebugValueLike()) { - MI.eraseFromParent(); - break; - } - } - break; - } - - Stats.runAfterPass("Test", MF); - ASSERT_EQ(Stats.getPassDroppedVariables(), false); -} - -// This test ensures that if a DBG_VALUE is dropped after an optimization pass, -// but an instruction that has a scope which is a child of the DBG_VALUE scope -// still exists, and the DBG_VALUE and the instruction are inlined at another -// location, debug information is conisdered dropped. 
-TEST(DroppedVariableStatsMIR, InlinedAtShared) { - InitializeAllTargetInfos(); - InitializeAllTargets(); - InitializeAllTargetMCs(); - PassInstrumentationCallbacks PIC; - PassInstrumentation PI(&PIC); - - LLVMContext C; - - const char *MIR = - R"( ---- | - ; ModuleID = '/tmp/test.ll' - source_filename = "/tmp/test.ll" - target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" - - define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 { - entry: - #dbg_value(i32 %x, !10, !DIExpression(), !11) - %add = add nsw i32 %x, 1, !dbg !12 - ret i32 0 - } - - !llvm.dbg.cu = !{!0} - !llvm.module.flags = !{!2} - !llvm.ident = !{!3} - - !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") - !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/") - !2 = !{i32 2, !"Debug Info Version", i32 3} - !3 = !{!"clang"} - !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) - !5 = !DIFile(filename: "/tmp/code.cpp", directory: "") - !6 = !DISubroutineType(types: !7) - !7 = !{!8, !8} - !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) - !9 = !{!10} - !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8) - !11 = !DILocation(line: 0, scope: !4, inlinedAt: !14) - !12 = !DILocation(line: 2, column: 11, scope: !13, inlinedAt: !14) - !13 = distinct !DILexicalBlock(scope: !4, file: !5, line: 10, column: 28) - !14 = !DILocation(line: 3, column: 2, scope: !4) - -... ---- -name: _Z3fooi -alignment: 4 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -hasWinCFI: false -noPhis: false -isSSA: true -noVRegs: false -hasFakeUses: false -callsEHReturn: false -callsUnwindInit: false -hasEHCatchret: false -hasEHScopes: false -hasEHFunclets: false -isOutlined: false -debugInstrRef: false -failsVerification: false -tracksDebugUserValues: false -registers: - - { id: 0, class: _, preferred-register: '', flags: [ ] } - - { id: 1, class: _, preferred-register: '', flags: [ ] } - - { id: 2, class: _, preferred-register: '', flags: [ ] } - - { id: 3, class: _, preferred-register: '', flags: [ ] } -liveins: - - { reg: '$w0', virtual-reg: '' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 1 - adjustsStack: false - hasCalls: false - stackProtector: '' - functionContext: '' - maxCallFrameSize: 4294967295 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - hasTailCall: false - isCalleeSavedInfoValid: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: [] -stack: [] -entry_values: [] -callSites: [] -debugValueSubstitutions: [] -constants: [] -machineFunctionInfo: {} -body: | - bb.1.entry: - liveins: $w0 - - %0:_(s32) = COPY $w0 - %1:_(s32) = G_CONSTANT i32 1 - %3:_(s32) = G_CONSTANT i32 0 - DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 - %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 - $w0 = COPY %3(s32) - RET_ReallyLR implicit $w0 - )"; - auto TM = createTargetMachine(Triple::normalize("aarch64--"), "", ""); - 
MachineModuleInfo MMI(TM.get()); - std::unique_ptr M = parseMIR(*TM, MIR, MMI, &C); - ASSERT_TRUE(M); - - DroppedVariableStatsMIR Stats; - auto *MF = MMI.getMachineFunction(*M->getFunction("_Z3fooi")); - Stats.runBeforePass("Test", MF); - - // This loop simulates an IR pass that drops debug information. - for (auto &MBB : *MF) { - for (auto &MI : MBB) { - if (MI.isDebugValueLike()) { - MI.eraseFromParent(); - break; - } - } - break; - } - - Stats.runAfterPass("Test", MF); - ASSERT_EQ(Stats.getPassDroppedVariables(), true); -} - -// This test ensures that if a DBG_VALUE is dropped after an optimization pass, -// but an instruction that has a scope which is a child of the DBG_VALUE scope -// still exists, and the instruction is inlined at a location that is the -// DBG_VALUE's inlined at location, debug information is conisdered dropped. -TEST(DroppedVariableStatsMIR, InlinedAtChild) { - InitializeAllTargetInfos(); - InitializeAllTargets(); - InitializeAllTargetMCs(); - PassInstrumentationCallbacks PIC; - PassInstrumentation PI(&PIC); - - LLVMContext C; - - const char *MIR = - R"( ---- | - ; ModuleID = '/tmp/test.ll' - source_filename = "/tmp/test.ll" - target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" - - define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 { - entry: - #dbg_value(i32 %x, !10, !DIExpression(), !11) - %add = add nsw i32 %x, 1, !dbg !12 - ret i32 0 - } - - !llvm.dbg.cu = !{!0} - !llvm.module.flags = !{!2} - !llvm.ident = !{!3} - - !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") - !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/") - !2 = !{i32 2, !"Debug Info Version", i32 3} - !3 = !{!"clang"} - !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) - !5 = !DIFile(filename: "/tmp/code.cpp", directory: "") - !6 = !DISubroutineType(types: !7) - !7 = !{!8, !8} - !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) - !9 = !{!10} - !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8) - !11 = !DILocation(line: 0, scope: !4, inlinedAt: !14) - !12 = !DILocation(line: 2, column: 11, scope: !13, inlinedAt: !15) - !13 = distinct !DILexicalBlock(scope: !4, file: !5, line: 10, column: 28) - !14 = !DILocation(line: 3, column: 2, scope: !4) - !15 = !DILocation(line: 4, column: 5, scope: !13, inlinedAt: !14) - -... 
---- -name: _Z3fooi -alignment: 4 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -hasWinCFI: false -noPhis: false -isSSA: true -noVRegs: false -hasFakeUses: false -callsEHReturn: false -callsUnwindInit: false -hasEHCatchret: false -hasEHScopes: false -hasEHFunclets: false -isOutlined: false -debugInstrRef: false -failsVerification: false -tracksDebugUserValues: false -registers: - - { id: 0, class: _, preferred-register: '', flags: [ ] } - - { id: 1, class: _, preferred-register: '', flags: [ ] } - - { id: 2, class: _, preferred-register: '', flags: [ ] } - - { id: 3, class: _, preferred-register: '', flags: [ ] } -liveins: - - { reg: '$w0', virtual-reg: '' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 1 - adjustsStack: false - hasCalls: false - stackProtector: '' - functionContext: '' - maxCallFrameSize: 4294967295 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - hasTailCall: false - isCalleeSavedInfoValid: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: [] -stack: [] -entry_values: [] -callSites: [] -debugValueSubstitutions: [] -constants: [] -machineFunctionInfo: {} -body: | - bb.1.entry: - liveins: $w0 - - %0:_(s32) = COPY $w0 - %1:_(s32) = G_CONSTANT i32 1 - %3:_(s32) = G_CONSTANT i32 0 - DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 - %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 - $w0 = COPY %3(s32) - RET_ReallyLR implicit $w0 - )"; - auto TM = createTargetMachine(Triple::normalize("aarch64--"), "", ""); - MachineModuleInfo MMI(TM.get()); - std::unique_ptr M = parseMIR(*TM, MIR, MMI, &C); - ASSERT_TRUE(M); - - DroppedVariableStatsMIR Stats; - auto *MF = MMI.getMachineFunction(*M->getFunction("_Z3fooi")); - Stats.runBeforePass("Test", MF); - - // This loop simulates an IR pass that drops debug information. - for (auto &MBB : *MF) { - for (auto &MI : MBB) { - if (MI.isDebugValueLike()) { - MI.eraseFromParent(); - break; - } - } - break; - } - - Stats.runAfterPass("Test", MF); - ASSERT_EQ(Stats.getPassDroppedVariables(), true); -} - -} // end anonymous namespace From 80987ef4b609301c0f4cf2de62f91aff6d32418f Mon Sep 17 00:00:00 2001 From: Shubham Sandeep Rastogi Date: Tue, 3 Dec 2024 12:51:24 -0800 Subject: [PATCH 127/191] Revert "Reland [NFC] Move DroppedVariableStats to its own file and redesign it to be extensible. (#117042)" This reverts commit acf3b1aa932b2237c181686e52bc61584a80a3ff. 
Broke https://lab.llvm.org/buildbot/#/builders/76/builds/5002 tools/clang/lib/CodeGen/CMakeFiles/obj.clangCodeGen.dir/BackendUtil.cpp.o:(.toc+0x258): undefined reference to `vtable for llvm::DroppedVariableStatsIR' --- .../llvm/CodeGen/DroppedVariableStats.h | 226 ------------------ .../llvm/Passes/StandardInstrumentations.h | 80 ++++++- llvm/lib/CodeGen/CMakeLists.txt | 1 - llvm/lib/CodeGen/DroppedVariableStats.cpp | 194 --------------- llvm/lib/Passes/StandardInstrumentations.cpp | 178 +++++++++++++- llvm/unittests/CodeGen/CMakeLists.txt | 1 - llvm/unittests/IR/CMakeLists.txt | 1 + .../DroppedVariableStatsTest.cpp} | 74 +++--- 8 files changed, 299 insertions(+), 456 deletions(-) delete mode 100644 llvm/include/llvm/CodeGen/DroppedVariableStats.h delete mode 100644 llvm/lib/CodeGen/DroppedVariableStats.cpp rename llvm/unittests/{CodeGen/DroppedVariableStatsIRTest.cpp => IR/DroppedVariableStatsTest.cpp} (91%) diff --git a/llvm/include/llvm/CodeGen/DroppedVariableStats.h b/llvm/include/llvm/CodeGen/DroppedVariableStats.h deleted file mode 100644 index 371d775b02e87..0000000000000 --- a/llvm/include/llvm/CodeGen/DroppedVariableStats.h +++ /dev/null @@ -1,226 +0,0 @@ -///===- DroppedVariableStats.h - Opt Diagnostics -*- C++ -*----------------===// -/// -/// Part of the LLVM Project, under the Apache License v2.0 with LLVM -/// Exceptions. See https://llvm.org/LICENSE.txt for license information. -/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -/// -///===---------------------------------------------------------------------===// -/// \file -/// Dropped Variable Statistics for Debug Information. Reports any number -/// of #dbg_value that get dropped due to an optimization pass. -/// -///===---------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_DROPPEDVARIABLESTATS_H -#define LLVM_CODEGEN_DROPPEDVARIABLESTATS_H - -#include "llvm/CodeGen/MachinePassManager.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/DiagnosticInfo.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/PassInstrumentation.h" - -namespace llvm { - -/// A unique key that represents a debug variable. -/// First const DIScope *: Represents the scope of the debug variable. -/// Second const DIScope *: Represents the InlinedAt scope of the debug -/// variable. const DILocalVariable *: It is a pointer to the debug variable -/// itself. -using VarID = - std::tuple; - -/// A base class to collect and print dropped debug information variable -/// statistics. -class DroppedVariableStats { -public: - DroppedVariableStats(bool DroppedVarStatsEnabled) - : DroppedVariableStatsEnabled(DroppedVarStatsEnabled) { - if (DroppedVarStatsEnabled) - llvm::outs() - << "Pass Level, Pass Name, Num of Dropped Variables, Func or " - "Module Name\n"; - }; - - virtual ~DroppedVariableStats() = default; - - // We intend this to be unique per-compilation, thus no copies. 
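[Editor's note] The `undefined reference to 'vtable for llvm::DroppedVariableStatsIR'` quoted in the revert message above is the classic symptom of a vtable that is only emitted in a translation unit the consumer does not link. Under the Itanium C++ ABI the vtable lives in the TU that defines the class's key function (its first non-inline, non-pure virtual member), so moving the virtual definitions into llvm/lib/CodeGen while clang's BackendUtil.cpp still uses the class can leave the symbol unresolved on some link configurations. The sketch below only illustrates that general mechanism; the type and file names (`Stats`, `stats.h`, `stats.cpp`, `user.cpp`) are invented for the example and this is not the actual fix applied in LLVM, which here is simply the revert.

```cpp
// stats.h -- all virtuals declared, none defined inline, so ~Stats() is the
// key function and the vtable is emitted only where ~Stats() is defined.
struct Stats {
  virtual ~Stats();        // key function (first non-inline, non-pure virtual)
  virtual void report();
};

// stats.cpp -- must end up in a library/object that the user actually links,
// otherwise users see "undefined reference to `vtable for Stats'".
#include "stats.h"
Stats::~Stats() = default;  // vtable for Stats is emitted in this TU
void Stats::report() {}

// user.cpp -- compiles fine; links only if stats.o (or its library) is on the
// link line, because constructing Stats references its vtable.
#include "stats.h"
int main() {
  Stats S;
  S.report();
  return 0;
}
```

Typical remedies for this failure mode are adding the defining component to the consumer's link dependencies or keeping the virtual definitions inline in the header; the patch that follows takes neither route and instead restores the previous file layout.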
- DroppedVariableStats(const DroppedVariableStats &) = delete; - void operator=(const DroppedVariableStats &) = delete; - - bool getPassDroppedVariables() { return PassDroppedVariables; } - -protected: - void setup() { - DebugVariablesStack.push_back( - {DenseMap()}); - InlinedAts.push_back( - {DenseMap>()}); - } - - void cleanup() { - assert(!DebugVariablesStack.empty() && - "DebugVariablesStack shouldn't be empty!"); - assert(!InlinedAts.empty() && "InlinedAts shouldn't be empty!"); - DebugVariablesStack.pop_back(); - InlinedAts.pop_back(); - } - - bool DroppedVariableStatsEnabled = false; - struct DebugVariables { - /// DenseSet of VarIDs before an optimization pass has run. - DenseSet DebugVariablesBefore; - /// DenseSet of VarIDs after an optimization pass has run. - DenseSet DebugVariablesAfter; - }; - -protected: - /// A stack of a DenseMap, that maps DebugVariables for every pass to an - /// llvm::Function. A stack is used because an optimization pass can call - /// other passes. - SmallVector> DebugVariablesStack; - - /// A DenseSet tracking whether a scope was visited before. - DenseSet VisitedScope; - /// A stack of DenseMaps, which map the name of an llvm::Function to a - /// DenseMap of VarIDs and their inlinedAt locations before an optimization - /// pass has run. - SmallVector>> InlinedAts; - /// Calculate the number of dropped variables in an llvm::Function or - /// llvm::MachineFunction and print the relevant information to stdout. - void calculateDroppedStatsAndPrint(DebugVariables &DbgVariables, - StringRef FuncName, StringRef PassID, - StringRef FuncOrModName, - StringRef PassLevel, const Function *Func); - - /// Check if a \p Var has been dropped or is a false positive. Also update the - /// \p DroppedCount if a debug variable is dropped. - bool updateDroppedCount(DILocation *DbgLoc, const DIScope *Scope, - const DIScope *DbgValScope, - DenseMap &InlinedAtsMap, - VarID Var, unsigned &DroppedCount); - /// Run code to populate relevant data structures over an llvm::Function or - /// llvm::MachineFunction. - void run(DebugVariables &DbgVariables, StringRef FuncName, bool Before); - /// Populate the VarIDSet and InlinedAtMap with the relevant information - /// needed for before and after pass analysis to determine dropped variable - /// status. - void populateVarIDSetAndInlinedMap( - const DILocalVariable *DbgVar, DebugLoc DbgLoc, DenseSet &VarIDSet, - DenseMap> &InlinedAtsMap, - StringRef FuncName, bool Before); - /// Visit every llvm::Instruction or llvm::MachineInstruction and check if the - /// debug variable denoted by its ID \p Var may have been dropped by an - /// optimization pass. - virtual void - visitEveryInstruction(unsigned &DroppedCount, - DenseMap &InlinedAtsMap, - VarID Var) = 0; - /// Visit every debug record in an llvm::Function or llvm::MachineFunction - /// and call populateVarIDSetAndInlinedMap on it. - virtual void visitEveryDebugRecord( - DenseSet &VarIDSet, - DenseMap> &InlinedAtsMap, - StringRef FuncName, bool Before) = 0; - -private: - /// Remove a dropped debug variable's VarID from all Sets in the - /// DroppedVariablesBefore stack. - void removeVarFromAllSets(VarID Var, const Function *F) { - // Do not remove Var from the last element, it will be popped from the - // stack. - for (auto &DebugVariablesMap : llvm::drop_end(DebugVariablesStack)) - DebugVariablesMap[F].DebugVariablesBefore.erase(Var); - } - /// Return true if \p Scope is the same as \p DbgValScope or a child scope of - /// \p DbgValScope, return false otherwise. 
- bool isScopeChildOfOrEqualTo(const DIScope *Scope, - const DIScope *DbgValScope); - /// Return true if \p InlinedAt is the same as \p DbgValInlinedAt or part of - /// the InlinedAt chain, return false otherwise. - bool isInlinedAtChildOfOrEqualTo(const DILocation *InlinedAt, - const DILocation *DbgValInlinedAt); - bool PassDroppedVariables = false; -}; - -/// A class to collect and print dropped debug information due to LLVM IR -/// optimization passes. After every LLVM IR pass is run, it will print how many -/// #dbg_values were dropped due to that pass. -class DroppedVariableStatsIR : public DroppedVariableStats { -public: - DroppedVariableStatsIR(bool DroppedVarStatsEnabled) - : llvm::DroppedVariableStats(DroppedVarStatsEnabled) {} - - virtual ~DroppedVariableStatsIR() = default; - - void runBeforePass(Any IR) { - setup(); - if (const auto *M = unwrapIR(IR)) - return this->runOnModule(M, true); - if (const auto *F = unwrapIR(IR)) - return this->runOnFunction(F, true); - } - - void runAfterPass(StringRef P, Any IR) { - if (const auto *M = unwrapIR(IR)) - runAfterPassModule(P, M); - else if (const auto *F = unwrapIR(IR)) - runAfterPassFunction(P, F); - cleanup(); - } - - void registerCallbacks(PassInstrumentationCallbacks &PIC); - -private: - const Function *Func; - - void runAfterPassFunction(StringRef PassID, const Function *F) { - runOnFunction(F, false); - calculateDroppedVarStatsOnFunction(F, PassID, F->getName().str(), - "Function"); - } - - void runAfterPassModule(StringRef PassID, const Module *M) { - runOnModule(M, false); - calculateDroppedVarStatsOnModule(M, PassID, M->getName().str(), "Module"); - } - /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or - /// after a pass has run to facilitate dropped variable calculation for an - /// llvm::Function. - void runOnFunction(const Function *F, bool Before); - /// Iterate over all Instructions in a Function and report any dropped debug - /// information. - void calculateDroppedVarStatsOnFunction(const Function *F, StringRef PassID, - StringRef FuncOrModName, - StringRef PassLevel); - /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or - /// after a pass has run to facilitate dropped variable calculation for an - /// llvm::Module. Calls runOnFunction on every Function in the Module. - void runOnModule(const Module *M, bool Before); - /// Iterate over all Functions in a Module and report any dropped debug - /// information. Will call calculateDroppedVarStatsOnFunction on every - /// Function. - void calculateDroppedVarStatsOnModule(const Module *M, StringRef PassID, - StringRef FuncOrModName, - StringRef PassLevel); - /// Override base class method to run on an llvm::Function specifically. - virtual void - visitEveryInstruction(unsigned &DroppedCount, - DenseMap &InlinedAtsMap, - VarID Var) override; - /// Override base class method to run on #dbg_values specifically. - virtual void visitEveryDebugRecord( - DenseSet &VarIDSet, - DenseMap> &InlinedAtsMap, - StringRef FuncName, bool Before) override; - - template static const IRUnitT *unwrapIR(Any IR) { - const IRUnitT **IRPtr = llvm::any_cast(&IR); - return IRPtr ? 
*IRPtr : nullptr; - } -}; - -} // namespace llvm - -#endif diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h index 12a34c099eaff..9301a12c740ee 100644 --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -19,7 +19,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" -#include "llvm/CodeGen/DroppedVariableStats.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -580,6 +579,83 @@ class PrintCrashIRInstrumentation { static void SignalHandler(void *); }; +/// A class to collect and print dropped debug information variable statistics. +/// After every LLVM IR pass is run, it will print how many #dbg_values were +/// dropped due to that pass. +class DroppedVariableStats { +public: + DroppedVariableStats(bool DroppedVarStatsEnabled) { + if (DroppedVarStatsEnabled) + llvm::outs() + << "Pass Level, Pass Name, Num of Dropped Variables, Func or " + "Module Name\n"; + }; + // We intend this to be unique per-compilation, thus no copies. + DroppedVariableStats(const DroppedVariableStats &) = delete; + void operator=(const DroppedVariableStats &) = delete; + + void registerCallbacks(PassInstrumentationCallbacks &PIC); + void runBeforePass(StringRef PassID, Any IR); + void runAfterPass(StringRef PassID, Any IR, const PreservedAnalyses &PA); + void runAfterPassInvalidated(StringRef PassID, const PreservedAnalyses &PA); + bool getPassDroppedVariables() { return PassDroppedVariables; } + +private: + bool PassDroppedVariables = false; + /// A unique key that represents a #dbg_value. + using VarID = + std::tuple; + + struct DebugVariables { + /// DenseSet of VarIDs before an optimization pass has run. + DenseSet DebugVariablesBefore; + /// DenseSet of VarIDs after an optimization pass has run. + DenseSet DebugVariablesAfter; + }; + + /// A stack of a DenseMap, that maps DebugVariables for every pass to an + /// llvm::Function. A stack is used because an optimization pass can call + /// other passes. + SmallVector> DebugVariablesStack; + + /// A DenseSet tracking whether a scope was visited before. + DenseSet VisitedScope; + /// A stack of DenseMaps, which map the name of an llvm::Function to a + /// DenseMap of VarIDs and their inlinedAt locations before an optimization + /// pass has run. + SmallVector>> InlinedAts; + + /// Iterate over all Functions in a Module and report any dropped debug + /// information. Will call calculateDroppedVarStatsOnFunction on every + /// Function. + void calculateDroppedVarStatsOnModule(const Module *M, StringRef PassID, + std::string FuncOrModName, + std::string PassLevel); + /// Iterate over all Instructions in a Function and report any dropped debug + /// information. + void calculateDroppedVarStatsOnFunction(const Function *F, StringRef PassID, + std::string FuncOrModName, + std::string PassLevel); + /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or + /// after a pass has run to facilitate dropped variable calculation for an + /// llvm::Function. + void runOnFunction(const Function *F, bool Before); + /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or + /// after a pass has run to facilitate dropped variable calculation for an + /// llvm::Module. Calls runOnFunction on every Function in the Module. 
+ void runOnModule(const Module *M, bool Before); + /// Remove a dropped #dbg_value VarID from all Sets in the + /// DroppedVariablesBefore stack. + void removeVarFromAllSets(VarID Var, const Function *F); + /// Return true if \p Scope is the same as \p DbgValScope or a child scope of + /// \p DbgValScope, return false otherwise. + bool isScopeChildOfOrEqualTo(DIScope *Scope, const DIScope *DbgValScope); + /// Return true if \p InlinedAt is the same as \p DbgValInlinedAt or part of + /// the InlinedAt chain, return false otherwise. + bool isInlinedAtChildOfOrEqualTo(const DILocation *InlinedAt, + const DILocation *DbgValInlinedAt); +}; + /// This class provides an interface to register all the standard pass /// instrumentations and manages their state (if any). class StandardInstrumentations { @@ -597,7 +673,7 @@ class StandardInstrumentations { PrintCrashIRInstrumentation PrintCrashIR; IRChangedTester ChangeTester; VerifyInstrumentation Verify; - DroppedVariableStatsIR DroppedStatsIR; + DroppedVariableStats DroppedStats; bool VerifyEach; diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 263d4a9ee94d2..7b47c0e6f75db 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -50,7 +50,6 @@ add_llvm_component_library(LLVMCodeGen DeadMachineInstructionElim.cpp DetectDeadLanes.cpp DFAPacketizer.cpp - DroppedVariableStats.cpp DwarfEHPrepare.cpp EarlyIfConversion.cpp EdgeBundles.cpp diff --git a/llvm/lib/CodeGen/DroppedVariableStats.cpp b/llvm/lib/CodeGen/DroppedVariableStats.cpp deleted file mode 100644 index 122fcad1293f1..0000000000000 --- a/llvm/lib/CodeGen/DroppedVariableStats.cpp +++ /dev/null @@ -1,194 +0,0 @@ -///===- DroppedVariableStats.cpp ------------------------------------------===// -/// -/// Part of the LLVM Project, under the Apache License v2.0 with LLVM -/// Exceptions. See https://llvm.org/LICENSE.txt for license information. -/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -/// -///===---------------------------------------------------------------------===// -/// \file -/// Dropped Variable Statistics for Debug Information. Reports any number -/// of #dbg_value that get dropped due to an optimization pass. 
-/// -///===---------------------------------------------------------------------===// - -#include "llvm/CodeGen/DroppedVariableStats.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/Module.h" - -using namespace llvm; - -bool DroppedVariableStats::isScopeChildOfOrEqualTo(const DIScope *Scope, - const DIScope *DbgValScope) { - while (Scope != nullptr) { - if (VisitedScope.find(Scope) == VisitedScope.end()) { - VisitedScope.insert(Scope); - if (Scope == DbgValScope) { - VisitedScope.clear(); - return true; - } - Scope = Scope->getScope(); - } else { - VisitedScope.clear(); - return false; - } - } - return false; -} - -bool DroppedVariableStats::isInlinedAtChildOfOrEqualTo( - const DILocation *InlinedAt, const DILocation *DbgValInlinedAt) { - if (DbgValInlinedAt == InlinedAt) - return true; - if (!DbgValInlinedAt) - return false; - auto *IA = InlinedAt; - while (IA) { - if (IA == DbgValInlinedAt) - return true; - IA = IA->getInlinedAt(); - } - return false; -} - -void DroppedVariableStats::calculateDroppedStatsAndPrint( - DebugVariables &DbgVariables, StringRef FuncName, StringRef PassID, - StringRef FuncOrModName, StringRef PassLevel, const Function *Func) { - unsigned DroppedCount = 0; - DenseSet &DebugVariablesBeforeSet = DbgVariables.DebugVariablesBefore; - DenseSet &DebugVariablesAfterSet = DbgVariables.DebugVariablesAfter; - DenseMap &InlinedAtsMap = InlinedAts.back()[FuncName]; - // Find an Instruction that shares the same scope as the dropped #dbg_value or - // has a scope that is the child of the scope of the #dbg_value, and has an - // inlinedAt equal to the inlinedAt of the #dbg_value or it's inlinedAt chain - // contains the inlinedAt of the #dbg_value, if such an Instruction is found, - // debug information is dropped. - for (VarID Var : DebugVariablesBeforeSet) { - if (DebugVariablesAfterSet.contains(Var)) - continue; - visitEveryInstruction(DroppedCount, InlinedAtsMap, Var); - removeVarFromAllSets(Var, Func); - } - if (DroppedCount > 0) { - llvm::outs() << PassLevel << ", " << PassID << ", " << DroppedCount << ", " - << FuncOrModName << "\n"; - PassDroppedVariables = true; - } else - PassDroppedVariables = false; -} - -bool DroppedVariableStats::updateDroppedCount( - DILocation *DbgLoc, const DIScope *Scope, const DIScope *DbgValScope, - DenseMap &InlinedAtsMap, VarID Var, - unsigned &DroppedCount) { - - // If the Scope is a child of, or equal to the DbgValScope and is inlined at - // the Var's InlinedAt location, return true to signify that the Var has been - // dropped. - if (isScopeChildOfOrEqualTo(Scope, DbgValScope)) - if (isInlinedAtChildOfOrEqualTo(DbgLoc->getInlinedAt(), - InlinedAtsMap[Var])) { - // Found another instruction in the variable's scope, so there exists a - // break point at which the variable could be observed. Count it as - // dropped. - DroppedCount++; - return true; - } - return false; -} - -void DroppedVariableStats::run(DebugVariables &DbgVariables, StringRef FuncName, - bool Before) { - auto &VarIDSet = (Before ? 
DbgVariables.DebugVariablesBefore - : DbgVariables.DebugVariablesAfter); - auto &InlinedAtsMap = InlinedAts.back(); - if (Before) - InlinedAtsMap.try_emplace(FuncName, DenseMap()); - VarIDSet = DenseSet(); - visitEveryDebugRecord(VarIDSet, InlinedAtsMap, FuncName, Before); -} - -void DroppedVariableStats::populateVarIDSetAndInlinedMap( - const DILocalVariable *DbgVar, DebugLoc DbgLoc, DenseSet &VarIDSet, - DenseMap> &InlinedAtsMap, - StringRef FuncName, bool Before) { - VarID Key{DbgVar->getScope(), DbgLoc->getInlinedAtScope(), DbgVar}; - VarIDSet.insert(Key); - if (Before) - InlinedAtsMap[FuncName].try_emplace(Key, DbgLoc.getInlinedAt()); -} - -void DroppedVariableStatsIR::runOnFunction(const Function *F, bool Before) { - auto &DebugVariables = DebugVariablesStack.back()[F]; - auto FuncName = F->getName(); - Func = F; - run(DebugVariables, FuncName, Before); -} - -void DroppedVariableStatsIR::calculateDroppedVarStatsOnFunction( - const Function *F, StringRef PassID, StringRef FuncOrModName, - StringRef PassLevel) { - Func = F; - StringRef FuncName = F->getName(); - DebugVariables &DbgVariables = DebugVariablesStack.back()[F]; - calculateDroppedStatsAndPrint(DbgVariables, FuncName, PassID, FuncOrModName, - PassLevel, Func); -} - -void DroppedVariableStatsIR::runOnModule(const Module *M, bool Before) { - for (auto &F : *M) - runOnFunction(&F, Before); -} - -void DroppedVariableStatsIR::calculateDroppedVarStatsOnModule( - const Module *M, StringRef PassID, StringRef FuncOrModName, - StringRef PassLevel) { - for (auto &F : *M) { - calculateDroppedVarStatsOnFunction(&F, PassID, FuncOrModName, PassLevel); - } -} - -void DroppedVariableStatsIR::registerCallbacks( - PassInstrumentationCallbacks &PIC) { - if (!DroppedVariableStatsEnabled) - return; - - PIC.registerBeforeNonSkippedPassCallback( - [this](StringRef P, Any IR) { return runBeforePass(IR); }); - PIC.registerAfterPassCallback( - [this](StringRef P, Any IR, const PreservedAnalyses &PA) { - return runAfterPass(P, IR); - }); - PIC.registerAfterPassInvalidatedCallback( - [this](StringRef P, const PreservedAnalyses &PA) { return cleanup(); }); -} - -void DroppedVariableStatsIR::visitEveryInstruction( - unsigned &DroppedCount, DenseMap &InlinedAtsMap, - VarID Var) { - const DIScope *DbgValScope = std::get<0>(Var); - for (const auto &I : instructions(Func)) { - auto *DbgLoc = I.getDebugLoc().get(); - if (!DbgLoc) - continue; - if (updateDroppedCount(DbgLoc, DbgLoc->getScope(), DbgValScope, - InlinedAtsMap, Var, DroppedCount)) - break; - } -} - -void DroppedVariableStatsIR::visitEveryDebugRecord( - DenseSet &VarIDSet, - DenseMap> &InlinedAtsMap, - StringRef FuncName, bool Before) { - for (const auto &I : instructions(Func)) { - for (DbgRecord &DR : I.getDbgRecordRange()) { - if (auto *Dbg = dyn_cast(&DR)) { - auto *DbgVar = Dbg->getVariable(); - auto DbgLoc = DR.getDebugLoc(); - populateVarIDSetAndInlinedMap(DbgVar, DbgLoc, VarIDSet, InlinedAtsMap, - FuncName, Before); - } - } - } -} diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index b766517e68eba..6259f8f736c80 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -2462,7 +2462,7 @@ StandardInstrumentations::StandardInstrumentations( PrintChanged == ChangePrinter::ColourDiffVerbose || PrintChanged == ChangePrinter::ColourDiffQuiet), WebsiteChangeReporter(PrintChanged == ChangePrinter::DotCfgVerbose), - Verify(DebugLogging), DroppedStatsIR(DroppedVarStats), + 
Verify(DebugLogging), DroppedStats(DroppedVarStats), VerifyEach(VerifyEach) {} PrintCrashIRInstrumentation *PrintCrashIRInstrumentation::CrashReporter = @@ -2523,6 +2523,180 @@ void PrintCrashIRInstrumentation::registerCallbacks( }); } +void DroppedVariableStats::registerCallbacks( + PassInstrumentationCallbacks &PIC) { + if (!DroppedVarStats) + return; + + PIC.registerBeforeNonSkippedPassCallback( + [this](StringRef P, Any IR) { return this->runBeforePass(P, IR); }); + PIC.registerAfterPassCallback( + [this](StringRef P, Any IR, const PreservedAnalyses &PA) { + return this->runAfterPass(P, IR, PA); + }); + PIC.registerAfterPassInvalidatedCallback( + [this](StringRef P, const PreservedAnalyses &PA) { + return this->runAfterPassInvalidated(P, PA); + }); +} + +void DroppedVariableStats::runBeforePass(StringRef PassID, Any IR) { + DebugVariablesStack.push_back({DenseMap()}); + InlinedAts.push_back({DenseMap>()}); + if (auto *M = unwrapIR(IR)) + return this->runOnModule(M, true); + if (auto *F = unwrapIR(IR)) + return this->runOnFunction(F, true); +} + +void DroppedVariableStats::runOnFunction(const Function *F, bool Before) { + auto &DebugVariables = DebugVariablesStack.back()[F]; + auto &VarIDSet = (Before ? DebugVariables.DebugVariablesBefore + : DebugVariables.DebugVariablesAfter); + auto &InlinedAtsMap = InlinedAts.back(); + auto FuncName = F->getName(); + if (Before) + InlinedAtsMap.try_emplace(FuncName, DenseMap()); + VarIDSet = DenseSet(); + for (const auto &I : instructions(F)) { + for (DbgRecord &DR : I.getDbgRecordRange()) { + if (auto *Dbg = dyn_cast(&DR)) { + auto *DbgVar = Dbg->getVariable(); + auto DbgLoc = DR.getDebugLoc(); + VarID Key{DbgVar->getScope(), DbgLoc->getInlinedAtScope(), DbgVar}; + VarIDSet.insert(Key); + if (Before) + InlinedAtsMap[FuncName].try_emplace(Key, DbgLoc.getInlinedAt()); + } + } + } +} + +void DroppedVariableStats::runOnModule(const Module *M, bool Before) { + for (auto &F : *M) + runOnFunction(&F, Before); +} + +void DroppedVariableStats::removeVarFromAllSets(VarID Var, const Function *F) { + // Do not remove Var from the last element, it will be popped from the stack. + for (auto &DebugVariablesMap : llvm::drop_end(DebugVariablesStack)) + DebugVariablesMap[F].DebugVariablesBefore.erase(Var); +} + +void DroppedVariableStats::calculateDroppedVarStatsOnModule( + const Module *M, StringRef PassID, std::string FuncOrModName, + std::string PassLevel) { + for (auto &F : *M) { + calculateDroppedVarStatsOnFunction(&F, PassID, FuncOrModName, PassLevel); + } +} + +void DroppedVariableStats::calculateDroppedVarStatsOnFunction( + const Function *F, StringRef PassID, std::string FuncOrModName, + std::string PassLevel) { + unsigned DroppedCount = 0; + StringRef FuncName = F->getName(); + DebugVariables &DbgVariables = DebugVariablesStack.back()[F]; + DenseSet &DebugVariablesBeforeSet = DbgVariables.DebugVariablesBefore; + DenseSet &DebugVariablesAfterSet = DbgVariables.DebugVariablesAfter; + DenseMap &InlinedAtsMap = InlinedAts.back()[FuncName]; + // Find an Instruction that shares the same scope as the dropped #dbg_value or + // has a scope that is the child of the scope of the #dbg_value, and has an + // inlinedAt equal to the inlinedAt of the #dbg_value or it's inlinedAt chain + // contains the inlinedAt of the #dbg_value, if such an Instruction is found, + // debug information is dropped. 
+ for (VarID Var : DebugVariablesBeforeSet) { + if (DebugVariablesAfterSet.contains(Var)) + continue; + const DIScope *DbgValScope = std::get<0>(Var); + for (const auto &I : instructions(F)) { + auto *DbgLoc = I.getDebugLoc().get(); + if (!DbgLoc) + continue; + + auto *Scope = DbgLoc->getScope(); + if (isScopeChildOfOrEqualTo(Scope, DbgValScope)) { + if (isInlinedAtChildOfOrEqualTo(DbgLoc->getInlinedAt(), + InlinedAtsMap[Var])) { + // Found another instruction in the variable's scope, so there exists + // a break point at which the variable could be observed. Count it as + // dropped. + DroppedCount++; + break; + } + } + } + removeVarFromAllSets(Var, F); + } + if (DroppedCount > 0) { + llvm::outs() << PassLevel << ", " << PassID << ", " << DroppedCount << ", " + << FuncOrModName << "\n"; + PassDroppedVariables = true; + } else + PassDroppedVariables = false; +} + +void DroppedVariableStats::runAfterPassInvalidated( + StringRef PassID, const PreservedAnalyses &PA) { + DebugVariablesStack.pop_back(); + InlinedAts.pop_back(); +} + +void DroppedVariableStats::runAfterPass(StringRef PassID, Any IR, + const PreservedAnalyses &PA) { + std::string PassLevel; + std::string FuncOrModName; + if (auto *M = unwrapIR(IR)) { + this->runOnModule(M, false); + PassLevel = "Module"; + FuncOrModName = M->getName(); + calculateDroppedVarStatsOnModule(M, PassID, FuncOrModName, PassLevel); + } else if (auto *F = unwrapIR(IR)) { + this->runOnFunction(F, false); + PassLevel = "Function"; + FuncOrModName = F->getName(); + calculateDroppedVarStatsOnFunction(F, PassID, FuncOrModName, PassLevel); + } + + DebugVariablesStack.pop_back(); + InlinedAts.pop_back(); +} + +bool DroppedVariableStats::isScopeChildOfOrEqualTo(DIScope *Scope, + const DIScope *DbgValScope) { + while (Scope != nullptr) { + if (VisitedScope.find(Scope) == VisitedScope.end()) { + VisitedScope.insert(Scope); + if (Scope == DbgValScope) { + VisitedScope.clear(); + return true; + } + Scope = Scope->getScope(); + } else { + VisitedScope.clear(); + return false; + } + } + return false; +} + +bool DroppedVariableStats::isInlinedAtChildOfOrEqualTo( + const DILocation *InlinedAt, const DILocation *DbgValInlinedAt) { + if (DbgValInlinedAt == InlinedAt) + return true; + if (!DbgValInlinedAt) + return false; + if (!InlinedAt) + return false; + auto *IA = InlinedAt; + while (IA) { + if (IA == DbgValInlinedAt) + return true; + IA = IA->getInlinedAt(); + } + return false; +} + void StandardInstrumentations::registerCallbacks( PassInstrumentationCallbacks &PIC, ModuleAnalysisManager *MAM) { PrintIR.registerCallbacks(PIC); @@ -2538,7 +2712,7 @@ void StandardInstrumentations::registerCallbacks( WebsiteChangeReporter.registerCallbacks(PIC); ChangeTester.registerCallbacks(PIC); PrintCrashIR.registerCallbacks(PIC); - DroppedStatsIR.registerCallbacks(PIC); + DroppedStats.registerCallbacks(PIC); if (MAM) PreservedCFGChecker.registerCallbacks(PIC, *MAM); diff --git a/llvm/unittests/CodeGen/CMakeLists.txt b/llvm/unittests/CodeGen/CMakeLists.txt index 807fd1a9b7b56..963cdcc0275e1 100644 --- a/llvm/unittests/CodeGen/CMakeLists.txt +++ b/llvm/unittests/CodeGen/CMakeLists.txt @@ -27,7 +27,6 @@ add_llvm_unittest(CodeGenTests CCStateTest.cpp DIEHashTest.cpp DIETest.cpp - DroppedVariableStatsIRTest.cpp DwarfStringPoolEntryRefTest.cpp InstrRefLDVTest.cpp LowLevelTypeTest.cpp diff --git a/llvm/unittests/IR/CMakeLists.txt b/llvm/unittests/IR/CMakeLists.txt index e5c8630f3eed7..ed93ee547d223 100644 --- a/llvm/unittests/IR/CMakeLists.txt +++ b/llvm/unittests/IR/CMakeLists.txt @@ -43,6 
+43,7 @@ add_llvm_unittest(IRTests ShuffleVectorInstTest.cpp StructuralHashTest.cpp TimePassesTest.cpp + DroppedVariableStatsTest.cpp TypesTest.cpp UseTest.cpp UserTest.cpp diff --git a/llvm/unittests/CodeGen/DroppedVariableStatsIRTest.cpp b/llvm/unittests/IR/DroppedVariableStatsTest.cpp similarity index 91% rename from llvm/unittests/CodeGen/DroppedVariableStatsIRTest.cpp rename to llvm/unittests/IR/DroppedVariableStatsTest.cpp index 094ec7b657634..61f3a87bb355e 100644 --- a/llvm/unittests/CodeGen/DroppedVariableStatsIRTest.cpp +++ b/llvm/unittests/IR/DroppedVariableStatsTest.cpp @@ -1,4 +1,5 @@ -//===- unittests/IR/DroppedVariableStatsIRTest.cpp ------------------------===// +//===- unittests/IR/DroppedVariableStatsTest.cpp - TimePassesHandler tests +//----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -7,7 +8,6 @@ //===----------------------------------------------------------------------===// #include "llvm/AsmParser/Parser.h" -#include "llvm/CodeGen/DroppedVariableStats.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/LegacyPassManager.h" @@ -44,7 +44,7 @@ namespace { // This test ensures that if a #dbg_value and an instruction that exists in the // same scope as that #dbg_value are both deleted as a result of an optimization // pass, debug information is considered not dropped. -TEST(DroppedVariableStatsIR, BothDeleted) { +TEST(DroppedVariableStats, BothDeleted) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -79,8 +79,9 @@ TEST(DroppedVariableStatsIR, BothDeleted) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStatsIR Stats(true); - Stats.runBeforePass(llvm::Any(const_cast(M.get()))); + DroppedVariableStats Stats(true); + Stats.runBeforePass("Test", + llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -91,15 +92,16 @@ TEST(DroppedVariableStatsIR, BothDeleted) { } break; } + PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get()))); + llvm::Any(const_cast(M.get())), PA); ASSERT_EQ(Stats.getPassDroppedVariables(), false); } // This test ensures that if a #dbg_value is dropped after an optimization pass, // but an instruction that shares the same scope as the #dbg_value still exists, // debug information is conisdered dropped. -TEST(DroppedVariableStatsIR, DbgValLost) { +TEST(DroppedVariableStats, DbgValLost) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -134,8 +136,9 @@ TEST(DroppedVariableStatsIR, DbgValLost) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStatsIR Stats(true); - Stats.runBeforePass(llvm::Any(const_cast(M.get()))); + DroppedVariableStats Stats(true); + Stats.runBeforePass("Test", + llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -145,15 +148,16 @@ TEST(DroppedVariableStatsIR, DbgValLost) { } break; } + PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get()))); + llvm::Any(const_cast(M.get())), PA); ASSERT_EQ(Stats.getPassDroppedVariables(), true); } // This test ensures that if a #dbg_value is dropped after an optimization pass, // but an instruction that has an unrelated scope as the #dbg_value still // exists, debug information is conisdered not dropped. 
-TEST(DroppedVariableStatsIR, UnrelatedScopes) { +TEST(DroppedVariableStats, UnrelatedScopes) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -189,8 +193,9 @@ TEST(DroppedVariableStatsIR, UnrelatedScopes) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStatsIR Stats(true); - Stats.runBeforePass(llvm::Any(const_cast(M.get()))); + DroppedVariableStats Stats(true); + Stats.runBeforePass("Test", + llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -200,15 +205,16 @@ TEST(DroppedVariableStatsIR, UnrelatedScopes) { } break; } + PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get()))); + llvm::Any(const_cast(M.get())), PA); ASSERT_EQ(Stats.getPassDroppedVariables(), false); } // This test ensures that if a #dbg_value is dropped after an optimization pass, // but an instruction that has a scope which is a child of the #dbg_value scope // still exists, debug information is conisdered dropped. -TEST(DroppedVariableStatsIR, ChildScopes) { +TEST(DroppedVariableStats, ChildScopes) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -244,8 +250,9 @@ TEST(DroppedVariableStatsIR, ChildScopes) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStatsIR Stats(true); - Stats.runBeforePass(llvm::Any(const_cast(M.get()))); + DroppedVariableStats Stats(true); + Stats.runBeforePass("Test", + llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -255,8 +262,9 @@ TEST(DroppedVariableStatsIR, ChildScopes) { } break; } + PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get()))); + llvm::Any(const_cast(M.get())), PA); ASSERT_EQ(Stats.getPassDroppedVariables(), true); } @@ -264,7 +272,7 @@ TEST(DroppedVariableStatsIR, ChildScopes) { // but an instruction that has a scope which is a child of the #dbg_value scope // still exists, and the #dbg_value is inlined at another location, debug // information is conisdered not dropped. -TEST(DroppedVariableStatsIR, InlinedAt) { +TEST(DroppedVariableStats, InlinedAt) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -300,8 +308,9 @@ TEST(DroppedVariableStatsIR, InlinedAt) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStatsIR Stats(true); - Stats.runBeforePass(llvm::Any(const_cast(M.get()))); + DroppedVariableStats Stats(true); + Stats.runBeforePass("Test", + llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -311,8 +320,9 @@ TEST(DroppedVariableStatsIR, InlinedAt) { } break; } + PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get()))); + llvm::Any(const_cast(M.get())), PA); ASSERT_EQ(Stats.getPassDroppedVariables(), false); } @@ -320,7 +330,7 @@ TEST(DroppedVariableStatsIR, InlinedAt) { // but an instruction that has a scope which is a child of the #dbg_value scope // still exists, and the #dbg_value and the instruction are inlined at another // location, debug information is conisdered dropped. 
-TEST(DroppedVariableStatsIR, InlinedAtShared) { +TEST(DroppedVariableStats, InlinedAtShared) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -356,8 +366,9 @@ TEST(DroppedVariableStatsIR, InlinedAtShared) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStatsIR Stats(true); - Stats.runBeforePass(llvm::Any(const_cast(M.get()))); + DroppedVariableStats Stats(true); + Stats.runBeforePass("Test", + llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -367,8 +378,9 @@ TEST(DroppedVariableStatsIR, InlinedAtShared) { } break; } + PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get()))); + llvm::Any(const_cast(M.get())), PA); ASSERT_EQ(Stats.getPassDroppedVariables(), true); } @@ -376,7 +388,7 @@ TEST(DroppedVariableStatsIR, InlinedAtShared) { // but an instruction that has a scope which is a child of the #dbg_value scope // still exists, and the instruction is inlined at a location that is the // #dbg_value's inlined at location, debug information is conisdered dropped. -TEST(DroppedVariableStatsIR, InlinedAtChild) { +TEST(DroppedVariableStats, InlinedAtChild) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -413,8 +425,9 @@ TEST(DroppedVariableStatsIR, InlinedAtChild) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStatsIR Stats(true); - Stats.runBeforePass(llvm::Any(const_cast(M.get()))); + DroppedVariableStats Stats(true); + Stats.runBeforePass("Test", + llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -424,8 +437,9 @@ TEST(DroppedVariableStatsIR, InlinedAtChild) { } break; } + PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get()))); + llvm::Any(const_cast(M.get())), PA); ASSERT_EQ(Stats.getPassDroppedVariables(), true); } From 9bf6365237f3a8a401afc0a69d2fb6d1b809ce68 Mon Sep 17 00:00:00 2001 From: Lee Wei Date: Tue, 3 Dec 2024 13:54:36 -0700 Subject: [PATCH 128/191] [llvm] Remove `br i1 undef` from some regression tests [NFC] (#118419) This PR removes tests with `br i1 undef` under `llvm/tests/Transforms/ObjCARC, Reassociate, SCCP, SLPVectorizer...`. After this PR, I'll continue to fix tests under `llvm/tests/CodeGen`, which has more UB tests than `llvm/tests/Transforms`. 
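
For reference, the mechanical rewrite applied across these tests follows the sketch below. The function, parameter, and label names here are illustrative only and are not taken from any particular test; the point is the shape of the change: the enclosing function gains an `i1 %arg` parameter, and each `br i1 undef` branches on that argument instead, which removes the undefined branch condition while leaving the CFG (and therefore what the test exercises) unchanged.

```llvm
; Before: the branch condition is undef, so taking the branch is undefined behavior.
define void @example(ptr %p) {
entry:
  br i1 undef, label %left, label %right
left:
  ret void
right:
  ret void
}

; After: the condition is threaded in as a function argument, keeping both
; successors reachable without relying on undef.
define void @example(ptr %p, i1 %arg) {
entry:
  br i1 %arg, label %left, label %right
left:
  ret void
right:
  ret void
}
```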
--- llvm/test/Transforms/ObjCARC/allocas.ll | 12 +- llvm/test/Transforms/ObjCARC/basic.ll | 16 +- llvm/test/Transforms/ObjCARC/cfg-hazards.ll | 98 ++-- .../Transforms/ObjCARC/contract-testcases.ll | 10 +- llvm/test/Transforms/ObjCARC/empty-block.ll | 8 +- llvm/test/Transforms/ObjCARC/path-overflow.ll | 446 +++++++++--------- .../Reassociate/2011-01-26-UseAfterFree.ll | 6 +- .../Reassociate/2012-06-08-InfiniteLoop.ll | 4 +- .../Reassociate/add_across_block_crash.ll | 6 +- .../Transforms/Reassociate/infloop-deadphi.ll | 6 +- .../Reassociate/reassociate-landingpad.ll | 4 +- .../SCCP/2004-12-10-UndefBranchBug.ll | 4 +- .../SCCP/2006-10-23-IPSCCP-Crash.ll | 4 +- .../SCCP/2008-01-27-UndefCorrelate.ll | 22 +- llvm/test/Transforms/SCCP/PR26044.ll | 23 +- llvm/test/Transforms/SCCP/crash.ll | 4 +- llvm/test/Transforms/SCCP/domtree-update.ll | 6 +- .../Transforms/SCCP/fp-bc-icmp-const-fold.ll | 10 +- .../Transforms/SCCP/ipsccp-preserve-pdt.ll | 11 +- .../SCCP/pr49582-iterator-invalidation.ll | 8 +- llvm/test/Transforms/SCCP/return-zapped.ll | 4 +- ...fter-each-resolving-undefs-for-function.ll | 2 +- .../SCCP/switch-constantfold-crash.ll | 62 ++- .../SLPVectorizer/AArch64/minimum-sizes.ll | 12 +- .../AArch64/reorder-fmuladd-crash.ll | 10 +- .../SLPVectorizer/AArch64/trunc-insertion.ll | 10 +- .../SLPVectorizer/X86/crash_7zip.ll | 10 +- .../SLPVectorizer/X86/crash_bullet.ll | 74 +-- .../SLPVectorizer/X86/crash_bullet3.ll | 46 +- .../SLPVectorizer/X86/crash_dequeue.ll | 18 +- .../SLPVectorizer/X86/crash_flop7.ll | 18 +- .../X86/crash_lencod-inseltpoison.ll | 38 +- .../SLPVectorizer/X86/crash_lencod.ll | 38 +- .../SLPVectorizer/X86/crash_mandeltext.ll | 38 +- .../SLPVectorizer/X86/crash_sim4b1.ll | 62 +-- .../SLPVectorizer/X86/crash_smallpt.ll | 28 +- .../SLPVectorizer/X86/crash_vectorizeTree.ll | 18 +- llvm/test/Transforms/SLPVectorizer/X86/cse.ll | 10 +- .../X86/memory-runtime-checks.ll | 18 +- .../X86/no-scheduled-instructions.ll | 8 +- .../Transforms/SLPVectorizer/X86/ordering.ll | 10 +- .../Transforms/SLPVectorizer/X86/partail.ll | 6 +- llvm/test/Transforms/SLPVectorizer/X86/phi.ll | 6 +- .../SLPVectorizer/X86/phi_overalignedtype.ll | 6 +- .../Transforms/SLPVectorizer/X86/pr16571.ll | 8 +- .../SLPVectorizer/X86/remark_unsupported.ll | 6 +- .../SLPVectorizer/X86/reorder_repeated_ops.ll | 6 +- .../X86/reorder_with_reordered_users.ll | 10 +- .../Transforms/SLPVectorizer/X86/resched.ll | 6 +- .../X86/reuse-extracts-in-wider-vect.ll | 6 +- .../X86/revectorized_rdx_crash.ll | 6 +- .../X86/value-bug-inseltpoison.ll | 10 +- .../Transforms/SLPVectorizer/X86/value-bug.ll | 10 +- .../X86/vectorize-widest-phis.ll | 6 +- .../slp-umax-rdx-matcher-crash.ll | 6 +- llvm/test/Transforms/Scalarizer/crash-bug.ll | 8 +- .../Scalarizer/dbgloc-bug-inseltpoison.ll | 4 +- llvm/test/Transforms/Scalarizer/dbgloc-bug.ll | 4 +- .../Scalarizer/phi-unreachable-pred.ll | 20 +- .../2011-06-02-CritSwitch.ll | 6 +- .../2012-04-30-LoopUnswitch-LPad-Crash.ll | 4 +- .../2015-09-18-Addrspace.ll | 4 +- .../SimpleLoopUnswitch/delete-dead-blocks.ll | 10 +- .../formDedicatedAfterTrivial1.ll | 4 +- .../Transforms/SimpleLoopUnswitch/guards.ll | 54 ++- .../Transforms/SimpleLoopUnswitch/pr37888.ll | 2 +- .../SimpleLoopUnswitch/preserve-analyses.ll | 12 +- .../2004-12-10-SimplifyCFGCrash.ll | 20 +- .../SimplifyCFG/2006-06-12-InfLoop.ll | 32 +- .../Transforms/SimplifyCFG/branch-on-undef.ll | 6 +- .../SimplifyCFG/fold-branch-to-common-dest.ll | 4 +- llvm/test/Transforms/SimplifyCFG/pr34131.ll | 22 +- llvm/test/Transforms/Sink/dead-user.ll | 25 +- 
.../SpeculativeExecution/PR46267.ll | 4 +- .../StructurizeCFG/loop-continue-phi.ll | 27 +- 75 files changed, 834 insertions(+), 778 deletions(-) diff --git a/llvm/test/Transforms/ObjCARC/allocas.ll b/llvm/test/Transforms/ObjCARC/allocas.ll index be829882ae374..6fe2edf3e2dd4 100644 --- a/llvm/test/Transforms/ObjCARC/allocas.ll +++ b/llvm/test/Transforms/ObjCARC/allocas.ll @@ -109,16 +109,16 @@ entry: } -; CHECK: define void @test1d(ptr %x) +; CHECK: define void @test1d(ptr %x, i1 %arg) ; CHECK: @llvm.objc.retain(ptr %x) ; CHECK: @llvm.objc.retain(ptr %x) ; CHECK: @llvm.objc.release(ptr %y) ; CHECK: @llvm.objc.release(ptr %x) ; CHECK: ret void ; CHECK: } -define void @test1d(ptr %x) { +define void @test1d(ptr %x, i1 %arg) { entry: - br i1 undef, label %use_allocaA, label %use_allocaB + br i1 %arg, label %use_allocaA, label %use_allocaB use_allocaA: %allocaA = alloca ptr @@ -141,16 +141,16 @@ exit: ret void } -; CHECK: define void @test1e(ptr %x) +; CHECK: define void @test1e(ptr %x, i1 %arg) ; CHECK: @llvm.objc.retain(ptr %x) ; CHECK: @llvm.objc.retain(ptr %x) ; CHECK: @llvm.objc.release(ptr %y) ; CHECK: @llvm.objc.release(ptr %x) ; CHECK: ret void ; CHECK: } -define void @test1e(ptr %x) { +define void @test1e(ptr %x, i1 %arg) { entry: - br i1 undef, label %use_allocaA, label %use_allocaB + br i1 %arg, label %use_allocaA, label %use_allocaB use_allocaA: %allocaA = alloca ptr, i32 4 diff --git a/llvm/test/Transforms/ObjCARC/basic.ll b/llvm/test/Transforms/ObjCARC/basic.ll index 0ee59dc8ba6ab..d461bc0af680d 100644 --- a/llvm/test/Transforms/ObjCARC/basic.ll +++ b/llvm/test/Transforms/ObjCARC/basic.ll @@ -1761,13 +1761,13 @@ g: ; CHECK-LABEL: define void @test39( ; CHECK-NOT: @llvm.objc. ; CHECK: {{^}}} -define void @test39(ptr %p) { +define void @test39(ptr %p, i1 %arg) { entry: %0 = call ptr @llvm.objc.retain(ptr %p) br label %loop loop: ; preds = %loop, %entry - br i1 undef, label %loop, label %exit + br i1 %arg, label %loop, label %exit exit: ; preds = %loop call void @llvm.objc.release(ptr %0), !clang.imprecise_release !0 @@ -1779,14 +1779,14 @@ exit: ; preds = %loop ; CHECK-LABEL: define void @test39b( ; CHECK-NOT: @llvm.objc. ; CHECK: {{^}}} -define void @test39b(ptr %p) { +define void @test39b(ptr %p, i1 %arg) { entry: %0 = call ptr @llvm.objc.retain(ptr %p) br label %loop loop: ; preds = %loop, %entry store i8 0, ptr %0 - br i1 undef, label %loop, label %exit + br i1 %arg, label %loop, label %exit exit: ; preds = %loop call void @llvm.objc.release(ptr %0), !clang.imprecise_release !0 @@ -1798,14 +1798,14 @@ exit: ; preds = %loop ; CHECK-LABEL: define void @test39c( ; CHECK-NOT: @llvm.objc. ; CHECK: {{^}}} -define void @test39c(ptr %p) { +define void @test39c(ptr %p, i1 %arg) { entry: %0 = call ptr @llvm.objc.retain(ptr %p) br label %loop loop: ; preds = %loop, %entry call void @use_pointer(ptr %0) - br i1 undef, label %loop, label %exit + br i1 %arg, label %loop, label %exit exit: ; preds = %loop call void @llvm.objc.release(ptr %0), !clang.imprecise_release !0 @@ -1818,14 +1818,14 @@ exit: ; preds = %loop ; CHECK-LABEL: define void @test40( ; CHECK-NOT: @llvm.objc. 
; CHECK: {{^}}} -define void @test40(ptr %p) { +define void @test40(ptr %p, i1 %arg) { entry: %0 = call ptr @llvm.objc.retain(ptr %p) br label %loop loop: ; preds = %loop, %entry call void @use_pointer(ptr %0) - br i1 undef, label %exit, label %loop + br i1 %arg, label %exit, label %loop exit: ; preds = %loop call void @llvm.objc.release(ptr %0), !clang.imprecise_release !0 diff --git a/llvm/test/Transforms/ObjCARC/cfg-hazards.ll b/llvm/test/Transforms/ObjCARC/cfg-hazards.ll index 3e762de689d1f..d43f713d925e0 100644 --- a/llvm/test/Transforms/ObjCARC/cfg-hazards.ll +++ b/llvm/test/Transforms/ObjCARC/cfg-hazards.ll @@ -111,7 +111,7 @@ for.end: ; preds = %for.body } ; Delete nested retain+release pairs around loops. -define void @test3(ptr %a) nounwind { +define void @test3(ptr %a, i1 %arg) nounwind { ; CHECK-LABEL: @test3( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[OUTER:%.*]] = tail call ptr @llvm.objc.retain(ptr [[A:%.*]]) #[[ATTR0]] @@ -119,7 +119,7 @@ define void @test3(ptr %a) nounwind { ; CHECK: loop: ; CHECK-NEXT: call void @callee() ; CHECK-NEXT: store i8 0, ptr [[A]], align 1 -; CHECK-NEXT: br i1 undef, label [[LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: br i1 %arg, label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: call void @llvm.objc.release(ptr [[A]]) #[[ATTR0]], !clang.imprecise_release !0 ; CHECK-NEXT: ret void @@ -132,7 +132,7 @@ entry: loop: call void @callee() store i8 0, ptr %a - br i1 undef, label %loop, label %exit + br i1 %arg, label %loop, label %exit exit: call void @llvm.objc.release(ptr %a) nounwind @@ -140,7 +140,7 @@ exit: ret void } -define void @test4(ptr %a) nounwind { +define void @test4(ptr %a, i1 %arg) nounwind { ; CHECK-LABEL: @test4( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[OUTER:%.*]] = tail call ptr @llvm.objc.retain(ptr [[A:%.*]]) #[[ATTR0]] @@ -151,7 +151,7 @@ define void @test4(ptr %a) nounwind { ; CHECK-NEXT: call void @callee() ; CHECK-NEXT: call void @callee() ; CHECK-NEXT: store i8 0, ptr [[A]], align 1 -; CHECK-NEXT: br i1 undef, label [[LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: br i1 %arg, label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: call void @llvm.objc.release(ptr [[A]]) #[[ATTR0]], !clang.imprecise_release !0 ; CHECK-NEXT: ret void @@ -168,7 +168,7 @@ more: call void @callee() call void @callee() store i8 0, ptr %a - br i1 undef, label %loop, label %exit + br i1 %arg, label %loop, label %exit exit: call void @llvm.objc.release(ptr %a) nounwind @@ -176,18 +176,18 @@ exit: ret void } -define void @test5(ptr %a) nounwind { +define void @test5(ptr %a, i1 %arg) nounwind { ; CHECK-LABEL: @test5( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[OUTER:%.*]] = tail call ptr @llvm.objc.retain(ptr [[A:%.*]]) #[[ATTR0]] ; CHECK-NEXT: call void @callee() ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: br i1 undef, label [[TRUE:%.*]], label [[MORE:%.*]] +; CHECK-NEXT: br i1 %arg, label [[TRUE:%.*]], label [[MORE:%.*]] ; CHECK: true: ; CHECK-NEXT: br label [[MORE]] ; CHECK: more: -; CHECK-NEXT: br i1 undef, label [[EXIT:%.*]], label [[LOOP]] +; CHECK-NEXT: br i1 %arg, label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: call void @use_pointer(ptr [[A]]) ; CHECK-NEXT: call void @llvm.objc.release(ptr [[A]]) #[[ATTR0]], !clang.imprecise_release !0 @@ -200,13 +200,13 @@ entry: br label %loop loop: - br i1 undef, label %true, label %more + br i1 %arg, label %true, label %more true: br label %more more: - br i1 undef, label %exit, label %loop + br i1 %arg, label %exit, label %loop exit: call void @use_pointer(ptr %a) @@ 
-215,18 +215,18 @@ exit: ret void } -define void @test6(ptr %a) nounwind { +define void @test6(ptr %a, i1 %arg) nounwind { ; CHECK-LABEL: @test6( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[OUTER:%.*]] = tail call ptr @llvm.objc.retain(ptr [[A:%.*]]) #[[ATTR0]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: br i1 undef, label [[TRUE:%.*]], label [[MORE:%.*]] +; CHECK-NEXT: br i1 %arg, label [[TRUE:%.*]], label [[MORE:%.*]] ; CHECK: true: ; CHECK-NEXT: call void @callee() ; CHECK-NEXT: br label [[MORE]] ; CHECK: more: -; CHECK-NEXT: br i1 undef, label [[EXIT:%.*]], label [[LOOP]] +; CHECK-NEXT: br i1 %arg, label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: call void @use_pointer(ptr [[A]]) ; CHECK-NEXT: call void @llvm.objc.release(ptr [[A]]) #[[ATTR0]], !clang.imprecise_release !0 @@ -238,14 +238,14 @@ entry: br label %loop loop: - br i1 undef, label %true, label %more + br i1 %arg, label %true, label %more true: call void @callee() br label %more more: - br i1 undef, label %exit, label %loop + br i1 %arg, label %exit, label %loop exit: call void @use_pointer(ptr %a) @@ -254,19 +254,19 @@ exit: ret void } -define void @test7(ptr %a) nounwind { +define void @test7(ptr %a, i1 %arg) nounwind { ; CHECK-LABEL: @test7( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[OUTER:%.*]] = tail call ptr @llvm.objc.retain(ptr [[A:%.*]]) #[[ATTR0]] ; CHECK-NEXT: call void @callee() ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: br i1 undef, label [[TRUE:%.*]], label [[MORE:%.*]] +; CHECK-NEXT: br i1 %arg, label [[TRUE:%.*]], label [[MORE:%.*]] ; CHECK: true: ; CHECK-NEXT: call void @use_pointer(ptr [[A]]) ; CHECK-NEXT: br label [[MORE]] ; CHECK: more: -; CHECK-NEXT: br i1 undef, label [[EXIT:%.*]], label [[LOOP]] +; CHECK-NEXT: br i1 %arg, label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: call void @llvm.objc.release(ptr [[A]]) #[[ATTR0]], !clang.imprecise_release !0 ; CHECK-NEXT: ret void @@ -278,14 +278,14 @@ entry: br label %loop loop: - br i1 undef, label %true, label %more + br i1 %arg, label %true, label %more true: call void @use_pointer(ptr %a) br label %more more: - br i1 undef, label %exit, label %loop + br i1 %arg, label %exit, label %loop exit: call void @llvm.objc.release(ptr %a) nounwind @@ -293,19 +293,19 @@ exit: ret void } -define void @test8(ptr %a) nounwind { +define void @test8(ptr %a, i1 %arg) nounwind { ; CHECK-LABEL: @test8( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[OUTER:%.*]] = tail call ptr @llvm.objc.retain(ptr [[A:%.*]]) #[[ATTR0]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: br i1 undef, label [[TRUE:%.*]], label [[MORE:%.*]] +; CHECK-NEXT: br i1 %arg, label [[TRUE:%.*]], label [[MORE:%.*]] ; CHECK: true: ; CHECK-NEXT: call void @callee() ; CHECK-NEXT: call void @use_pointer(ptr [[A]]) ; CHECK-NEXT: br label [[MORE]] ; CHECK: more: -; CHECK-NEXT: br i1 undef, label [[EXIT:%.*]], label [[LOOP]] +; CHECK-NEXT: br i1 %arg, label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: call void @llvm.objc.release(ptr [[A]]) #[[ATTR0]], !clang.imprecise_release !0 ; CHECK-NEXT: ret void @@ -316,7 +316,7 @@ entry: br label %loop loop: - br i1 undef, label %true, label %more + br i1 %arg, label %true, label %more true: call void @callee() @@ -324,7 +324,7 @@ true: br label %more more: - br i1 undef, label %exit, label %loop + br i1 %arg, label %exit, label %loop exit: call void @llvm.objc.release(ptr %a) nounwind @@ -332,17 +332,17 @@ exit: ret void } -define void @test9(ptr %a) nounwind { +define void @test9(ptr %a, i1 
%arg) nounwind { ; CHECK-LABEL: @test9( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: br i1 undef, label [[TRUE:%.*]], label [[MORE:%.*]] +; CHECK-NEXT: br i1 %arg, label [[TRUE:%.*]], label [[MORE:%.*]] ; CHECK: true: ; CHECK-NEXT: call void @use_pointer(ptr [[A:%.*]]) ; CHECK-NEXT: br label [[MORE]] ; CHECK: more: -; CHECK-NEXT: br i1 undef, label [[EXIT:%.*]], label [[LOOP]] +; CHECK-NEXT: br i1 %arg, label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -352,14 +352,14 @@ entry: br label %loop loop: - br i1 undef, label %true, label %more + br i1 %arg, label %true, label %more true: call void @use_pointer(ptr %a) br label %more more: - br i1 undef, label %exit, label %loop + br i1 %arg, label %exit, label %loop exit: call void @llvm.objc.release(ptr %a) nounwind @@ -367,17 +367,17 @@ exit: ret void } -define void @test10(ptr %a) nounwind { +define void @test10(ptr %a, i1 %arg) nounwind { ; CHECK-LABEL: @test10( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: br i1 undef, label [[TRUE:%.*]], label [[MORE:%.*]] +; CHECK-NEXT: br i1 %arg, label [[TRUE:%.*]], label [[MORE:%.*]] ; CHECK: true: ; CHECK-NEXT: call void @callee() ; CHECK-NEXT: br label [[MORE]] ; CHECK: more: -; CHECK-NEXT: br i1 undef, label [[EXIT:%.*]], label [[LOOP]] +; CHECK-NEXT: br i1 %arg, label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -387,14 +387,14 @@ entry: br label %loop loop: - br i1 undef, label %true, label %more + br i1 %arg, label %true, label %more true: call void @callee() br label %more more: - br i1 undef, label %exit, label %loop + br i1 %arg, label %exit, label %loop exit: call void @llvm.objc.release(ptr %a) nounwind @@ -402,16 +402,16 @@ exit: ret void } -define void @test11(ptr %a) nounwind { +define void @test11(ptr %a, i1 %arg) nounwind { ; CHECK-LABEL: @test11( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: br i1 undef, label [[TRUE:%.*]], label [[MORE:%.*]] +; CHECK-NEXT: br i1 %arg, label [[TRUE:%.*]], label [[MORE:%.*]] ; CHECK: true: ; CHECK-NEXT: br label [[MORE]] ; CHECK: more: -; CHECK-NEXT: br i1 undef, label [[EXIT:%.*]], label [[LOOP]] +; CHECK-NEXT: br i1 %arg, label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -421,13 +421,13 @@ entry: br label %loop loop: - br i1 undef, label %true, label %more + br i1 %arg, label %true, label %more true: br label %more more: - br i1 undef, label %exit, label %loop + br i1 %arg, label %exit, label %loop exit: call void @llvm.objc.release(ptr %a) nounwind @@ -437,18 +437,18 @@ exit: ; Don't delete anything if they're not balanced. 
-define void @test12(ptr %a) nounwind { +define void @test12(ptr %a, i1 %arg) nounwind { ; CHECK-LABEL: @test12( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[OUTER:%.*]] = tail call ptr @llvm.objc.retain(ptr [[A:%.*]]) #[[ATTR0]] ; CHECK-NEXT: [[INNER:%.*]] = tail call ptr @llvm.objc.retain(ptr [[A]]) #[[ATTR0]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: br i1 undef, label [[TRUE:%.*]], label [[MORE:%.*]] +; CHECK-NEXT: br i1 %arg, label [[TRUE:%.*]], label [[MORE:%.*]] ; CHECK: true: ; CHECK-NEXT: ret void ; CHECK: more: -; CHECK-NEXT: br i1 undef, label [[EXIT:%.*]], label [[LOOP]] +; CHECK-NEXT: br i1 %arg, label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: call void @llvm.objc.release(ptr [[A]]) #[[ATTR0]] ; CHECK-NEXT: call void @llvm.objc.release(ptr [[A]]) #[[ATTR0]], !clang.imprecise_release !0 @@ -460,13 +460,13 @@ entry: br label %loop loop: - br i1 undef, label %true, label %more + br i1 %arg, label %true, label %more true: ret void more: - br i1 undef, label %exit, label %loop + br i1 %arg, label %exit, label %loop exit: call void @llvm.objc.release(ptr %a) nounwind @@ -479,7 +479,7 @@ exit: ; by an alloca. ; rdar://12969722 -define void @test13(ptr %a) nounwind { +define void @test13(ptr %a, i1 %arg) nounwind { ; CHECK-LABEL: @test13( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[BLOCK:%.*]] = alloca ptr, align 8 @@ -491,7 +491,7 @@ define void @test13(ptr %a) nounwind { ; CHECK-NEXT: call void @block_callee(ptr [[BLOCK]]) ; CHECK-NEXT: [[RELOADED_A:%.*]] = load ptr, ptr [[BLOCK]], align 8 ; CHECK-NEXT: call void @llvm.objc.release(ptr [[RELOADED_A]]) #[[ATTR0]], !clang.imprecise_release !0 -; CHECK-NEXT: br i1 undef, label [[LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: br i1 %arg, label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: call void @llvm.objc.release(ptr [[A]]) #[[ATTR0]], !clang.imprecise_release !0 ; CHECK-NEXT: ret void @@ -507,7 +507,7 @@ loop: call void @block_callee(ptr %block) %reloaded_a = load ptr, ptr %block, align 8 call void @llvm.objc.release(ptr %reloaded_a) nounwind, !clang.imprecise_release !0 - br i1 undef, label %loop, label %exit + br i1 %arg, label %loop, label %exit exit: call void @llvm.objc.release(ptr %a) nounwind, !clang.imprecise_release !0 diff --git a/llvm/test/Transforms/ObjCARC/contract-testcases.ll b/llvm/test/Transforms/ObjCARC/contract-testcases.ll index 36fe49382e413..4c842823b54e0 100644 --- a/llvm/test/Transforms/ObjCARC/contract-testcases.ll +++ b/llvm/test/Transforms/ObjCARC/contract-testcases.ll @@ -19,13 +19,13 @@ declare i32 @__gxx_personality_sj0(...) ; Don't get in trouble on bugpointed code. ; CHECK-LABEL: define void @test0( -define void @test0() { +define void @test0(i1 %arg) { bb: %tmp1 = tail call ptr @llvm.objc.retainAutoreleasedReturnValue(ptr undef) nounwind br label %bb3 bb3: ; preds = %bb2 - br i1 undef, label %bb6, label %bb4 + br i1 %arg, label %bb6, label %bb4 bb4: ; preds = %bb3 switch i64 undef, label %bb5 [ @@ -45,15 +45,15 @@ bb6: ; preds = %bb5, %bb4, %bb4, %b ; for the same block, use the exactly same value in each block. 
; CHECK-LABEL: define void @test1( -; CHECK: br i1 undef, label %bb7, label %bb7 +; CHECK: br i1 %arg, label %bb7, label %bb7 ; CHECK: bb7: ; CHECK: %tmp8 = phi ptr [ %tmp3, %bb ], [ %tmp3, %bb ] ; CHECK: } -define void @test1() { +define void @test1(i1 %arg) { bb: %tmp = tail call ptr @objc_msgSend() %tmp3 = tail call ptr @llvm.objc.retainAutoreleasedReturnValue(ptr %tmp) nounwind - br i1 undef, label %bb7, label %bb7 + br i1 %arg, label %bb7, label %bb7 bb7: ; preds = %bb6, %bb6, %bb5 %tmp8 = phi ptr [ %tmp, %bb ], [ %tmp, %bb ] diff --git a/llvm/test/Transforms/ObjCARC/empty-block.ll b/llvm/test/Transforms/ObjCARC/empty-block.ll index 80930812fc7e9..be70beeeb731d 100644 --- a/llvm/test/Transforms/ObjCARC/empty-block.ll +++ b/llvm/test/Transforms/ObjCARC/empty-block.ll @@ -18,9 +18,9 @@ declare ptr @llvm.objc.autoreleaseReturnValue(ptr) ; CHECK: @llvm.objc.autoreleaseReturnValue ; CHECK-NOT: @llvm.objc. ; CHECK: } -define ptr @test0(ptr %buffer) nounwind { +define ptr @test0(ptr %buffer, i1 %arg) nounwind { %1 = tail call ptr @llvm.objc.retain(ptr %buffer) nounwind - br i1 undef, label %.lr.ph, label %._crit_edge + br i1 %arg, label %.lr.ph, label %._crit_edge .lr.ph: ; preds = %.lr.ph, %0 br i1 false, label %.lr.ph, label %._crit_edge @@ -37,10 +37,10 @@ define ptr @test0(ptr %buffer) nounwind { ; CHECK-LABEL: define ptr @test1( ; CHECK-NOT: @objc ; CHECK: } -define ptr @test1() nounwind { +define ptr @test1(i1 %arg) nounwind { %buffer = call ptr @foo() %1 = tail call ptr @llvm.objc.retain(ptr %buffer) nounwind - br i1 undef, label %.lr.ph, label %._crit_edge + br i1 %arg, label %.lr.ph, label %._crit_edge .lr.ph: ; preds = %.lr.ph, %0 br i1 false, label %.lr.ph, label %._crit_edge diff --git a/llvm/test/Transforms/ObjCARC/path-overflow.ll b/llvm/test/Transforms/ObjCARC/path-overflow.ll index eeef70554d114..431b377481262 100644 --- a/llvm/test/Transforms/ObjCARC/path-overflow.ll +++ b/llvm/test/Transforms/ObjCARC/path-overflow.ll @@ -29,9 +29,9 @@ declare i32 @__gxx_personality_sj0(...) declare i32 @__objc_personality_v0(...) 
-define hidden void @test1() personality ptr @__gxx_personality_sj0 { +define hidden void @test1(i1 %arg) personality ptr @__gxx_personality_sj0 { entry: - br i1 undef, label %msgSend.nullinit, label %msgSend.call + br i1 %arg, label %msgSend.nullinit, label %msgSend.call msgSend.call: ; preds = %entry br label %msgSend.cont @@ -41,7 +41,7 @@ msgSend.nullinit: ; preds = %entry msgSend.cont: ; preds = %msgSend.nullinit, %msgSend.call %0 = call ptr @llvm.objc.retain(ptr @_unnamed_cfstring) nounwind - br i1 undef, label %msgSend.nullinit33, label %msgSend.call32 + br i1 %arg, label %msgSend.nullinit33, label %msgSend.call32 msgSend.call32: ; preds = %if.end10 br label %msgSend.cont34 @@ -50,7 +50,7 @@ msgSend.nullinit33: ; preds = %if.end10 br label %msgSend.cont34 msgSend.cont34: ; preds = %msgSend.nullinit33, %msgSend.call32 - br i1 undef, label %msgSend.nullinit38, label %msgSend.call37 + br i1 %arg, label %msgSend.nullinit38, label %msgSend.call37 msgSend.call37: ; preds = %msgSend.cont34 br label %msgSend.cont39 @@ -59,7 +59,7 @@ msgSend.nullinit38: ; preds = %msgSend.cont34 br label %msgSend.cont39 msgSend.cont39: ; preds = %msgSend.nullinit38, %msgSend.call37 - br i1 undef, label %msgSend.nullinit49, label %msgSend.call48 + br i1 %arg, label %msgSend.nullinit49, label %msgSend.call48 msgSend.call48: ; preds = %msgSend.cont39 br label %msgSend.cont50 @@ -68,7 +68,7 @@ msgSend.nullinit49: ; preds = %msgSend.cont39 br label %msgSend.cont50 msgSend.cont50: ; preds = %msgSend.nullinit49, %msgSend.call48 - br i1 undef, label %msgSend.nullinit61, label %msgSend.call60 + br i1 %arg, label %msgSend.nullinit61, label %msgSend.call60 msgSend.call60: ; preds = %msgSend.cont50 br label %msgSend.cont62 @@ -77,7 +77,7 @@ msgSend.nullinit61: ; preds = %msgSend.cont50 br label %msgSend.cont62 msgSend.cont62: ; preds = %msgSend.nullinit61, %msgSend.call60 - br i1 undef, label %msgSend.nullinit67, label %msgSend.call66 + br i1 %arg, label %msgSend.nullinit67, label %msgSend.call66 msgSend.call66: ; preds = %msgSend.cont62 br label %msgSend.cont68 @@ -86,7 +86,7 @@ msgSend.nullinit67: ; preds = %msgSend.cont62 br label %msgSend.cont68 msgSend.cont68: ; preds = %msgSend.nullinit67, %msgSend.call66 - br i1 undef, label %msgSend.nullinit84, label %msgSend.call83 + br i1 %arg, label %msgSend.nullinit84, label %msgSend.call83 msgSend.call83: ; preds = %msgSend.cont68 br label %msgSend.cont85 @@ -95,7 +95,7 @@ msgSend.nullinit84: ; preds = %msgSend.cont68 br label %msgSend.cont85 msgSend.cont85: ; preds = %msgSend.nullinit84, %msgSend.call83 - br i1 undef, label %msgSend.nullinit90, label %msgSend.call89 + br i1 %arg, label %msgSend.nullinit90, label %msgSend.call89 msgSend.call89: ; preds = %msgSend.cont85 br label %msgSend.cont91 @@ -104,7 +104,7 @@ msgSend.nullinit90: ; preds = %msgSend.cont85 br label %msgSend.cont91 msgSend.cont91: ; preds = %msgSend.nullinit90, %msgSend.call89 - br i1 undef, label %msgSend.nullinit104, label %msgSend.call103 + br i1 %arg, label %msgSend.nullinit104, label %msgSend.call103 msgSend.call103: ; preds = %msgSend.cont91 br label %msgSend.cont105 @@ -113,16 +113,16 @@ msgSend.nullinit104: ; preds = %msgSend.cont91 br label %msgSend.cont105 msgSend.cont105: ; preds = %msgSend.nullinit104, %msgSend.call103 - br i1 undef, label %land.lhs.true, label %if.end123 + br i1 %arg, label %land.lhs.true, label %if.end123 land.lhs.true: ; preds = %msgSend.cont105 - br i1 undef, label %if.then117, label %if.end123 + br i1 %arg, label %if.then117, label %if.end123 if.then117: ; preds = 
%land.lhs.true br label %if.end123 if.end123: ; preds = %if.then117, %land.lhs.true, %msgSend.cont105 - br i1 undef, label %msgSend.nullinit132, label %msgSend.call131 + br i1 %arg, label %msgSend.nullinit132, label %msgSend.call131 msgSend.call131: ; preds = %if.end123 br label %msgSend.cont133 @@ -131,7 +131,7 @@ msgSend.nullinit132: ; preds = %if.end123 br label %msgSend.cont133 msgSend.cont133: ; preds = %msgSend.nullinit132, %msgSend.call131 - br i1 undef, label %msgSend.nullinit139, label %msgSend.call138 + br i1 %arg, label %msgSend.nullinit139, label %msgSend.call138 msgSend.call138: ; preds = %msgSend.cont133 br label %msgSend.cont140 @@ -140,13 +140,13 @@ msgSend.nullinit139: ; preds = %msgSend.cont133 br label %msgSend.cont140 msgSend.cont140: ; preds = %msgSend.nullinit139, %msgSend.call138 - br i1 undef, label %if.then151, label %if.end157 + br i1 %arg, label %if.then151, label %if.end157 if.then151: ; preds = %msgSend.cont140 br label %if.end157 if.end157: ; preds = %if.then151, %msgSend.cont140 - br i1 undef, label %msgSend.nullinit164, label %msgSend.call163 + br i1 %arg, label %msgSend.nullinit164, label %msgSend.call163 msgSend.call163: ; preds = %if.end157 br label %msgSend.cont165 @@ -155,7 +155,7 @@ msgSend.nullinit164: ; preds = %if.end157 br label %msgSend.cont165 msgSend.cont165: ; preds = %msgSend.nullinit164, %msgSend.call163 - br i1 undef, label %msgSend.nullinit176, label %msgSend.call175 + br i1 %arg, label %msgSend.nullinit176, label %msgSend.call175 msgSend.call175: ; preds = %msgSend.cont165 br label %msgSend.cont177 @@ -164,13 +164,13 @@ msgSend.nullinit176: ; preds = %msgSend.cont165 br label %msgSend.cont177 msgSend.cont177: ; preds = %msgSend.nullinit176, %msgSend.call175 - br i1 undef, label %land.lhs.true181, label %if.end202 + br i1 %arg, label %land.lhs.true181, label %if.end202 land.lhs.true181: ; preds = %msgSend.cont177 - br i1 undef, label %if.then187, label %if.end202 + br i1 %arg, label %if.then187, label %if.end202 if.then187: ; preds = %land.lhs.true181 - br i1 undef, label %msgSend.nullinit199, label %msgSend.call198 + br i1 %arg, label %msgSend.nullinit199, label %msgSend.call198 msgSend.call198: ; preds = %if.then187 br label %msgSend.cont200 @@ -182,7 +182,7 @@ msgSend.cont200: ; preds = %msgSend.nullinit199 br label %if.end202 if.end202: ; preds = %msgSend.cont200, %land.lhs.true181, %msgSend.cont177 - br i1 undef, label %msgSend.nullinit236, label %msgSend.call235 + br i1 %arg, label %msgSend.nullinit236, label %msgSend.call235 msgSend.call235: ; preds = %if.end202 br label %msgSend.cont237 @@ -191,7 +191,7 @@ msgSend.nullinit236: ; preds = %if.end202 br label %msgSend.cont237 msgSend.cont237: ; preds = %msgSend.nullinit236, %msgSend.call235 - br i1 undef, label %msgSend.nullinit254, label %msgSend.call253 + br i1 %arg, label %msgSend.nullinit254, label %msgSend.call253 msgSend.call253: ; preds = %msgSend.cont237 br label %msgSend.cont255 @@ -200,7 +200,7 @@ msgSend.nullinit254: ; preds = %msgSend.cont237 br label %msgSend.cont255 msgSend.cont255: ; preds = %msgSend.nullinit254, %msgSend.call253 - br i1 undef, label %msgSend.nullinit269, label %msgSend.call268 + br i1 %arg, label %msgSend.nullinit269, label %msgSend.call268 msgSend.call268: ; preds = %msgSend.cont255 br label %msgSend.cont270 @@ -209,7 +209,7 @@ msgSend.nullinit269: ; preds = %msgSend.cont255 br label %msgSend.cont270 msgSend.cont270: ; preds = %msgSend.nullinit269, %msgSend.call268 - br i1 undef, label %msgSend.nullinit281, label %msgSend.call280 + br i1 %arg, label 
%msgSend.nullinit281, label %msgSend.call280 msgSend.call280: ; preds = %msgSend.cont270 br label %msgSend.cont282 @@ -218,7 +218,7 @@ msgSend.nullinit281: ; preds = %msgSend.cont270 br label %msgSend.cont282 msgSend.cont282: ; preds = %msgSend.nullinit281, %msgSend.call280 - br i1 undef, label %msgSend.nullinit287, label %msgSend.call286 + br i1 %arg, label %msgSend.nullinit287, label %msgSend.call286 msgSend.call286: ; preds = %msgSend.cont282 br label %msgSend.cont288 @@ -227,7 +227,7 @@ msgSend.nullinit287: ; preds = %msgSend.cont282 br label %msgSend.cont288 msgSend.cont288: ; preds = %msgSend.nullinit287, %msgSend.call286 - br i1 undef, label %msgSend.nullinit303, label %msgSend.call302 + br i1 %arg, label %msgSend.nullinit303, label %msgSend.call302 msgSend.call302: ; preds = %msgSend.cont288 br label %msgSend.cont304 @@ -236,7 +236,7 @@ msgSend.nullinit303: ; preds = %msgSend.cont288 br label %msgSend.cont304 msgSend.cont304: ; preds = %msgSend.nullinit303, %msgSend.call302 - br i1 undef, label %msgSend.nullinit344, label %msgSend.call343 + br i1 %arg, label %msgSend.nullinit344, label %msgSend.call343 msgSend.call343: ; preds = %msgSend.cont304 br label %msgSend.cont345 @@ -245,7 +245,7 @@ msgSend.nullinit344: ; preds = %msgSend.cont304 br label %msgSend.cont345 msgSend.cont345: ; preds = %msgSend.nullinit344, %msgSend.call343 - br i1 undef, label %msgSend.nullinit350, label %msgSend.call349 + br i1 %arg, label %msgSend.nullinit350, label %msgSend.call349 msgSend.call349: ; preds = %msgSend.cont345 br label %msgSend.cont351 @@ -254,7 +254,7 @@ msgSend.nullinit350: ; preds = %msgSend.cont345 br label %msgSend.cont351 msgSend.cont351: ; preds = %msgSend.nullinit350, %msgSend.call349 - br i1 undef, label %msgSend.nullinit366, label %msgSend.call365 + br i1 %arg, label %msgSend.nullinit366, label %msgSend.call365 msgSend.call365: ; preds = %msgSend.cont351 br label %msgSend.cont367 @@ -263,7 +263,7 @@ msgSend.nullinit366: ; preds = %msgSend.cont351 br label %msgSend.cont367 msgSend.cont367: ; preds = %msgSend.nullinit366, %msgSend.call365 - br i1 undef, label %msgSend.nullinit376, label %msgSend.call375 + br i1 %arg, label %msgSend.nullinit376, label %msgSend.call375 msgSend.call375: ; preds = %msgSend.cont367 br label %msgSend.cont377 @@ -272,10 +272,10 @@ msgSend.nullinit376: ; preds = %msgSend.cont367 br label %msgSend.cont377 msgSend.cont377: ; preds = %msgSend.nullinit376, %msgSend.call375 - br i1 undef, label %if.then384, label %if.else401 + br i1 %arg, label %if.then384, label %if.else401 if.then384: ; preds = %msgSend.cont377 - br i1 undef, label %msgSend.nullinit392, label %msgSend.call391 + br i1 %arg, label %msgSend.nullinit392, label %msgSend.call391 msgSend.call391: ; preds = %if.then384 br label %msgSend.cont393 @@ -287,7 +287,7 @@ msgSend.cont393: ; preds = %msgSend.nullinit392 br label %if.end418 if.else401: ; preds = %msgSend.cont377 - br i1 undef, label %msgSend.nullinit409, label %msgSend.call408 + br i1 %arg, label %msgSend.nullinit409, label %msgSend.call408 msgSend.call408: ; preds = %if.else401 br label %msgSend.cont410 @@ -299,7 +299,7 @@ msgSend.cont410: ; preds = %msgSend.nullinit409 br label %if.end418 if.end418: ; preds = %msgSend.cont410, %msgSend.cont393 - br i1 undef, label %msgSend.nullinit470, label %msgSend.call469 + br i1 %arg, label %msgSend.nullinit470, label %msgSend.call469 msgSend.call469: ; preds = %if.end418 br label %msgSend.cont471 @@ -308,7 +308,7 @@ msgSend.nullinit470: ; preds = %if.end418 br label %msgSend.cont471 msgSend.cont471: ; 
preds = %msgSend.nullinit470, %msgSend.call469 - br i1 undef, label %msgSend.nullinit484, label %msgSend.call483 + br i1 %arg, label %msgSend.nullinit484, label %msgSend.call483 msgSend.call483: ; preds = %msgSend.cont471 br label %msgSend.cont485 @@ -317,7 +317,7 @@ msgSend.nullinit484: ; preds = %msgSend.cont471 br label %msgSend.cont485 msgSend.cont485: ; preds = %msgSend.nullinit484, %msgSend.call483 - br i1 undef, label %msgSend.nullinit500, label %msgSend.call499 + br i1 %arg, label %msgSend.nullinit500, label %msgSend.call499 msgSend.call499: ; preds = %msgSend.cont485 br label %msgSend.cont501 @@ -326,7 +326,7 @@ msgSend.nullinit500: ; preds = %msgSend.cont485 br label %msgSend.cont501 msgSend.cont501: ; preds = %msgSend.nullinit500, %msgSend.call499 - br i1 undef, label %msgSend.nullinit506, label %msgSend.call505 + br i1 %arg, label %msgSend.nullinit506, label %msgSend.call505 msgSend.call505: ; preds = %msgSend.cont501 br label %msgSend.cont507 @@ -340,78 +340,78 @@ msgSend.cont507: ; preds = %msgSend.nullinit506 } ; Function Attrs: optsize ssp uwtable -define void @test2() unnamed_addr align 2 personality ptr @__gxx_personality_sj0 { +define void @test2(i1 %arg) unnamed_addr align 2 personality ptr @__gxx_personality_sj0 { bb: - br i1 undef, label %bb3, label %bb2 + br i1 %arg, label %bb3, label %bb2 bb2: ; preds = %bb br label %bb3 bb3: ; preds = %bb2, %bb - br i1 undef, label %bb5, label %bb4 + br i1 %arg, label %bb5, label %bb4 bb4: ; preds = %bb3 br label %bb5 bb5: ; preds = %bb4, %bb3 - br i1 undef, label %bb7, label %bb6 + br i1 %arg, label %bb7, label %bb6 bb6: ; preds = %bb5 br label %bb7 bb7: ; preds = %bb6, %bb5 - br i1 undef, label %bb9, label %bb8 + br i1 %arg, label %bb9, label %bb8 bb8: ; preds = %bb7 unreachable bb9: ; preds = %bb7 - br i1 undef, label %bb11, label %bb10 + br i1 %arg, label %bb11, label %bb10 bb10: ; preds = %bb9 br label %bb11 bb11: ; preds = %bb10, %bb9 - br i1 undef, label %bb13, label %bb12 + br i1 %arg, label %bb13, label %bb12 bb12: ; preds = %bb11 br label %bb13 bb13: ; preds = %bb12, %bb11 - br i1 undef, label %bb15, label %bb14 + br i1 %arg, label %bb15, label %bb14 bb14: ; preds = %bb13 br label %bb15 bb15: ; preds = %bb14, %bb13 - br i1 undef, label %bb17, label %bb16 + br i1 %arg, label %bb17, label %bb16 bb16: ; preds = %bb15 br label %bb17 bb17: ; preds = %bb16, %bb15 - br i1 undef, label %bb19, label %bb18 + br i1 %arg, label %bb19, label %bb18 bb18: ; preds = %bb17 br label %bb19 bb19: ; preds = %bb18, %bb17 - br i1 undef, label %bb222, label %bb20 + br i1 %arg, label %bb222, label %bb20 bb20: ; preds = %bb19 - br i1 undef, label %bb222, label %bb21 + br i1 %arg, label %bb222, label %bb21 bb21: ; preds = %bb20 - br i1 undef, label %bb22, label %bb30 + br i1 %arg, label %bb22, label %bb30 bb22: ; preds = %bb21 - br i1 undef, label %bb23, label %bb32 + br i1 %arg, label %bb23, label %bb32 bb23: ; preds = %bb22 - br i1 undef, label %bb24, label %bb34 + br i1 %arg, label %bb24, label %bb34 bb24: ; preds = %bb23 - br i1 undef, label %bb26, label %bb25 + br i1 %arg, label %bb26, label %bb25 bb25: ; preds = %bb24 br label %bb27 @@ -420,10 +420,10 @@ bb26: ; preds = %bb24 br label %bb27 bb27: ; preds = %bb26, %bb25 - br i1 undef, label %bb28, label %bb42 + br i1 %arg, label %bb28, label %bb42 bb28: ; preds = %bb27 - br i1 undef, label %bb36, label %bb29 + br i1 %arg, label %bb36, label %bb29 bb29: ; preds = %bb28 br label %bb36 @@ -438,7 +438,7 @@ bb34: ; preds = %bb23 unreachable bb36: ; preds = %bb29, %bb28 - br i1 undef, label %bb38, 
label %bb37 + br i1 %arg, label %bb38, label %bb37 bb37: ; preds = %bb36 br label %bb39 @@ -447,7 +447,7 @@ bb38: ; preds = %bb36 br label %bb39 bb39: ; preds = %bb38, %bb37 - br i1 undef, label %bb41, label %bb40 + br i1 %arg, label %bb41, label %bb40 bb40: ; preds = %bb39 unreachable @@ -456,19 +456,19 @@ bb41: ; preds = %bb39 br label %bb42 bb42: ; preds = %bb41, %bb27 - br i1 undef, label %bb43, label %bb214 + br i1 %arg, label %bb43, label %bb214 bb43: ; preds = %bb42 - br i1 undef, label %bb47, label %bb45 + br i1 %arg, label %bb47, label %bb45 bb45: ; preds = %bb130, %bb128, %bb126, %bb124, %bb122, %bb120, %bb118, %bb116, %bb114, %bb112, %bb110, %bb108, %bb105, %bb102, %bb100, %bb96, %bb94, %bb90, %bb88, %bb84, %bb82, %bb78, %bb76, %bb72, %bb70, %bb66, %bb64, %bb60, %bb58, %bb54, %bb51, %bb43 unreachable bb47: ; preds = %bb43 - br i1 undef, label %bb48, label %bb106 + br i1 %arg, label %bb48, label %bb106 bb48: ; preds = %bb47 - br i1 undef, label %bb50, label %bb49 + br i1 %arg, label %bb50, label %bb49 bb49: ; preds = %bb48 br label %bb51 @@ -477,16 +477,16 @@ bb50: ; preds = %bb48 br label %bb51 bb51: ; preds = %bb50, %bb49 - br i1 undef, label %bb53, label %bb45 + br i1 %arg, label %bb53, label %bb45 bb53: ; preds = %bb51 - br i1 undef, label %bb54, label %bb134 + br i1 %arg, label %bb54, label %bb134 bb54: ; preds = %bb53 - br i1 undef, label %bb55, label %bb45 + br i1 %arg, label %bb55, label %bb45 bb55: ; preds = %bb54 - br i1 undef, label %bb57, label %bb56 + br i1 %arg, label %bb57, label %bb56 bb56: ; preds = %bb55 br label %bb58 @@ -495,13 +495,13 @@ bb57: ; preds = %bb55 br label %bb58 bb58: ; preds = %bb57, %bb56 - br i1 undef, label %bb60, label %bb45 + br i1 %arg, label %bb60, label %bb45 bb60: ; preds = %bb58 - br i1 undef, label %bb61, label %bb45 + br i1 %arg, label %bb61, label %bb45 bb61: ; preds = %bb60 - br i1 undef, label %bb63, label %bb62 + br i1 %arg, label %bb63, label %bb62 bb62: ; preds = %bb61 br label %bb64 @@ -510,13 +510,13 @@ bb63: ; preds = %bb61 br label %bb64 bb64: ; preds = %bb63, %bb62 - br i1 undef, label %bb66, label %bb45 + br i1 %arg, label %bb66, label %bb45 bb66: ; preds = %bb64 - br i1 undef, label %bb67, label %bb45 + br i1 %arg, label %bb67, label %bb45 bb67: ; preds = %bb66 - br i1 undef, label %bb69, label %bb68 + br i1 %arg, label %bb69, label %bb68 bb68: ; preds = %bb67 br label %bb70 @@ -525,13 +525,13 @@ bb69: ; preds = %bb67 br label %bb70 bb70: ; preds = %bb69, %bb68 - br i1 undef, label %bb72, label %bb45 + br i1 %arg, label %bb72, label %bb45 bb72: ; preds = %bb70 - br i1 undef, label %bb73, label %bb45 + br i1 %arg, label %bb73, label %bb45 bb73: ; preds = %bb72 - br i1 undef, label %bb75, label %bb74 + br i1 %arg, label %bb75, label %bb74 bb74: ; preds = %bb73 br label %bb76 @@ -540,13 +540,13 @@ bb75: ; preds = %bb73 br label %bb76 bb76: ; preds = %bb75, %bb74 - br i1 undef, label %bb78, label %bb45 + br i1 %arg, label %bb78, label %bb45 bb78: ; preds = %bb76 - br i1 undef, label %bb79, label %bb45 + br i1 %arg, label %bb79, label %bb45 bb79: ; preds = %bb78 - br i1 undef, label %bb81, label %bb80 + br i1 %arg, label %bb81, label %bb80 bb80: ; preds = %bb79 br label %bb82 @@ -555,13 +555,13 @@ bb81: ; preds = %bb79 br label %bb82 bb82: ; preds = %bb81, %bb80 - br i1 undef, label %bb84, label %bb45 + br i1 %arg, label %bb84, label %bb45 bb84: ; preds = %bb82 - br i1 undef, label %bb85, label %bb45 + br i1 %arg, label %bb85, label %bb45 bb85: ; preds = %bb84 - br i1 undef, label %bb87, label %bb86 + br i1 %arg, label %bb87, 
label %bb86 bb86: ; preds = %bb85 br label %bb88 @@ -570,13 +570,13 @@ bb87: ; preds = %bb85 br label %bb88 bb88: ; preds = %bb87, %bb86 - br i1 undef, label %bb90, label %bb45 + br i1 %arg, label %bb90, label %bb45 bb90: ; preds = %bb88 - br i1 undef, label %bb91, label %bb45 + br i1 %arg, label %bb91, label %bb45 bb91: ; preds = %bb90 - br i1 undef, label %bb93, label %bb92 + br i1 %arg, label %bb93, label %bb92 bb92: ; preds = %bb91 br label %bb94 @@ -585,13 +585,13 @@ bb93: ; preds = %bb91 br label %bb94 bb94: ; preds = %bb93, %bb92 - br i1 undef, label %bb96, label %bb45 + br i1 %arg, label %bb96, label %bb45 bb96: ; preds = %bb94 - br i1 undef, label %bb97, label %bb45 + br i1 %arg, label %bb97, label %bb45 bb97: ; preds = %bb96 - br i1 undef, label %bb99, label %bb98 + br i1 %arg, label %bb99, label %bb98 bb98: ; preds = %bb97 br label %bb100 @@ -600,82 +600,82 @@ bb99: ; preds = %bb97 br label %bb100 bb100: ; preds = %bb99, %bb98 - br i1 undef, label %bb102, label %bb45 + br i1 %arg, label %bb102, label %bb45 bb102: ; preds = %bb100 - br i1 undef, label %bb104, label %bb45 + br i1 %arg, label %bb104, label %bb45 bb104: ; preds = %bb102 - br i1 undef, label %bb108, label %bb105 + br i1 %arg, label %bb108, label %bb105 bb105: ; preds = %bb104 - br i1 undef, label %bb108, label %bb45 + br i1 %arg, label %bb108, label %bb45 bb106: ; preds = %bb47 unreachable bb108: ; preds = %bb105, %bb104 - br i1 undef, label %bb110, label %bb45 + br i1 %arg, label %bb110, label %bb45 bb110: ; preds = %bb108 - br i1 undef, label %bb112, label %bb45 + br i1 %arg, label %bb112, label %bb45 bb112: ; preds = %bb110 - br i1 undef, label %bb114, label %bb45 + br i1 %arg, label %bb114, label %bb45 bb114: ; preds = %bb112 - br i1 undef, label %bb116, label %bb45 + br i1 %arg, label %bb116, label %bb45 bb116: ; preds = %bb114 - br i1 undef, label %bb118, label %bb45 + br i1 %arg, label %bb118, label %bb45 bb118: ; preds = %bb116 - br i1 undef, label %bb120, label %bb45 + br i1 %arg, label %bb120, label %bb45 bb120: ; preds = %bb118 - br i1 undef, label %bb122, label %bb45 + br i1 %arg, label %bb122, label %bb45 bb122: ; preds = %bb120 - br i1 undef, label %bb124, label %bb45 + br i1 %arg, label %bb124, label %bb45 bb124: ; preds = %bb122 - br i1 undef, label %bb126, label %bb45 + br i1 %arg, label %bb126, label %bb45 bb126: ; preds = %bb124 - br i1 undef, label %bb128, label %bb45 + br i1 %arg, label %bb128, label %bb45 bb128: ; preds = %bb126 - br i1 undef, label %bb130, label %bb45 + br i1 %arg, label %bb130, label %bb45 bb130: ; preds = %bb128 - br i1 undef, label %bb132, label %bb45 + br i1 %arg, label %bb132, label %bb45 bb132: ; preds = %bb130 - br i1 undef, label %bb135, label %bb30 + br i1 %arg, label %bb135, label %bb30 bb134: ; preds = %bb53 unreachable bb135: ; preds = %bb132 - br i1 undef, label %bb139, label %bb136 + br i1 %arg, label %bb139, label %bb136 bb136: ; preds = %bb135 - br i1 undef, label %bb138, label %bb30 + br i1 %arg, label %bb138, label %bb30 bb138: ; preds = %bb136 br label %bb139 bb139: ; preds = %bb138, %bb135 - br i1 undef, label %bb140, label %bb141 + br i1 %arg, label %bb140, label %bb141 bb140: ; preds = %bb139 unreachable bb141: ; preds = %bb139 - br i1 undef, label %bb142, label %bb215 + br i1 %arg, label %bb142, label %bb215 bb142: ; preds = %bb141 - br i1 undef, label %bb144, label %bb143 + br i1 %arg, label %bb144, label %bb143 bb143: ; preds = %bb142 br label %bb145 @@ -684,16 +684,16 @@ bb144: ; preds = %bb142 br label %bb145 bb145: ; preds = %bb144, %bb143 - br i1 
undef, label %bb146, label %bb151 + br i1 %arg, label %bb146, label %bb151 bb146: ; preds = %bb145 - br i1 undef, label %bb148, label %bb153 + br i1 %arg, label %bb148, label %bb153 bb148: ; preds = %bb146 - br i1 undef, label %bb155, label %bb149 + br i1 %arg, label %bb155, label %bb149 bb149: ; preds = %bb148 - br i1 undef, label %bb150, label %bb153 + br i1 %arg, label %bb150, label %bb153 bb150: ; preds = %bb149 br label %bb155 @@ -705,7 +705,7 @@ bb153: ; preds = %bb158, %bb149, %bb1 unreachable bb155: ; preds = %bb150, %bb148 - br i1 undef, label %bb157, label %bb156 + br i1 %arg, label %bb157, label %bb156 bb156: ; preds = %bb155 br label %bb158 @@ -714,10 +714,10 @@ bb157: ; preds = %bb155 br label %bb158 bb158: ; preds = %bb157, %bb156 - br i1 undef, label %bb160, label %bb153 + br i1 %arg, label %bb160, label %bb153 bb160: ; preds = %bb158 - br i1 undef, label %bb162, label %bb161 + br i1 %arg, label %bb162, label %bb161 bb161: ; preds = %bb160 br label %bb163 @@ -726,16 +726,16 @@ bb162: ; preds = %bb160 br label %bb163 bb163: ; preds = %bb162, %bb161 - br i1 undef, label %bb165, label %bb164 + br i1 %arg, label %bb165, label %bb164 bb164: ; preds = %bb163 br label %bb165 bb165: ; preds = %bb164, %bb163 - br i1 undef, label %bb170, label %bb166 + br i1 %arg, label %bb170, label %bb166 bb166: ; preds = %bb165 - br i1 undef, label %bb167, label %bb168 + br i1 %arg, label %bb167, label %bb168 bb167: ; preds = %bb166 unreachable @@ -744,25 +744,25 @@ bb168: ; preds = %bb166 unreachable bb170: ; preds = %bb165 - br i1 undef, label %bb215, label %bb171 + br i1 %arg, label %bb215, label %bb171 bb171: ; preds = %bb170 - br i1 undef, label %bb173, label %bb30 + br i1 %arg, label %bb173, label %bb30 bb173: ; preds = %bb171 - br i1 undef, label %bb174, label %bb215 + br i1 %arg, label %bb174, label %bb215 bb174: ; preds = %bb173 - br i1 undef, label %bb176, label %bb30 + br i1 %arg, label %bb176, label %bb30 bb176: ; preds = %bb174 - br i1 undef, label %bb178, label %bb30 + br i1 %arg, label %bb178, label %bb30 bb178: ; preds = %bb176 - br i1 undef, label %bb179, label %bb193 + br i1 %arg, label %bb179, label %bb193 bb179: ; preds = %bb178 - br i1 undef, label %bb181, label %bb180 + br i1 %arg, label %bb181, label %bb180 bb180: ; preds = %bb179 br label %bb182 @@ -771,23 +771,23 @@ bb181: ; preds = %bb179 br label %bb182 bb182: ; preds = %bb181, %bb180 - br i1 undef, label %bb184, label %bb30 + br i1 %arg, label %bb184, label %bb30 bb184: ; preds = %bb182 %tmp185 = call ptr @returner() - br i1 undef, label %bb186, label %bb195 + br i1 %arg, label %bb186, label %bb195 bb186: ; preds = %bb184 %tmp188 = call ptr @llvm.objc.retainAutoreleasedReturnValue(ptr %tmp185) %tmp189 = call ptr @llvm.objc.retain(ptr %tmp188) call void @llvm.objc.release(ptr %tmp189), !clang.imprecise_release !0 - br i1 undef, label %bb197, label %bb190 + br i1 %arg, label %bb197, label %bb190 bb190: ; preds = %bb186 - br i1 undef, label %bb192, label %bb195 + br i1 %arg, label %bb192, label %bb195 bb192: ; preds = %bb190 - br i1 undef, label %bb197, label %bb195 + br i1 %arg, label %bb197, label %bb195 bb193: ; preds = %bb178 br label %bb213 @@ -796,37 +796,37 @@ bb195: ; preds = %bb192, %bb190, %bb1 unreachable bb197: ; preds = %bb192, %bb186 - br i1 undef, label %bb198, label %bb215 + br i1 %arg, label %bb198, label %bb215 bb198: ; preds = %bb197 - br i1 undef, label %bb202, label %bb199 + br i1 %arg, label %bb202, label %bb199 bb199: ; preds = %bb198 - br i1 undef, label %bb201, label %bb30 + br i1 %arg, label 
%bb201, label %bb30 bb201: ; preds = %bb199 br label %bb202 bb202: ; preds = %bb201, %bb198 - br i1 undef, label %bb206, label %bb203 + br i1 %arg, label %bb206, label %bb203 bb203: ; preds = %bb202 - br i1 undef, label %bb205, label %bb30 + br i1 %arg, label %bb205, label %bb30 bb205: ; preds = %bb203 br label %bb206 bb206: ; preds = %bb205, %bb202 - br i1 undef, label %bb210, label %bb207 + br i1 %arg, label %bb210, label %bb207 bb207: ; preds = %bb206 - br i1 undef, label %bb209, label %bb30 + br i1 %arg, label %bb209, label %bb30 bb209: ; preds = %bb207 br label %bb210 bb210: ; preds = %bb209, %bb206 - br i1 undef, label %bb212, label %bb30 + br i1 %arg, label %bb212, label %bb30 bb212: ; preds = %bb210 unreachable @@ -838,19 +838,19 @@ bb214: ; preds = %bb42 br label %bb219 bb215: ; preds = %bb197, %bb173, %bb170, %bb141 - br i1 undef, label %bb217, label %bb216 + br i1 %arg, label %bb217, label %bb216 bb216: ; preds = %bb215 br label %bb217 bb217: ; preds = %bb216, %bb215 - br i1 undef, label %bb219, label %bb218 + br i1 %arg, label %bb219, label %bb218 bb218: ; preds = %bb217 br label %bb219 bb219: ; preds = %bb218, %bb217, %bb214 - br i1 undef, label %bb221, label %bb220 + br i1 %arg, label %bb221, label %bb220 bb220: ; preds = %bb219 unreachable @@ -863,7 +863,7 @@ bb222: ; preds = %bb20, %bb19 } ; Function Attrs: ssp -define void @test3() #1 personality ptr @__gxx_personality_sj0 { +define void @test3(i1 %arg) #1 personality ptr @__gxx_personality_sj0 { entry: %call2 = invoke ptr @objc_msgSend(ptr undef, ptr undef, ptr @_unnamed_cfstring) to label %invoke.cont unwind label %lpad @@ -873,7 +873,7 @@ invoke.cont: ; preds = %entry to label %invoke.cont4 unwind label %lpad3 invoke.cont4: ; preds = %invoke.cont - br i1 undef, label %land.end, label %land.rhs + br i1 %arg, label %land.end, label %land.rhs land.rhs: ; preds = %invoke.cont4 %call7 = invoke i32 @objc_msgSend(ptr undef, ptr undef) @@ -884,7 +884,7 @@ land.end: ; preds = %land.rhs, %invoke.c to label %invoke.cont.i unwind label %lpad.i invoke.cont.i: ; preds = %land.end - br i1 undef, label %invoke.cont8, label %if.then.i + br i1 %arg, label %invoke.cont8, label %if.then.i if.then.i: ; preds = %invoke.cont.i br label %invoke.cont8 @@ -907,7 +907,7 @@ invoke.cont21: ; preds = %invoke.cont17 to label %invoke.cont.i1980 unwind label %lpad.i1982 invoke.cont.i1980: ; preds = %invoke.cont21 - br i1 undef, label %invoke.cont24, label %if.then.i1981 + br i1 %arg, label %invoke.cont24, label %if.then.i1981 if.then.i1981: ; preds = %invoke.cont.i1980 br label %invoke.cont24 @@ -922,7 +922,7 @@ invoke.cont24: ; preds = %if.then.i1981, %inv to label %invoke.cont36 unwind label %lpad35 invoke.cont36: ; preds = %invoke.cont24 - br i1 undef, label %land.end43, label %land.rhs39 + br i1 %arg, label %land.end43, label %land.rhs39 land.rhs39: ; preds = %invoke.cont36 %call41 = invoke signext i8 @objc_msgSend(ptr undef, ptr undef, ptr @_unnamed_cfstring) @@ -933,7 +933,7 @@ land.end43: ; preds = %land.rhs39, %invoke to label %invoke.cont.i1986 unwind label %lpad.i1988 invoke.cont.i1986: ; preds = %land.end43 - br i1 undef, label %invoke.cont44, label %if.then.i1987 + br i1 %arg, label %invoke.cont44, label %if.then.i1987 if.then.i1987: ; preds = %invoke.cont.i1986 br label %invoke.cont44 @@ -948,7 +948,7 @@ invoke.cont44: ; preds = %if.then.i1987, %inv to label %invoke.cont52 unwind label %lpad51 invoke.cont52: ; preds = %invoke.cont44 - br i1 undef, label %land.end70, label %land.rhs58 + br i1 %arg, label %land.end70, label %land.rhs58 
land.rhs58: ; preds = %invoke.cont52 %call63 = invoke ptr @objc_msgSend(ptr undef, ptr undef, i32 42) @@ -963,7 +963,7 @@ land.end70: ; preds = %invoke.cont62, %inv to label %invoke.cont.i1992 unwind label %lpad66.body invoke.cont.i1992: ; preds = %land.end70 - br i1 undef, label %invoke.cont71, label %if.then.i1993 + br i1 %arg, label %invoke.cont71, label %if.then.i1993 if.then.i1993: ; preds = %invoke.cont.i1992 br label %invoke.cont71 @@ -973,7 +973,7 @@ invoke.cont71: ; preds = %if.then.i1993, %inv to label %invoke.cont.i1998 unwind label %lpad.i2000 invoke.cont.i1998: ; preds = %invoke.cont71 - br i1 undef, label %invoke.cont91, label %if.then.i1999 + br i1 %arg, label %invoke.cont91, label %if.then.i1999 if.then.i1999: ; preds = %invoke.cont.i1998 br label %invoke.cont91 @@ -996,7 +996,7 @@ invoke.cont97: ; preds = %invoke.cont95 to label %invoke.cont.i2004 unwind label %lpad.i2006 invoke.cont.i2004: ; preds = %invoke.cont97 - br i1 undef, label %invoke.cont100, label %if.then.i2005 + br i1 %arg, label %invoke.cont100, label %if.then.i2005 if.then.i2005: ; preds = %invoke.cont.i2004 br label %invoke.cont100 @@ -1015,7 +1015,7 @@ invoke.cont110: ; preds = %invoke.cont100 to label %invoke.cont.i2010 unwind label %lpad.i2012 invoke.cont.i2010: ; preds = %invoke.cont110 - br i1 undef, label %invoke.cont117, label %if.then.i2011 + br i1 %arg, label %invoke.cont117, label %if.then.i2011 if.then.i2011: ; preds = %invoke.cont.i2010 br label %invoke.cont117 @@ -1094,7 +1094,7 @@ lpad109: ; preds = %invoke.cont100 unreachable invoke.cont.i2022: ; preds = %invoke.cont117 - br i1 undef, label %invoke.cont157, label %if.then.i2023 + br i1 %arg, label %invoke.cont157, label %if.then.i2023 if.then.i2023: ; preds = %invoke.cont.i2022 br label %invoke.cont157 @@ -1104,7 +1104,7 @@ invoke.cont157: ; preds = %if.then.i2023, %inv to label %invoke.cont.i2028 unwind label %lpad164.body invoke.cont.i2028: ; preds = %invoke.cont157 - br i1 undef, label %invoke.cont165, label %if.then.i2029 + br i1 %arg, label %invoke.cont165, label %if.then.i2029 if.then.i2029: ; preds = %invoke.cont.i2028 br label %invoke.cont165 @@ -1122,7 +1122,7 @@ invoke.cont185: ; preds = %invoke.cont184 to label %invoke.cont.i2034 unwind label %lpad.i2036 invoke.cont.i2034: ; preds = %invoke.cont185 - br i1 undef, label %invoke.cont190, label %if.then.i2035 + br i1 %arg, label %invoke.cont190, label %if.then.i2035 if.then.i2035: ; preds = %invoke.cont.i2034 br label %invoke.cont190 @@ -1149,7 +1149,7 @@ invoke.cont204: ; preds = %invoke.cont201 to label %invoke.cont.i2040 unwind label %lpad.i2042 invoke.cont.i2040: ; preds = %invoke.cont204 - br i1 undef, label %invoke.cont207, label %if.then.i2041 + br i1 %arg, label %invoke.cont207, label %if.then.i2041 if.then.i2041: ; preds = %invoke.cont.i2040 br label %invoke.cont207 @@ -1168,7 +1168,7 @@ invoke.cont208: ; preds = %invoke.cont207 to label %invoke.cont.i2046 unwind label %lpad212.body invoke.cont.i2046: ; preds = %invoke.cont208 - br i1 undef, label %invoke.cont213, label %if.then.i2047 + br i1 %arg, label %invoke.cont213, label %if.then.i2047 if.then.i2047: ; preds = %invoke.cont.i2046 br label %invoke.cont213 @@ -1186,7 +1186,7 @@ invoke.cont228: ; preds = %invoke.cont221 to label %invoke.cont.i2052 unwind label %lpad.i2054 invoke.cont.i2052: ; preds = %invoke.cont228 - br i1 undef, label %invoke.cont231, label %if.then.i2053 + br i1 %arg, label %invoke.cont231, label %if.then.i2053 if.then.i2053: ; preds = %invoke.cont.i2052 br label %invoke.cont231 @@ -1205,7 +1205,7 @@ 
invoke.cont232: ; preds = %invoke.cont231 to label %invoke.cont.i2058 unwind label %lpad236.body invoke.cont.i2058: ; preds = %invoke.cont232 - br i1 undef, label %invoke.cont237, label %if.then.i2059 + br i1 %arg, label %invoke.cont237, label %if.then.i2059 if.then.i2059: ; preds = %invoke.cont.i2058 br label %invoke.cont237 @@ -1251,7 +1251,7 @@ invoke.cont278: ; preds = %invoke.cont274 to label %invoke.cont.i2064 unwind label %lpad.i2066 invoke.cont.i2064: ; preds = %invoke.cont278 - br i1 undef, label %invoke.cont281, label %if.then.i2065 + br i1 %arg, label %invoke.cont281, label %if.then.i2065 if.then.i2065: ; preds = %invoke.cont.i2064 br label %invoke.cont281 @@ -1286,7 +1286,7 @@ invoke.cont315: ; preds = %invoke.cont312 to label %invoke.cont321 unwind label %lpad320 invoke.cont321: ; preds = %invoke.cont315 - br i1 undef, label %land.end344, label %land.rhs335 + br i1 %arg, label %land.end344, label %land.rhs335 land.rhs335: ; preds = %invoke.cont321 %call342 = invoke signext i8 @objc_msgSend(ptr undef, ptr undef, ptr @_unnamed_cfstring) @@ -1297,7 +1297,7 @@ land.end344: ; preds = %land.rhs335, %invok to label %invoke.cont.i2070 unwind label %lpad340.body invoke.cont.i2070: ; preds = %land.end344 - br i1 undef, label %invoke.cont345, label %if.then.i2071 + br i1 %arg, label %invoke.cont345, label %if.then.i2071 if.then.i2071: ; preds = %invoke.cont.i2070 br label %invoke.cont345 @@ -1319,7 +1319,7 @@ invoke.cont370: ; preds = %invoke.cont364 to label %invoke.cont.i2076 unwind label %lpad.i2078 invoke.cont.i2076: ; preds = %invoke.cont370 - br i1 undef, label %invoke.cont373, label %if.then.i2077 + br i1 %arg, label %invoke.cont373, label %if.then.i2077 if.then.i2077: ; preds = %invoke.cont.i2076 br label %invoke.cont373 @@ -1346,7 +1346,7 @@ invoke.cont383: ; preds = %invoke.cont382 to label %invoke.cont.i2082 unwind label %lpad.i2084 invoke.cont.i2082: ; preds = %invoke.cont383 - br i1 undef, label %invoke.cont392, label %if.then.i2083 + br i1 %arg, label %invoke.cont392, label %if.then.i2083 if.then.i2083: ; preds = %invoke.cont.i2082 br label %invoke.cont392 @@ -1377,7 +1377,7 @@ invoke.cont402: ; preds = %invoke.cont399 to label %invoke.cont.i2088 unwind label %lpad.i2090 invoke.cont.i2088: ; preds = %invoke.cont402 - br i1 undef, label %invoke.cont405, label %if.then.i2089 + br i1 %arg, label %invoke.cont405, label %if.then.i2089 if.then.i2089: ; preds = %invoke.cont.i2088 br label %invoke.cont405 @@ -1404,7 +1404,7 @@ invoke.cont412: ; preds = %invoke.cont409 to label %invoke.cont.i2094 unwind label %lpad.i2096 invoke.cont.i2094: ; preds = %invoke.cont412 - br i1 undef, label %invoke.cont418, label %if.then.i2095 + br i1 %arg, label %invoke.cont418, label %if.then.i2095 if.then.i2095: ; preds = %invoke.cont.i2094 br label %invoke.cont418 @@ -1435,7 +1435,7 @@ invoke.cont429: ; preds = %invoke.cont426 to label %invoke.cont.i2100 unwind label %lpad.i2102 invoke.cont.i2100: ; preds = %invoke.cont429 - br i1 undef, label %invoke.cont432, label %if.then.i2101 + br i1 %arg, label %invoke.cont432, label %if.then.i2101 if.then.i2101: ; preds = %invoke.cont.i2100 br label %invoke.cont432 @@ -1467,7 +1467,7 @@ invoke.cont443: ; preds = %invoke.cont.i2106 to label %invoke.cont.i2112 unwind label %lpad.i2114 invoke.cont.i2112: ; preds = %invoke.cont443 - br i1 undef, label %invoke.cont449, label %if.then.i2113 + br i1 %arg, label %invoke.cont449, label %if.then.i2113 if.then.i2113: ; preds = %invoke.cont.i2112 br label %invoke.cont449 @@ -1490,7 +1490,7 @@ invoke.cont455: ; preds = 
%invoke.cont452 to label %invoke.cont.i2118 unwind label %lpad.i2120 invoke.cont.i2118: ; preds = %invoke.cont455 - br i1 undef, label %invoke.cont458, label %if.then.i2119 + br i1 %arg, label %invoke.cont458, label %if.then.i2119 if.then.i2119: ; preds = %invoke.cont.i2118 br label %invoke.cont458 @@ -1509,7 +1509,7 @@ invoke.cont460: ; preds = %invoke.cont458 to label %invoke.cont.i2124 unwind label %lpad.i2126 invoke.cont.i2124: ; preds = %invoke.cont460 - br i1 undef, label %invoke.cont466, label %if.then.i2125 + br i1 %arg, label %invoke.cont466, label %if.then.i2125 if.then.i2125: ; preds = %invoke.cont.i2124 br label %invoke.cont466 @@ -1528,7 +1528,7 @@ invoke.cont469: ; preds = %invoke.cont466 to label %invoke.cont.i2130 unwind label %lpad.i2132 invoke.cont.i2130: ; preds = %invoke.cont469 - br i1 undef, label %invoke.cont475, label %if.then.i2131 + br i1 %arg, label %invoke.cont475, label %if.then.i2131 if.then.i2131: ; preds = %invoke.cont.i2130 br label %invoke.cont475 @@ -1563,7 +1563,7 @@ invoke.cont509: ; preds = %invoke.cont506 to label %invoke.cont512 unwind label %lpad489 invoke.cont512: ; preds = %invoke.cont509 - br i1 undef, label %msgSend.null-receiver, label %msgSend.call + br i1 %arg, label %msgSend.null-receiver, label %msgSend.call msgSend.call: ; preds = %invoke.cont512 invoke void @objc_msgSend_stret(ptr sret(%struct.CGPoint) undef, ptr undef, ptr undef) @@ -1577,7 +1577,7 @@ msgSend.cont: ; preds = %msgSend.null-receiv to label %invoke.cont.i2136 unwind label %lpad.i2138 invoke.cont.i2136: ; preds = %msgSend.cont - br i1 undef, label %invoke.cont521, label %if.then.i2137 + br i1 %arg, label %invoke.cont521, label %if.then.i2137 if.then.i2137: ; preds = %invoke.cont.i2136 br label %invoke.cont521 @@ -1604,7 +1604,7 @@ invoke.cont534: ; preds = %invoke.cont531 to label %invoke.cont.i2142 unwind label %lpad.i2144 invoke.cont.i2142: ; preds = %invoke.cont534 - br i1 undef, label %invoke.cont540, label %if.then.i2143 + br i1 %arg, label %invoke.cont540, label %if.then.i2143 if.then.i2143: ; preds = %invoke.cont.i2142 br label %invoke.cont540 @@ -1918,31 +1918,31 @@ eh.resume: ; preds = %lpad580, %ehcleanup @"OBJC_EHTYPE_$_NSException" = external global i8 -define void @test4() personality ptr @__objc_personality_v0 { +define void @test4(i1 %arg) personality ptr @__objc_personality_v0 { entry: - br i1 undef, label %if.end13, label %if.then10 + br i1 %arg, label %if.end13, label %if.then10 if.then10: ; preds = %entry br label %if.end13 if.end13: ; preds = %if.then10, %entry %0 = call ptr @objc_msgSend(ptr undef, ptr undef, ptr @_unnamed_cfstring, i64 2, ptr @_unnamed_cfstring_2, i8 signext 0), !clang.arc.no_objc_arc_exceptions !0 - br i1 undef, label %if.then17, label %if.end18 + br i1 %arg, label %if.then17, label %if.end18 if.then17: ; preds = %if.end13 br label %if.end18 if.end18: ; preds = %if.then17, %if.end13 - br i1 undef, label %if.then64, label %if.end73 + br i1 %arg, label %if.then64, label %if.end73 if.then64: ; preds = %if.end18 - br i1 undef, label %cond.end71, label %cond.true68 + br i1 %arg, label %cond.end71, label %cond.true68 cond.true68: ; preds = %if.then64 br label %cond.end71 cond.end71: ; preds = %cond.true68, %if.then64 - br i1 undef, label %cleanup.action, label %cleanup.done + br i1 %arg, label %cleanup.action, label %cleanup.done cleanup.action: ; preds = %cond.end71 br label %cleanup.done @@ -1951,7 +1951,7 @@ cleanup.done: ; preds = %cleanup.action, %co br label %if.end73 if.end73: ; preds = %cleanup.done, %if.end18 - br i1 undef, label 
%forcoll.empty, label %forcoll.loopinit + br i1 %arg, label %forcoll.empty, label %forcoll.loopinit forcoll.loopinit: ; preds = %if.end73 br label %forcoll.loopbody.outer @@ -1960,34 +1960,34 @@ forcoll.loopbody.outer: ; preds = %forcoll.refetch, %f br label %forcoll.loopbody forcoll.loopbody: ; preds = %forcoll.notmutated, %forcoll.loopbody.outer - br i1 undef, label %forcoll.notmutated, label %forcoll.mutated + br i1 %arg, label %forcoll.notmutated, label %forcoll.mutated forcoll.mutated: ; preds = %forcoll.loopbody br label %forcoll.notmutated forcoll.notmutated: ; preds = %forcoll.mutated, %forcoll.loopbody - br i1 undef, label %forcoll.loopbody, label %forcoll.refetch + br i1 %arg, label %forcoll.loopbody, label %forcoll.refetch forcoll.refetch: ; preds = %forcoll.notmutated - br i1 undef, label %forcoll.empty, label %forcoll.loopbody.outer + br i1 %arg, label %forcoll.empty, label %forcoll.loopbody.outer forcoll.empty: ; preds = %forcoll.refetch, %if.end73 - br i1 undef, label %if.end85, label %if.then82 + br i1 %arg, label %if.end85, label %if.then82 if.then82: ; preds = %forcoll.empty br label %if.end85 if.end85: ; preds = %if.then82, %forcoll.empty - br i1 undef, label %if.then87, label %if.end102 + br i1 %arg, label %if.then87, label %if.end102 if.then87: ; preds = %if.end85 - br i1 undef, label %if.end94, label %if.then91 + br i1 %arg, label %if.end94, label %if.then91 if.then91: ; preds = %if.then87 br label %if.end94 if.end94: ; preds = %if.then91, %if.then87 - br i1 undef, label %if.end101, label %if.then98 + br i1 %arg, label %if.end101, label %if.then98 if.then98: ; preds = %if.end94 br label %if.end101 @@ -1996,139 +1996,139 @@ if.end101: ; preds = %if.then98, %if.end9 br label %if.end102 if.end102: ; preds = %if.end101, %if.end85 - br i1 undef, label %do.body113, label %if.then107 + br i1 %arg, label %do.body113, label %if.then107 if.then107: ; preds = %if.end102 br label %do.body113 do.body113: ; preds = %if.then107, %if.end102 - br i1 undef, label %if.then116, label %if.end117 + br i1 %arg, label %if.then116, label %if.end117 if.then116: ; preds = %do.body113 br label %if.end117 if.end117: ; preds = %if.then116, %do.body113 - br i1 undef, label %if.then125, label %if.end126 + br i1 %arg, label %if.then125, label %if.end126 if.then125: ; preds = %if.end117 br label %if.end126 if.end126: ; preds = %if.then125, %if.end117 - br i1 undef, label %do.end166, label %cond.true132 + br i1 %arg, label %do.end166, label %cond.true132 cond.true132: ; preds = %if.end126 - br i1 undef, label %do.body148, label %cond.true151 + br i1 %arg, label %do.body148, label %cond.true151 do.body148: ; preds = %cond.true132 - br i1 undef, label %do.end166, label %cond.true151 + br i1 %arg, label %do.end166, label %cond.true151 cond.true151: ; preds = %do.body148, %cond.true132 - br i1 undef, label %if.then162, label %do.end166 + br i1 %arg, label %if.then162, label %do.end166 if.then162: ; preds = %cond.true151 br label %do.end166 do.end166: ; preds = %if.then162, %cond.true151, %do.body148, %if.end126 - br i1 undef, label %if.then304, label %if.then170 + br i1 %arg, label %if.then304, label %if.then170 if.then170: ; preds = %do.end166 - br i1 undef, label %do.end193, label %cond.true179 + br i1 %arg, label %do.end193, label %cond.true179 cond.true179: ; preds = %if.then170 - br i1 undef, label %if.then190, label %do.end193 + br i1 %arg, label %if.then190, label %do.end193 if.then190: ; preds = %cond.true179 br label %do.end193 do.end193: ; preds = %if.then190, %cond.true179, %if.then170 - br i1 
undef, label %do.body200, label %do.body283 + br i1 %arg, label %do.body200, label %do.body283 do.body200: ; preds = %do.end193 - br i1 undef, label %do.end254, label %cond.true203 + br i1 %arg, label %do.end254, label %cond.true203 cond.true203: ; preds = %do.body200 - br i1 undef, label %do.body218, label %cond.true221 + br i1 %arg, label %do.body218, label %cond.true221 do.body218: ; preds = %cond.true203 - br i1 undef, label %do.end254, label %cond.true221 + br i1 %arg, label %do.end254, label %cond.true221 cond.true221: ; preds = %do.body218, %cond.true203 - br i1 undef, label %if.then232, label %do.body236 + br i1 %arg, label %if.then232, label %do.body236 if.then232: ; preds = %cond.true221 br label %do.body236 do.body236: ; preds = %if.then232, %cond.true221 - br i1 undef, label %do.end254, label %cond.true239 + br i1 %arg, label %do.end254, label %cond.true239 cond.true239: ; preds = %do.body236 - br i1 undef, label %if.then250, label %do.end254 + br i1 %arg, label %if.then250, label %do.end254 if.then250: ; preds = %cond.true239 br label %do.end254 do.end254: ; preds = %if.then250, %cond.true239, %do.body236, %do.body218, %do.body200 - br i1 undef, label %do.end277, label %cond.true263 + br i1 %arg, label %do.end277, label %cond.true263 cond.true263: ; preds = %do.end254 - br i1 undef, label %if.then274, label %do.end277 + br i1 %arg, label %if.then274, label %do.end277 if.then274: ; preds = %cond.true263 unreachable do.end277: ; preds = %cond.true263, %do.end254 - br i1 undef, label %if.then280, label %do.body283 + br i1 %arg, label %if.then280, label %do.body283 if.then280: ; preds = %do.end277 br label %do.body283 do.body283: ; preds = %if.then280, %do.end277, %do.end193 - br i1 undef, label %if.end301, label %cond.true286 + br i1 %arg, label %if.end301, label %cond.true286 cond.true286: ; preds = %do.body283 - br i1 undef, label %if.then297, label %if.end301 + br i1 %arg, label %if.then297, label %if.end301 if.then297: ; preds = %cond.true286 br label %if.end301 if.end301: ; preds = %if.then297, %cond.true286, %do.body283 - br i1 undef, label %if.then304, label %do.body351 + br i1 %arg, label %if.then304, label %do.body351 if.then304: ; preds = %if.end301, %do.end166 - br i1 undef, label %do.body309.lr.ph, label %do.body351 + br i1 %arg, label %do.body309.lr.ph, label %do.body351 do.body309.lr.ph: ; preds = %if.then304 br label %do.body309 do.body309: ; preds = %for.cond.backedge, %do.body309.lr.ph - br i1 undef, label %do.end328, label %cond.true312 + br i1 %arg, label %do.end328, label %cond.true312 cond.true312: ; preds = %do.body309 - br i1 undef, label %if.then323, label %do.end328 + br i1 %arg, label %if.then323, label %do.end328 if.then323: ; preds = %cond.true312 br label %do.end328 do.end328: ; preds = %if.then323, %cond.true312, %do.body309 - br i1 undef, label %for.cond.backedge, label %cond.true335 + br i1 %arg, label %for.cond.backedge, label %cond.true335 for.cond.backedge: ; preds = %if.then346, %cond.true335, %do.end328 - br i1 undef, label %do.body309, label %do.body351 + br i1 %arg, label %do.body309, label %do.body351 cond.true335: ; preds = %do.end328 - br i1 undef, label %if.then346, label %for.cond.backedge + br i1 %arg, label %if.then346, label %for.cond.backedge if.then346: ; preds = %cond.true335 br label %for.cond.backedge do.body351: ; preds = %for.cond.backedge, %if.then304, %if.end301 - br i1 undef, label %if.then354, label %if.end355 + br i1 %arg, label %if.then354, label %if.end355 if.then354: ; preds = %do.body351 br label %if.end355 if.end355: 
; preds = %if.then354, %do.body351 - br i1 undef, label %if.else, label %if.then364 + br i1 %arg, label %if.else, label %if.then364 if.then364: ; preds = %if.end355 br label %do.body366 @@ -2137,7 +2137,7 @@ if.else: ; preds = %if.end355 br label %do.body366 do.body366: ; preds = %if.else, %if.then364 - br i1 undef, label %if.then369, label %if.end377.critedge + br i1 %arg, label %if.then369, label %if.end377.critedge if.then369: ; preds = %do.body366 br label %if.end377 @@ -2146,7 +2146,7 @@ if.end377.critedge: ; preds = %do.body366 br label %if.end377 if.end377: ; preds = %if.end377.critedge, %if.then369 - br i1 undef, label %if.then383, label %if.end392.critedge + br i1 %arg, label %if.then383, label %if.end392.critedge if.then383: ; preds = %if.end377 br label %if.end392 @@ -2155,7 +2155,7 @@ if.end392.critedge: ; preds = %if.end377 br label %if.end392 if.end392: ; preds = %if.end392.critedge, %if.then383 - br i1 undef, label %if.then398, label %if.end399 + br i1 %arg, label %if.then398, label %if.end399 if.then398: ; preds = %if.end392 br label %if.end399 @@ -2165,7 +2165,7 @@ if.end399: ; preds = %if.then398, %if.end to label %eh.cont unwind label %lpad, !clang.arc.no_objc_arc_exceptions !0 eh.cont: ; preds = %if.end399 - br i1 undef, label %if.then430, label %if.end439.critedge + br i1 %arg, label %if.then430, label %if.end439.critedge if.then430: ; preds = %eh.cont %1 = call ptr @llvm.objc.retain(ptr %0) diff --git a/llvm/test/Transforms/Reassociate/2011-01-26-UseAfterFree.ll b/llvm/test/Transforms/Reassociate/2011-01-26-UseAfterFree.ll index 4cde69bb0c7dd..80a9e77030e64 100644 --- a/llvm/test/Transforms/Reassociate/2011-01-26-UseAfterFree.ll +++ b/llvm/test/Transforms/Reassociate/2011-01-26-UseAfterFree.ll @@ -3,7 +3,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" target triple = "i386-gnu-linux" -define void @exp_averages_intraday__deviation() { +define void @exp_averages_intraday__deviation(i1 %arg) { entry: %0 = load i32, ptr undef, align 4 %1 = shl i32 %0, 2 @@ -16,14 +16,14 @@ entry: br i1 false, label %"4", label %"12" "4": ; preds = %entry - br i1 undef, label %"5", label %"8" + br i1 %arg, label %"5", label %"8" "5": ; preds = %"4" unreachable "8": ; preds = %"4" %8 = getelementptr inbounds i8, ptr undef, i32 %6 - br i1 undef, label %"13", label %"12" + br i1 %arg, label %"13", label %"12" "12": ; preds = %"8", %entry ret void diff --git a/llvm/test/Transforms/Reassociate/2012-06-08-InfiniteLoop.ll b/llvm/test/Transforms/Reassociate/2012-06-08-InfiniteLoop.ll index 650aa82c1af2f..ba730758a1f26 100644 --- a/llvm/test/Transforms/Reassociate/2012-06-08-InfiniteLoop.ll +++ b/llvm/test/Transforms/Reassociate/2012-06-08-InfiniteLoop.ll @@ -1,14 +1,14 @@ ; RUN: opt < %s -passes=reassociate -disable-output ; PR13041 -define void @foo() { +define void @foo(i1 %arg) { entry: br label %while.cond while.cond: ; preds = %while.body, %entry %b.0 = phi i32 [ undef, %entry ], [ %sub2, %while.body ] %c.0 = phi i32 [ undef, %entry ], [ %sub3, %while.body ] - br i1 undef, label %while.end, label %while.body + br i1 %arg, label %while.end, label %while.body while.body: ; preds = %while.cond %sub = sub nsw i32 0, %b.0 diff --git a/llvm/test/Transforms/Reassociate/add_across_block_crash.ll b/llvm/test/Transforms/Reassociate/add_across_block_crash.ll index 26e971266d344..a89f81154f3eb 100644 --- a/llvm/test/Transforms/Reassociate/add_across_block_crash.ll +++ 
b/llvm/test/Transforms/Reassociate/add_across_block_crash.ll @@ -3,10 +3,10 @@ ; This test is to make sure while processing a block, uses of instructions ; from a different basic block don't get added to be re-optimized -define void @main() { +define void @main(i1 %arg) { ; CHECK-LABEL: @main( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 undef, label %bb1, label %bb2 +; CHECK-NEXT: br i1 %arg, label %bb1, label %bb2 ; CHECK: bb1: ; CHECK-NEXT: ret void ; CHECK: bb2: @@ -14,7 +14,7 @@ define void @main() { ; entry: %0 = fadd fast float undef, undef - br i1 undef, label %bb1, label %bb2 + br i1 %arg, label %bb1, label %bb2 bb1: %1 = fmul fast float undef, -2.000000e+00 diff --git a/llvm/test/Transforms/Reassociate/infloop-deadphi.ll b/llvm/test/Transforms/Reassociate/infloop-deadphi.ll index 5b19f8d384f4d..3202f450b7566 100644 --- a/llvm/test/Transforms/Reassociate/infloop-deadphi.ll +++ b/llvm/test/Transforms/Reassociate/infloop-deadphi.ll @@ -3,14 +3,14 @@ target triple = "x86_64-unknown-linux-gnu" -define void @f() { +define void @f(i1 %arg) { ; CHECK-LABEL: @f( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[DONE:%.*]] ; CHECK: dead: ; CHECK-NEXT: [[XOR0:%.*]] = xor i16 [[XOR1:%.*]], undef ; CHECK-NEXT: [[XOR1]] = xor i16 [[XOR0]], undef -; CHECK-NEXT: br i1 undef, label [[DEAD:%.*]], label [[DONE]] +; CHECK-NEXT: br i1 %arg, label [[DEAD:%.*]], label [[DONE]] ; CHECK: done: ; CHECK-NEXT: ret void ; @@ -20,7 +20,7 @@ entry: dead: %xor0 = xor i16 %xor1, undef %xor1 = xor i16 %xor0, undef - br i1 undef, label %dead, label %done + br i1 %arg, label %dead, label %done done: %e = phi i16 [ %xor1, %dead ], [ 0, %entry ] diff --git a/llvm/test/Transforms/Reassociate/reassociate-landingpad.ll b/llvm/test/Transforms/Reassociate/reassociate-landingpad.ll index eb6a5cabb7be8..c5bd62e2763b6 100644 --- a/llvm/test/Transforms/Reassociate/reassociate-landingpad.ll +++ b/llvm/test/Transforms/Reassociate/reassociate-landingpad.ll @@ -14,7 +14,7 @@ declare i32 @__gxx_personality_v0(...) declare void @b() #0 -define void @a() #0 personality ptr @__gxx_personality_v0 { +define void @a(i1 %arg) #0 personality ptr @__gxx_personality_v0 { ", bb1": invoke void @b() to label %invoke.cont unwind label %"bb22" @@ -39,7 +39,7 @@ define void @a() #0 personality ptr @__gxx_personality_v0 { unreachable invoke.cont: ; preds = %", bb1" - br i1 undef, label %", bb15", label %", bb8" + br i1 %arg, label %", bb15", label %", bb8" invoke.cont25: ; preds = %", bb8" unreachable diff --git a/llvm/test/Transforms/SCCP/2004-12-10-UndefBranchBug.ll b/llvm/test/Transforms/SCCP/2004-12-10-UndefBranchBug.ll index 2098b0390b64a..974c2dac78ebf 100644 --- a/llvm/test/Transforms/SCCP/2004-12-10-UndefBranchBug.ll +++ b/llvm/test/Transforms/SCCP/2004-12-10-UndefBranchBug.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -passes=sccp -S | FileCheck %s -; Branch on undef is UB, so the T block is never executed, and we can return +; Branch on poison is UB, so the T block is never executed, and we can return ; undef (IPSCCP would replace the block with unreachable). 
define i32 @foo() { ; CHECK-LABEL: @foo( ; CHECK-NEXT: unreachable ; - br i1 undef, label %T, label %T + br i1 poison, label %T, label %T T: %X = add i32 0, 1 ret i32 %X diff --git a/llvm/test/Transforms/SCCP/2006-10-23-IPSCCP-Crash.ll b/llvm/test/Transforms/SCCP/2006-10-23-IPSCCP-Crash.ll index 3e2cfe1ce25fb..8c1687a3eb784 100644 --- a/llvm/test/Transforms/SCCP/2006-10-23-IPSCCP-Crash.ll +++ b/llvm/test/Transforms/SCCP/2006-10-23-IPSCCP-Crash.ll @@ -6,7 +6,7 @@ target triple = "powerpc-unknown-linux-gnu" @JUMP = external global i32 ; [#uses=1] @old_D_pat = external global [16 x i8] ; [#uses=0] -define void @asearch1(i32 %D) { +define void @asearch1(i32 %D, i1 %arg) { entry: %tmp80 = icmp ult i32 0, %D ; [#uses=1] br i1 %tmp80, label %bb647.preheader, label %cond_true81.preheader @@ -22,7 +22,7 @@ cond_true612: ; preds = %cond_true654 cond_next624: ; preds = %cond_true654 ret void cond_true654: ; preds = %bb647.preheader - br i1 undef, label %cond_true612, label %cond_next624 + br i1 %arg, label %cond_true612, label %cond_next624 UnifiedReturnBlock: ; preds = %bb647.preheader ret void } diff --git a/llvm/test/Transforms/SCCP/2008-01-27-UndefCorrelate.ll b/llvm/test/Transforms/SCCP/2008-01-27-UndefCorrelate.ll index 6f499f27a65a9..895b1393dadae 100644 --- a/llvm/test/Transforms/SCCP/2008-01-27-UndefCorrelate.ll +++ b/llvm/test/Transforms/SCCP/2008-01-27-UndefCorrelate.ll @@ -2,12 +2,30 @@ ; RUN: opt < %s -passes=sccp -S | FileCheck %s ; PR1938 -define i32 @main() { +define i32 @main(i1 %arg) { ; CHECK-LABEL: @main( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[BB:%.*]] ; CHECK: bb: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[K:%.*]], [[BB_BACKEDGE:%.*]] ] +; CHECK-NEXT: [[K]] = add i32 [[INDVAR]], 1 +; CHECK-NEXT: br i1 [[ARG:%.*]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK: cond_true: +; CHECK-NEXT: [[TMP97:%.*]] = icmp slt i32 [[K]], 10 +; CHECK-NEXT: br i1 [[TMP97]], label [[BB_BACKEDGE]], label [[BB12:%.*]] +; CHECK: bb.backedge: +; CHECK-NEXT: br label [[BB]] +; CHECK: cond_false: +; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i32 [[K]], 10 +; CHECK-NEXT: br i1 [[TMP9]], label [[BB_BACKEDGE]], label [[BB12]] +; CHECK: bb12: +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[K]], 10 +; CHECK-NEXT: br i1 [[TMP14]], label [[COND_NEXT18:%.*]], label [[COND_TRUE17:%.*]] +; CHECK: cond_true17: +; CHECK-NEXT: tail call void @abort() ; CHECK-NEXT: unreachable +; CHECK: cond_next18: +; CHECK-NEXT: ret i32 0 ; entry: br label %bb @@ -15,7 +33,7 @@ entry: bb: %indvar = phi i32 [ 0, %entry ], [ %k, %bb.backedge ] %k = add i32 %indvar, 1 - br i1 undef, label %cond_true, label %cond_false + br i1 %arg, label %cond_true, label %cond_false cond_true: %tmp97 = icmp slt i32 %k, 10 diff --git a/llvm/test/Transforms/SCCP/PR26044.ll b/llvm/test/Transforms/SCCP/PR26044.ll index 90ac3101d0c23..f786629f47d0e 100644 --- a/llvm/test/Transforms/SCCP/PR26044.ll +++ b/llvm/test/Transforms/SCCP/PR26044.ll @@ -3,13 +3,13 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -define void @fn2(ptr %P) { +define void @fn2(ptr %P, i1 %arg) { ; CHECK-LABEL: define {{[^@]+}}@fn2 -; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-SAME: (ptr [[P:%.*]], i1 [[ARG:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[IF_END:%.*]] ; CHECK: for.cond1: -; CHECK-NEXT: unreachable +; CHECK-NEXT: br i1 [[ARG]], label [[IF_END]], label [[IF_END]] ; CHECK: if.end: ; CHECK-NEXT: [[CALL:%.*]] = call i32 @fn1(i32 undef) ; CHECK-NEXT: store i32 [[CALL]], ptr [[P]], align 
4 @@ -19,7 +19,7 @@ entry: br label %if.end for.cond1: ; preds = %if.end, %for.end - br i1 undef, label %if.end, label %if.end + br i1 %arg, label %if.end, label %if.end if.end: ; preds = %lbl, %for.cond1 %e.2 = phi ptr [ undef, %entry ], [ null, %for.cond1 ], [ null, %for.cond1 ] @@ -43,15 +43,16 @@ entry: ret i32 %cond } -define void @fn_no_null_opt(ptr %P) #0 { +define void @fn_no_null_opt(ptr %P, i1 %arg) #0 { ; CHECK-LABEL: define {{[^@]+}}@fn_no_null_opt -; CHECK-SAME: (ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-SAME: (ptr [[P:%.*]], i1 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[IF_END:%.*]] ; CHECK: for.cond1: -; CHECK-NEXT: unreachable +; CHECK-NEXT: br i1 [[ARG]], label [[IF_END]], label [[IF_END]] ; CHECK: if.end: -; CHECK-NEXT: [[CALL:%.*]] = call i32 @fn0(i32 undef) +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr null, align 4 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @fn0(i32 [[TMP0]]) ; CHECK-NEXT: store i32 [[CALL]], ptr [[P]], align 4 ; CHECK-NEXT: br label [[FOR_COND1:%.*]] ; @@ -59,7 +60,7 @@ entry: br label %if.end for.cond1: ; preds = %if.end, %for.end - br i1 undef, label %if.end, label %if.end + br i1 %arg, label %if.end, label %if.end if.end: ; preds = %lbl, %for.cond1 %e.2 = phi ptr [ undef, %entry ], [ null, %for.cond1 ], [ null, %for.cond1 ] @@ -73,8 +74,8 @@ define internal i32 @fn0(i32 %p1) { ; CHECK-LABEL: define {{[^@]+}}@fn0 ; CHECK-SAME: (i32 [[P1:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 undef, 0 -; CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 undef, i32 undef +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[P1]], 0 +; CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 [[P1]], i32 [[P1]] ; CHECK-NEXT: ret i32 [[COND]] ; entry: diff --git a/llvm/test/Transforms/SCCP/crash.ll b/llvm/test/Transforms/SCCP/crash.ll index 47d9329f6f03d..9001b42850c25 100644 --- a/llvm/test/Transforms/SCCP/crash.ll +++ b/llvm/test/Transforms/SCCP/crash.ll @@ -2,9 +2,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-apple-darwin10.0" -define void @test1(i8 %arg) { +define void @test1(i8 %arg, i1 %arg1) { entry: - br i1 undef, label %return, label %bb + br i1 %arg1, label %return, label %bb bb: br label %bb34 diff --git a/llvm/test/Transforms/SCCP/domtree-update.ll b/llvm/test/Transforms/SCCP/domtree-update.ll index 76f575c4e9c6d..270da0e2f2bfe 100644 --- a/llvm/test/Transforms/SCCP/domtree-update.ll +++ b/llvm/test/Transforms/SCCP/domtree-update.ll @@ -4,7 +4,7 @@ ; DTU should not crash. 
-define i32 @test() { +define i32 @test(i1 %arg) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -25,10 +25,10 @@ if.then2: ; preds = %for.body br label %for.inc if.else: ; preds = %for.body - br i1 undef, label %lor.rhs, label %if.then19.critedge + br i1 %arg, label %lor.rhs, label %if.then19.critedge lor.rhs: ; preds = %if.else - br i1 undef, label %if.then19, label %for.inc + br i1 %arg, label %if.then19, label %for.inc if.then19.critedge: ; preds = %if.else br label %if.then19 diff --git a/llvm/test/Transforms/SCCP/fp-bc-icmp-const-fold.ll b/llvm/test/Transforms/SCCP/fp-bc-icmp-const-fold.ll index 8061a0396ee4c..6a8b52d0ac481 100644 --- a/llvm/test/Transforms/SCCP/fp-bc-icmp-const-fold.ll +++ b/llvm/test/Transforms/SCCP/fp-bc-icmp-const-fold.ll @@ -2,18 +2,18 @@ target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64le-unknown-linux" -define void @test(i32 signext %n) { +define void @test(i32 signext %n, i1 %arg) { ; CHECK-LABEL: @test entry: - br i1 undef, label %if.then, label %if.end + br i1 %arg, label %if.then, label %if.end if.then: ; preds = %entry ret void if.end: ; preds = %entry - br i1 undef, label %if.then2, label %if.end4 + br i1 %arg, label %if.then2, label %if.end4 if.then2: ; preds = %if.end unreachable @@ -36,10 +36,10 @@ if.else14: ; preds = %if.end4 do.body: ; preds = %do.body, %if.else14 %scale.0 = phi ppc_fp128 [ 0xM3FF00000000000000000000000000000, %if.else14 ], [ %scale.0, %do.body ] - br i1 undef, label %do.body, label %if.then33 + br i1 %arg, label %do.body, label %if.then33 if.then33: ; preds = %do.body - br i1 undef, label %_ZN5boost4math4signIgEEiRKT_.exit30, label %cond.false.i28 + br i1 %arg, label %_ZN5boost4math4signIgEEiRKT_.exit30, label %cond.false.i28 cond.false.i28: ; preds = %if.then33 %0 = bitcast ppc_fp128 %scale.0 to i128 diff --git a/llvm/test/Transforms/SCCP/ipsccp-preserve-pdt.ll b/llvm/test/Transforms/SCCP/ipsccp-preserve-pdt.ll index f8c8e33dfc233..be05d96f08574 100644 --- a/llvm/test/Transforms/SCCP/ipsccp-preserve-pdt.ll +++ b/llvm/test/Transforms/SCCP/ipsccp-preserve-pdt.ll @@ -20,16 +20,17 @@ ; CHECK-NEXT: [2] %for.body {4294967295,4294967295} [1] ; CHECK-NEXT: [2] %if.end4 {4294967295,4294967295} [1] ; CHECK-NEXT: [3] %entry {4294967295,4294967295} [2] -; CHECK-NEXT: [2] %for.cond34 {4294967295,4294967295} [1] -; CHECK-NEXT: [3] %for.cond16 {4294967295,4294967295} [2] -; CHECK-NEXT: Roots: %for.body %for.cond34 +; CHECK-NEXT: [2] %for.body37 {4294967295,4294967295} [1] +; CHECK-NEXT: [3] %for.cond34 {4294967295,4294967295} [2] +; CHECK-NEXT: [4] %for.cond16 {4294967295,4294967295} [3] +; CHECK-NEXT: Roots: %for.body %for.body37 ; CHECK-NEXT: PostDominatorTree for function: bar ; CHECK-NOT: declare hidden i1 @compare(ptr) align 2 declare hidden { i8, ptr } @getType(ptr) align 2 -define internal void @foo(ptr %TLI, ptr %DL, ptr %Ty, ptr %ValueVTs, ptr %Offsets, i64 %StartingOffset) { +define internal void @foo(ptr %TLI, ptr %DL, ptr %Ty, ptr %ValueVTs, ptr %Offsets, i64 %StartingOffset, i1 %arg) { entry: %VT = alloca i64, align 8 br i1 false, label %if.then, label %if.end4 @@ -51,7 +52,7 @@ for.cond16: ; preds = %for.cond34, %if.end br label %for.cond34 for.cond34: ; preds = %for.body37, %for.cond16 - br i1 undef, label %for.body37, label %for.cond16 + br i1 %arg, label %for.body37, label %for.cond16 for.body37: ; preds = %for.cond34 %tobool39 = icmp ne ptr %Offsets, null diff --git a/llvm/test/Transforms/SCCP/pr49582-iterator-invalidation.ll 
b/llvm/test/Transforms/SCCP/pr49582-iterator-invalidation.ll index ed5703da35e6f..1ba6e9734e642 100644 --- a/llvm/test/Transforms/SCCP/pr49582-iterator-invalidation.ll +++ b/llvm/test/Transforms/SCCP/pr49582-iterator-invalidation.ll @@ -5,7 +5,7 @@ @c = external dso_local global ptr, align 8 @d = external dso_local global i32, align 4 -define void @f(i32 %i) { +define void @f(i32 %i, i1 %arg) { entry: br label %for.cond @@ -474,7 +474,7 @@ if.then312: ; preds = %if.then309 br label %if.end628 if.else316: ; preds = %if.then309 - br i1 undef, label %if.then318, label %if.end628 + br i1 %arg, label %if.then318, label %if.end628 if.then318: ; preds = %if.else316 %idxprom320 = sext i32 %add310 to i64 @@ -726,7 +726,7 @@ if.then499: ; preds = %if.else496 br label %if.end628 if.else501: ; preds = %if.else496 - br i1 undef, label %if.then503, label %if.end628 + br i1 %arg, label %if.then503, label %if.end628 if.then503: ; preds = %if.else501 br label %if.end628 @@ -834,7 +834,7 @@ if.then596: ; preds = %if.then593 br label %if.end628 if.else600: ; preds = %if.then593 - br i1 undef, label %if.then602, label %if.end628 + br i1 %arg, label %if.then602, label %if.end628 if.then602: ; preds = %if.else600 %idxprom604 = sext i32 %add594 to i64 diff --git a/llvm/test/Transforms/SCCP/return-zapped.ll b/llvm/test/Transforms/SCCP/return-zapped.ll index 6d70500125093..cafc8aa69e868 100644 --- a/llvm/test/Transforms/SCCP/return-zapped.ll +++ b/llvm/test/Transforms/SCCP/return-zapped.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature ; RUN: opt < %s -S -passes=ipsccp | FileCheck %s -; testf() performs an unconditional branch on undef, as such the testf() return +; testf() performs an unconditional branch on poison, as such the testf() return ; value used in test1() will remain "unknown" and the following branch on it ; replaced by unreachable. This is fine, as the call to testf() will already ; trigger undefined behavior. 
@@ -31,7 +31,7 @@ define internal i1 @testf() { ; CHECK-NEXT: unreachable ; entry: - br i1 undef, label %if.then1, label %if.end3 + br i1 poison, label %if.then1, label %if.end3 if.then1: ; preds = %if.end br label %if.end3 diff --git a/llvm/test/Transforms/SCCP/solve-after-each-resolving-undefs-for-function.ll b/llvm/test/Transforms/SCCP/solve-after-each-resolving-undefs-for-function.ll index 05f3358045c3c..a4b1ba8a0c8dd 100644 --- a/llvm/test/Transforms/SCCP/solve-after-each-resolving-undefs-for-function.ll +++ b/llvm/test/Transforms/SCCP/solve-after-each-resolving-undefs-for-function.ll @@ -16,7 +16,7 @@ entry: br i1 %c, label %if.cond, label %if.end if.cond: - br i1 undef, label %if.then, label %if.end + br i1 poison, label %if.then, label %if.end if.then: ; preds = %entry, %if.then ret i32 11 diff --git a/llvm/test/Transforms/SCCP/switch-constantfold-crash.ll b/llvm/test/Transforms/SCCP/switch-constantfold-crash.ll index 2336c9186636e..73eef205adae7 100644 --- a/llvm/test/Transforms/SCCP/switch-constantfold-crash.ll +++ b/llvm/test/Transforms/SCCP/switch-constantfold-crash.ll @@ -59,13 +59,23 @@ bb38: ; preds = %bb16 } -define void @hoge() { -; CHECK-LABEL: define {{[^@]+}}@hoge() { +define void @hoge(i1 %arg, i16 %arg2) { +; CHECK-LABEL: define {{[^@]+}}@hoge +; CHECK-SAME: (i1 [[ARG:%.*]], i16 [[ARG2:%.*]]) { ; CHECK-NEXT: bb: +; CHECK-NEXT: switch i16 [[ARG2]], label [[BB1:%.*]] [ +; CHECK-NEXT: i16 135, label [[BB2:%.*]] +; CHECK-NEXT: i16 66, label [[BB2]] +; CHECK-NEXT: ] +; CHECK: bb1: +; CHECK-NEXT: ret void +; CHECK: bb2: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb3: ; CHECK-NEXT: unreachable ; bb: - switch i16 undef, label %bb1 [ + switch i16 %arg2, label %bb1 [ i16 135, label %bb2 i16 66, label %bb2 ] @@ -89,14 +99,9 @@ bb4: ; preds = %bb2, %bb2, %bb2 ; Test case from PR49573. %default.bb is unfeasible. Make sure it gets replaced ; by an unreachable block. -define void @pr49573_main() { -; CHECK-LABEL: define {{[^@]+}}@pr49573_main() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TGT:%.*]] = call i16 @pr49573_fn() -; CHECK-NEXT: unreachable -; +define void @pr49573_main(i1 %arg) { entry: - %tgt = call i16 @pr49573_fn() + %tgt = call i16 @pr49573_fn(i1 %arg) switch i16 %tgt, label %default.bb [ i16 0, label %case.0 i16 1, label %case.1 @@ -116,7 +121,7 @@ case.2: br label %next next: - %tgt.2 = call i16 @pr49573_fn_2() + %tgt.2 = call i16 @pr49573_fn_2(i1 %arg) switch i16 %tgt.2, label %default.bb [ i16 0, label %case.0 i16 2, label %case.2 @@ -124,14 +129,9 @@ next: } ; Make sure a new unreachable BB is created. 
-define void @pr49573_main_2() { -; CHECK-LABEL: define {{[^@]+}}@pr49573_main_2() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TGT:%.*]] = call i16 @pr49573_fn() -; CHECK-NEXT: unreachable -; +define void @pr49573_main_2(i1 %arg) { entry: - %tgt = call i16 @pr49573_fn() + %tgt = call i16 @pr49573_fn(i1 %arg) switch i16 %tgt, label %default.bb [ i16 0, label %case.0 i16 1, label %case.1 @@ -151,13 +151,18 @@ case.2: ret void } -define internal i16 @pr49573_fn() { -; CHECK-LABEL: define {{[^@]+}}@pr49573_fn() { +define internal i16 @pr49573_fn(i1 %arg) { +; CHECK-LABEL: define {{[^@]+}}@pr49573_fn +; CHECK-SAME: (i1 [[ARG:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: unreachable +; CHECK-NEXT: br i1 [[ARG]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: ret i16 0 +; CHECK: else: +; CHECK-NEXT: ret i16 2 ; entry: - br i1 undef, label %then, label %else + br i1 %arg, label %then, label %else then: ret i16 0 @@ -166,13 +171,18 @@ else: ret i16 2 } -define internal i16 @pr49573_fn_2() { -; CHECK-LABEL: define {{[^@]+}}@pr49573_fn_2() { +define internal i16 @pr49573_fn_2(i1 %arg) { +; CHECK-LABEL: define {{[^@]+}}@pr49573_fn_2 +; CHECK-SAME: (i1 [[ARG:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: unreachable +; CHECK-NEXT: br i1 [[ARG]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: ret i16 0 +; CHECK: else: +; CHECK-NEXT: ret i16 2 ; entry: - br i1 undef, label %then, label %else + br i1 %arg, label %then, label %else then: ret i16 0 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/minimum-sizes.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/minimum-sizes.ll index 920e1e64e3958..356102ce81780 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/minimum-sizes.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/minimum-sizes.ll @@ -8,10 +8,10 @@ target triple = "aarch64--linux-gnu" ; should not compute a smaller size for %k.13 since it is in a use-def cycle ; and cannot be demoted. ; -define fastcc void @PR26364() { +define fastcc void @PR26364(i1 %arg) { ; CHECK-LABEL: @PR26364( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 undef, label [[FOR_END11:%.*]], label [[FOR_COND4:%.*]] +; CHECK-NEXT: br i1 %arg, label [[FOR_END11:%.*]], label [[FOR_COND4:%.*]] ; CHECK: for.cond4: ; CHECK-NEXT: [[K_13:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[K_3:%.*]], [[FOR_COND4]] ] ; CHECK-NEXT: [[E_02:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ 0, [[FOR_COND4]] ] @@ -22,7 +22,7 @@ define fastcc void @PR26364() { ; CHECK-NEXT: ret void ; entry: - br i1 undef, label %for.end11, label %for.cond4 + br i1 %arg, label %for.end11, label %for.cond4 for.cond4: %k.13 = phi i32 [ undef, %entry ], [ %k.3, %for.cond4 ] @@ -39,10 +39,10 @@ for.end11: ; every root in the vectorizable tree when computing minimum sizes since one ; root may require fewer bits than another. 
; -define void @PR26629(ptr %c) { +define void @PR26629(ptr %c, i1 %arg) { ; CHECK-LABEL: @PR26629( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 undef, label [[FOR_PH:%.*]], label [[FOR_END:%.*]] +; CHECK-NEXT: br i1 %arg, label [[FOR_PH:%.*]], label [[FOR_END:%.*]] ; CHECK: for.ph: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[C:%.*]], align 4 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -59,7 +59,7 @@ define void @PR26629(ptr %c) { ; CHECK-NEXT: ret void ; entry: - br i1 undef, label %for.ph, label %for.end + br i1 %arg, label %for.ph, label %for.end for.ph: %0 = load i32, ptr %c, align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll index dc05967af1529..f5e904467baa7 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=aarch64-w32-windows-gnu | FileCheck %s -define i32 @foo(i32 %v1, double %v2) { +define i32 @foo(i32 %v1, double %v2, i1 %arg, i32 %arg2) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> , i32 [[V1:%.*]], i32 0 @@ -15,7 +15,7 @@ define i32 @foo(i32 %v1, double %v2) { ; CHECK: if.end: ; CHECK-NEXT: br label [[FOR_COND15:%.*]] ; CHECK: for.end39: -; CHECK-NEXT: switch i32 undef, label [[DO_BODY:%.*]] [ +; CHECK-NEXT: switch i32 %arg2, label [[DO_BODY:%.*]] [ ; CHECK-NEXT: i32 0, label [[SW_BB:%.*]] ; CHECK-NEXT: i32 1, label [[SW_BB195:%.*]] ; CHECK-NEXT: ] @@ -39,7 +39,7 @@ define i32 @foo(i32 %v1, double %v2) { ; CHECK: if.end.1: ; CHECK-NEXT: br label [[FOR_COND15_1:%.*]] ; CHECK: for.cond15.1: -; CHECK-NEXT: br i1 undef, label [[FOR_END39:%.*]], label [[FOR_COND15_PREHEADER]] +; CHECK-NEXT: br i1 %arg, label [[FOR_END39:%.*]], label [[FOR_COND15_PREHEADER]] ; entry: %conv = sitofp i32 undef to double @@ -56,7 +56,7 @@ if.end: ; preds = %for.cond15.preheade br label %for.cond15 for.end39: ; preds = %for.cond15.1 - switch i32 undef, label %do.body [ + switch i32 %arg2, label %do.body [ i32 0, label %sw.bb i32 1, label %sw.bb195 ] @@ -99,7 +99,7 @@ if.end.1: ; preds = %for.cond15 br label %for.cond15.1 for.cond15.1: ; preds = %if.end.1 - br i1 undef, label %for.end39, label %for.cond15.preheader + br i1 %arg, label %for.end39, label %for.cond15.preheader } declare double @llvm.fmuladd.f64(double, double, double) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll index 9910090d43eae..82761b458efcf 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll @@ -3,13 +3,13 @@ target triple = "aarch64-unknown-linux-gnu" @d = internal unnamed_addr global i32 5, align 4 -define dso_local void @l() local_unnamed_addr { +define dso_local void @l(i1 %arg) local_unnamed_addr { ; CHECK-LABEL: @l( ; CHECK-NEXT: bb: ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ undef, [[BB:%.*]] ], [ [[TMP9:%.*]], [[BB25:%.*]] ] -; CHECK-NEXT: br i1 undef, label [[BB3:%.*]], label [[BB11:%.*]] +; CHECK-NEXT: br i1 %arg, label [[BB3:%.*]], label [[BB11:%.*]] ; CHECK: bb3: ; CHECK-NEXT: [[I4:%.*]] = zext i1 undef to i32 ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i16> [[TMP0]], undef @@ -34,7 +34,7 @@ define 
dso_local void @l() local_unnamed_addr { ; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 ; CHECK-NEXT: [[I32:%.*]] = and i32 [[I31]], [[TMP13]] ; CHECK-NEXT: [[I33:%.*]] = and i32 [[I32]], [[I28]] -; CHECK-NEXT: br i1 undef, label [[BB34:%.*]], label [[BB1]] +; CHECK-NEXT: br i1 %arg, label [[BB34:%.*]], label [[BB1]] ; CHECK: bb34: ; CHECK-NEXT: [[I35:%.*]] = phi i32 [ [[I33]], [[BB25]] ] ; CHECK-NEXT: br label [[BB36:%.*]] @@ -48,7 +48,7 @@ bb: bb1: ; preds = %bb25, %bb %i = phi i16 [ undef, %bb ], [ %i29, %bb25 ] %i2 = phi i16 [ undef, %bb ], [ %i30, %bb25 ] - br i1 undef, label %bb3, label %bb11 + br i1 %arg, label %bb3, label %bb11 bb3: ; preds = %bb1 %i4 = zext i1 undef to i32 @@ -85,7 +85,7 @@ bb25: ; preds = %bb11, %bb3 %i31 = and i32 undef, %i26 %i32 = and i32 %i31, %i27 %i33 = and i32 %i32, %i28 - br i1 undef, label %bb34, label %bb1 + br i1 %arg, label %bb34, label %bb1 bb34: ; preds = %bb25 %i35 = phi i32 [ %i33, %bb25 ] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll index 607d7f7888784..c029781142af3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll @@ -7,7 +7,7 @@ target triple = "x86_64-apple-macosx10.8.0" %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334 = type { %struct._CLzmaProps.0.27.54.81.102.123.144.165.180.195.228.258.333, ptr, ptr, ptr, i32, i32, i64, i64, i32, i32, i32, [4 x i32], i32, i32, i32, i32, i32, [20 x i8] } %struct._CLzmaProps.0.27.54.81.102.123.144.165.180.195.228.258.333 = type { i32, i32, i32, i32 } -define fastcc void @LzmaDec_DecodeReal2(ptr %p) { +define fastcc void @LzmaDec_DecodeReal2(ptr %p, i1 %arg) { ; CHECK-LABEL: @LzmaDec_DecodeReal2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RANGE20_I:%.*]] = getelementptr inbounds [[STRUCT_CLZMADEC_1_28_55_82_103_124_145_166_181_196_229_259_334:%.*]], ptr [[P:%.*]], i64 0, i32 4 @@ -15,13 +15,13 @@ define fastcc void @LzmaDec_DecodeReal2(ptr %p) { ; CHECK: do.body66.i: ; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP3:%.*]], [[DO_COND_I:%.*]] ], [ undef, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> undef, <2 x i32> undef, <2 x i32> [[TMP0]] -; CHECK-NEXT: br i1 undef, label [[DO_COND_I]], label [[IF_ELSE_I:%.*]] +; CHECK-NEXT: br i1 %arg, label [[DO_COND_I]], label [[IF_ELSE_I:%.*]] ; CHECK: if.else.i: ; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i32> [[TMP1]], undef ; CHECK-NEXT: br label [[DO_COND_I]] ; CHECK: do.cond.i: ; CHECK-NEXT: [[TMP3]] = phi <2 x i32> [ [[TMP2]], [[IF_ELSE_I]] ], [ [[TMP1]], [[DO_BODY66_I]] ] -; CHECK-NEXT: br i1 undef, label [[DO_BODY66_I]], label [[DO_END1006_I:%.*]] +; CHECK-NEXT: br i1 %arg, label [[DO_BODY66_I]], label [[DO_END1006_I:%.*]] ; CHECK: do.end1006.i: ; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> undef, <2 x i32> undef, <2 x i32> [[TMP3]] ; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr [[RANGE20_I]], align 4 @@ -37,7 +37,7 @@ do.body66.i: ; preds = %do.cond.i, %entry %code.2.i = phi i32 [ %code.4.i, %do.cond.i ], [ undef, %entry ] %.range.2.i = select i1 undef, i32 undef, i32 %range.2.i %.code.2.i = select i1 undef, i32 undef, i32 %code.2.i - br i1 undef, label %do.cond.i, label %if.else.i + br i1 %arg, label %do.cond.i, label %if.else.i if.else.i: ; preds = %do.body66.i %sub91.i = sub i32 %.range.2.i, undef @@ -47,7 +47,7 @@ if.else.i: ; preds = %do.body66.i do.cond.i: ; preds = %if.else.i, %do.body66.i %range.4.i = phi i32 [ %sub91.i, %if.else.i ], [ undef, %do.body66.i ] %code.4.i = phi i32 [ 
%sub92.i, %if.else.i ], [ %.code.2.i, %do.body66.i ] - br i1 undef, label %do.body66.i, label %do.end1006.i + br i1 %arg, label %do.body66.i, label %do.end1006.i do.end1006.i: ; preds = %do.cond.i %.range.4.i = select i1 undef, i32 undef, i32 %range.4.i diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet.ll index f1f83c0663099..291edbbc925bd 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet.ll @@ -6,17 +6,17 @@ target triple = "x86_64-apple-macosx10.8.0" %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960" = type { i32, i32 } -define void @_ZN23btGeneric6DofConstraint8getInfo1EPN17btTypedConstraint17btConstraintInfo1E(ptr nocapture %info) { +define void @_ZN23btGeneric6DofConstraint8getInfo1EPN17btTypedConstraint17btConstraintInfo1E(ptr nocapture %info, i1 %arg) { ; CHECK-LABEL: @_ZN23btGeneric6DofConstraint8getInfo1EPN17btTypedConstraint17btConstraintInfo1E( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 undef, label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]] +; CHECK-NEXT: br i1 %arg, label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: ; CHECK-NEXT: ret void ; CHECK: if.else: ; CHECK-NEXT: [[NUB5:%.*]] = getelementptr inbounds %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960", ptr [[INFO:%.*]], i64 0, i32 1 -; CHECK-NEXT: br i1 undef, label [[LAND_LHS_TRUE_I_1:%.*]], label [[IF_THEN7_1:%.*]] +; CHECK-NEXT: br i1 %arg, label [[LAND_LHS_TRUE_I_1:%.*]], label [[IF_THEN7_1:%.*]] ; CHECK: land.lhs.true.i.1: -; CHECK-NEXT: br i1 undef, label [[FOR_INC_1:%.*]], label [[IF_THEN7_1]] +; CHECK-NEXT: br i1 %arg, label [[FOR_INC_1:%.*]], label [[IF_THEN7_1]] ; CHECK: if.then7.1: ; CHECK-NEXT: store <2 x i32> , ptr [[INFO]], align 4 ; CHECK-NEXT: br label [[FOR_INC_1]] @@ -30,17 +30,17 @@ define void @_ZN23btGeneric6DofConstraint8getInfo1EPN17btTypedConstraint17btCons ; CHECK-NEXT: unreachable ; entry: - br i1 undef, label %if.else, label %if.then + br i1 %arg, label %if.else, label %if.then if.then: ; preds = %entry ret void if.else: ; preds = %entry %nub5 = getelementptr inbounds %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960", ptr %info, i64 0, i32 1 - br i1 undef, label %land.lhs.true.i.1, label %if.then7.1 + br i1 %arg, label %land.lhs.true.i.1, label %if.then7.1 land.lhs.true.i.1: ; preds = %if.else - br i1 undef, label %for.inc.1, label %if.then7.1 + br i1 %arg, label %for.inc.1, label %if.then7.1 if.then7.1: ; preds = %land.lhs.true.i.1, %if.else %inc.1 = add nsw i32 0, 1 @@ -63,7 +63,7 @@ for.inc.1: ; preds = %if.then7.1, %land.l %class.btVector3.5.30.65.90.115.140.175.185.260.280.330 = type { [4 x float] } %class.btVector4.7.32.67.92.117.142.177.187.262.282.331 = type { %class.btVector3.5.30.65.90.115.140.175.185.260.280.330 } -define void @_ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2_S2_fR25GIM_TRIANGLE_CONTACT_DATA(ptr %this) { +define void @_ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2_S2_fR25GIM_TRIANGLE_CONTACT_DATA(ptr %this, i1 %arg) { ; CHECK-LABEL: @_ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2_S2_fR25GIM_TRIANGLE_CONTACT_DATA( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [[CLASS_GIM_TRIANGLE_CALCULATION_CACHE_9_34_69_94_119_144_179_189_264_284_332:%.*]], ptr [[THIS:%.*]], i64 0, i32 2, i64 0, i32 0, i64 1 @@ -76,9 +76,9 @@ define void 
@_ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector ; CHECK-NEXT: store float [[TMP4]], ptr undef, align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x float> [[TMP2]], [[TMP3]] ; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[ARRAYIDX26]], align 4 -; CHECK-NEXT: br i1 undef, label [[IF_ELSE1609:%.*]], label [[IF_THEN1595:%.*]] +; CHECK-NEXT: br i1 %arg, label [[IF_ELSE1609:%.*]], label [[IF_THEN1595:%.*]] ; CHECK: if.then1595: -; CHECK-NEXT: br i1 undef, label [[RETURN:%.*]], label [[FOR_BODY_LR_PH_I_I1702:%.*]] +; CHECK-NEXT: br i1 %arg, label [[RETURN:%.*]], label [[FOR_BODY_LR_PH_I_I1702:%.*]] ; CHECK: for.body.lr.ph.i.i1702: ; CHECK-NEXT: unreachable ; CHECK: if.else1609: @@ -99,10 +99,10 @@ entry: %sub639 = fsub float %add626, undef %sub652 = fsub float %add626, %sub639 store float %sub652, ptr %arrayidx36, align 4 - br i1 undef, label %if.else1609, label %if.then1595 + br i1 %arg, label %if.else1609, label %if.then1595 if.then1595: ; preds = %entry - br i1 undef, label %return, label %for.body.lr.ph.i.i1702 + br i1 %arg, label %return, label %for.body.lr.ph.i.i1702 for.body.lr.ph.i.i1702: ; preds = %if.then1595 unreachable @@ -114,34 +114,34 @@ return: ; preds = %if.then1595 ret void } -define void @_Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteCollisionDetectorInterface6ResultE() { +define void @_Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteCollisionDetectorInterface6ResultE(i1 %arg) { ; CHECK-LABEL: @_Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteCollisionDetectorInterface6ResultE( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 undef, label [[RETURN:%.*]], label [[IF_END:%.*]] +; CHECK-NEXT: br i1 %arg, label [[RETURN:%.*]], label [[IF_END:%.*]] ; CHECK: if.end: -; CHECK-NEXT: br i1 undef, label [[RETURN]], label [[IF_END111:%.*]] +; CHECK-NEXT: br i1 %arg, label [[RETURN]], label [[IF_END111:%.*]] ; CHECK: if.end111: -; CHECK-NEXT: br i1 undef, label [[RETURN]], label [[IF_END136:%.*]] +; CHECK-NEXT: br i1 %arg, label [[RETURN]], label [[IF_END136:%.*]] ; CHECK: if.end136: -; CHECK-NEXT: br i1 undef, label [[RETURN]], label [[IF_END162:%.*]] +; CHECK-NEXT: br i1 %arg, label [[RETURN]], label [[IF_END162:%.*]] ; CHECK: if.end162: -; CHECK-NEXT: br i1 undef, label [[RETURN]], label [[IF_END189:%.*]] +; CHECK-NEXT: br i1 %arg, label [[RETURN]], label [[IF_END189:%.*]] ; CHECK: if.end189: -; CHECK-NEXT: br i1 undef, label [[RETURN]], label [[IF_END216:%.*]] +; CHECK-NEXT: br i1 %arg, label [[RETURN]], label [[IF_END216:%.*]] ; CHECK: if.end216: -; CHECK-NEXT: br i1 undef, label [[IF_THEN218:%.*]], label [[IF_END225:%.*]] +; CHECK-NEXT: br i1 %arg, label [[IF_THEN218:%.*]], label [[IF_END225:%.*]] ; CHECK: if.then218: ; CHECK-NEXT: br label [[IF_END225]] ; CHECK: if.end225: -; CHECK-NEXT: br i1 undef, label [[RETURN]], label [[IF_END248:%.*]] +; CHECK-NEXT: br i1 %arg, label [[RETURN]], label [[IF_END248:%.*]] ; CHECK: if.end248: -; CHECK-NEXT: br i1 undef, label [[RETURN]], label [[IF_END304:%.*]] +; CHECK-NEXT: br i1 %arg, label [[RETURN]], label [[IF_END304:%.*]] ; CHECK: if.end304: -; CHECK-NEXT: br i1 undef, label [[RETURN]], label [[IF_END361:%.*]] +; CHECK-NEXT: br i1 %arg, label [[RETURN]], label [[IF_END361:%.*]] ; CHECK: if.end361: -; CHECK-NEXT: br i1 undef, label [[IF_THEN370:%.*]], label [[IF_END395:%.*]] +; CHECK-NEXT: br i1 %arg, label [[IF_THEN370:%.*]], label [[IF_END395:%.*]] ; CHECK: if.then370: -; CHECK-NEXT: br i1 undef, label [[IF_THEN374:%.*]], label [[IF_END395]] +; 
CHECK-NEXT: br i1 %arg, label [[IF_THEN374:%.*]], label [[IF_END395]] ; CHECK: if.then374: ; CHECK-NEXT: br label [[IF_END395]] ; CHECK: if.end395: @@ -152,47 +152,47 @@ define void @_Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36b entry: %add8.i2343 = fadd float undef, undef %add8.i2381 = fadd float undef, undef - br i1 undef, label %return, label %if.end + br i1 %arg, label %return, label %if.end if.end: ; preds = %entry - br i1 undef, label %return, label %if.end111 + br i1 %arg, label %return, label %if.end111 if.end111: ; preds = %if.end - br i1 undef, label %return, label %if.end136 + br i1 %arg, label %return, label %if.end136 if.end136: ; preds = %if.end111 - br i1 undef, label %return, label %if.end162 + br i1 %arg, label %return, label %if.end162 if.end162: ; preds = %if.end136 - br i1 undef, label %return, label %if.end189 + br i1 %arg, label %return, label %if.end189 if.end189: ; preds = %if.end162 - br i1 undef, label %return, label %if.end216 + br i1 %arg, label %return, label %if.end216 if.end216: ; preds = %if.end189 - br i1 undef, label %if.then218, label %if.end225 + br i1 %arg, label %if.then218, label %if.end225 if.then218: ; preds = %if.end216 br label %if.end225 if.end225: ; preds = %if.then218, %if.end216 - br i1 undef, label %return, label %if.end248 + br i1 %arg, label %return, label %if.end248 if.end248: ; preds = %if.end225 - br i1 undef, label %return, label %if.end304 + br i1 %arg, label %return, label %if.end304 if.end304: ; preds = %if.end248 %mul341 = fmul float undef, %add8.i2343 %mul344 = fmul float undef, %add8.i2381 %sub345 = fsub float %mul341, %mul344 - br i1 undef, label %return, label %if.end361 + br i1 %arg, label %return, label %if.end361 if.end361: ; preds = %if.end304 %mul364 = fmul float %add8.i2381, %add8.i2381 - br i1 undef, label %if.then370, label %if.end395 + br i1 %arg, label %if.then370, label %if.end395 if.then370: ; preds = %if.end361 - br i1 undef, label %if.then374, label %if.end395 + br i1 %arg, label %if.then374, label %if.end395 if.then374: ; preds = %if.then370 %cmp392 = fcmp olt float %sub345, 0.000000e+00 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll index 925b348cdeec1..55e691b39d78c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll @@ -7,36 +7,36 @@ target triple = "x86_64-apple-macosx10.8.0" %class.btVector3.23.221.463.485.507.573.595.683.727.749.815.837.991.1585.1607.1629.1651.1849.2047.2069.2091.2113 = type { [4 x float] } ; Function Attrs: ssp uwtable -define void @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_(ptr %vertices) #0 align 2 { +define void @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_(ptr %vertices, i1 %arg) #0 align 2 { ; CHECK-LABEL: @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 undef, label [[RETURN:%.*]], label [[IF_END:%.*]] +; CHECK-NEXT: br i1 %arg, label [[RETURN:%.*]], label [[IF_END:%.*]] ; CHECK: if.end: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: br i1 undef, label [[IF_THEN17_1:%.*]], label [[IF_END22_1:%.*]] +; CHECK-NEXT: br i1 %arg, label [[IF_THEN17_1:%.*]], label [[IF_END22_1:%.*]] ; CHECK: for.end36: ; CHECK-NEXT: br label [[FOR_BODY144:%.*]] ; CHECK: for.body144: -; CHECK-NEXT: br i1 undef, label [[FOR_END227:%.*]], label [[FOR_BODY144]] +; CHECK-NEXT: br i1 %arg, label [[FOR_END227:%.*]], label 
[[FOR_BODY144]] ; CHECK: for.end227: -; CHECK-NEXT: br i1 undef, label [[FOR_END271:%.*]], label [[FOR_BODY233:%.*]] +; CHECK-NEXT: br i1 %arg, label [[FOR_END271:%.*]], label [[FOR_BODY233:%.*]] ; CHECK: for.body233: -; CHECK-NEXT: br i1 undef, label [[FOR_BODY233]], label [[FOR_END271]] +; CHECK-NEXT: br i1 %arg, label [[FOR_BODY233]], label [[FOR_END271]] ; CHECK: for.end271: ; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ splat (float 0x47EFFFFFE0000000), [[FOR_END227]] ], [ undef, [[FOR_BODY233]] ] ; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x float> undef, [[TMP0]] -; CHECK-NEXT: br i1 undef, label [[IF_THEN291:%.*]], label [[RETURN]] +; CHECK-NEXT: br i1 %arg, label [[IF_THEN291:%.*]], label [[RETURN]] ; CHECK: if.then291: ; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], splat (float 5.000000e-01) ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x float> [[TMP0]], [[TMP2]] -; CHECK-NEXT: br i1 undef, label [[IF_END332:%.*]], label [[IF_ELSE319:%.*]] +; CHECK-NEXT: br i1 %arg, label [[IF_END332:%.*]], label [[IF_ELSE319:%.*]] ; CHECK: if.else319: -; CHECK-NEXT: br i1 undef, label [[IF_THEN325:%.*]], label [[IF_END327:%.*]] +; CHECK-NEXT: br i1 %arg, label [[IF_THEN325:%.*]], label [[IF_END327:%.*]] ; CHECK: if.then325: ; CHECK-NEXT: br label [[IF_END327]] ; CHECK: if.end327: -; CHECK-NEXT: br i1 undef, label [[IF_THEN329:%.*]], label [[IF_END332]] +; CHECK-NEXT: br i1 %arg, label [[IF_THEN329:%.*]], label [[IF_END332]] ; CHECK: if.then329: ; CHECK-NEXT: br label [[IF_END332]] ; CHECK: if.end332: @@ -49,55 +49,55 @@ define void @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_(ptr %ve ; CHECK: if.then17.1: ; CHECK-NEXT: br label [[IF_END22_1]] ; CHECK: if.end22.1: -; CHECK-NEXT: br i1 undef, label [[IF_THEN17_2:%.*]], label [[IF_END22_2:%.*]] +; CHECK-NEXT: br i1 %arg, label [[IF_THEN17_2:%.*]], label [[IF_END22_2:%.*]] ; CHECK: if.then17.2: ; CHECK-NEXT: br label [[IF_END22_2]] ; CHECK: if.end22.2: -; CHECK-NEXT: br i1 undef, label [[FOR_END36:%.*]], label [[FOR_BODY]] +; CHECK-NEXT: br i1 %arg, label [[FOR_END36:%.*]], label [[FOR_BODY]] ; entry: - br i1 undef, label %return, label %if.end + br i1 %arg, label %return, label %if.end if.end: ; preds = %entry br label %for.body for.body: ; preds = %if.end22.2, %if.end - br i1 undef, label %if.then17.1, label %if.end22.1 + br i1 %arg, label %if.then17.1, label %if.end22.1 for.end36: ; preds = %if.end22.2 br label %for.body144 for.body144: ; preds = %for.body144, %for.end36 - br i1 undef, label %for.end227, label %for.body144 + br i1 %arg, label %for.end227, label %for.body144 for.end227: ; preds = %for.body144 - br i1 undef, label %for.end271, label %for.body233 + br i1 %arg, label %for.end271, label %for.body233 for.body233: ; preds = %for.body233, %for.end227 - br i1 undef, label %for.body233, label %for.end271 + br i1 %arg, label %for.body233, label %for.end271 for.end271: ; preds = %for.body233, %for.end227 %0 = phi float [ 0x47EFFFFFE0000000, %for.end227 ], [ undef, %for.body233 ] %1 = phi float [ 0x47EFFFFFE0000000, %for.end227 ], [ undef, %for.body233 ] %sub275 = fsub float undef, %1 %sub279 = fsub float undef, %0 - br i1 undef, label %if.then291, label %return + br i1 %arg, label %if.then291, label %return if.then291: ; preds = %for.end271 %mul292 = fmul float %sub275, 5.000000e-01 %add294 = fadd float %1, %mul292 %mul295 = fmul float %sub279, 5.000000e-01 %add297 = fadd float %0, %mul295 - br i1 undef, label %if.end332, label %if.else319 + br i1 %arg, label %if.end332, label %if.else319 if.else319: ; preds = %if.then291 - br i1 
undef, label %if.then325, label %if.end327 + br i1 %arg, label %if.then325, label %if.end327 if.then325: ; preds = %if.else319 br label %if.end327 if.end327: ; preds = %if.then325, %if.else319 - br i1 undef, label %if.then329, label %if.end332 + br i1 %arg, label %if.then329, label %if.end332 if.then329: ; preds = %if.end327 br label %if.end332 @@ -119,13 +119,13 @@ if.then17.1: ; preds = %for.body br label %if.end22.1 if.end22.1: ; preds = %if.then17.1, %for.body - br i1 undef, label %if.then17.2, label %if.end22.2 + br i1 %arg, label %if.then17.2, label %if.end22.2 if.then17.2: ; preds = %if.end22.1 br label %if.end22.2 if.end22.2: ; preds = %if.then17.2, %if.end22.1 - br i1 undef, label %for.end36, label %for.body + br i1 %arg, label %for.end36, label %for.body } attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_dequeue.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_dequeue.ll index 4de16a5d57793..faf4496ce2722 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_dequeue.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_dequeue.ll @@ -6,23 +6,23 @@ target triple = "x86_64-apple-macosx10.8.0" %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731" = type { ptr, ptr, ptr, ptr } ; Function Attrs: nounwind ssp uwtable -define void @_ZSt6uniqueISt15_Deque_iteratorIdRdPdEET_S4_S4_(ptr %__first, ptr nocapture %__last) { +define void @_ZSt6uniqueISt15_Deque_iteratorIdRdPdEET_S4_S4_(ptr %__first, ptr nocapture %__last, i1 %arg) { ; CHECK-LABEL: @_ZSt6uniqueISt15_Deque_iteratorIdRdPdEET_S4_S4_( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__FIRST:%.*]], align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x ptr>, ptr [[__LAST:%.*]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[TMP0]], i32 0 -; CHECK-NEXT: br i1 undef, label [[_ZST13ADJACENT_FINDIST15_DEQUE_ITERATORIDRDPDEET_S4_S4__EXIT:%.*]], label [[WHILE_COND_I_PREHEADER:%.*]] +; CHECK-NEXT: br i1 %arg, label [[_ZST13ADJACENT_FINDIST15_DEQUE_ITERATORIDRDPDEET_S4_S4__EXIT:%.*]], label [[WHILE_COND_I_PREHEADER:%.*]] ; CHECK: while.cond.i.preheader: ; CHECK-NEXT: br label [[WHILE_COND_I:%.*]] ; CHECK: while.cond.i: -; CHECK-NEXT: br i1 undef, label [[_ZST13ADJACENT_FINDIST15_DEQUE_ITERATORIDRDPDEET_S4_S4__EXIT]], label [[WHILE_BODY_I:%.*]] +; CHECK-NEXT: br i1 %arg, label [[_ZST13ADJACENT_FINDIST15_DEQUE_ITERATORIDRDPDEET_S4_S4__EXIT]], label [[WHILE_BODY_I:%.*]] ; CHECK: while.body.i: -; CHECK-NEXT: br i1 undef, label [[_ZST13ADJACENT_FINDIST15_DEQUE_ITERATORIDRDPDEET_S4_S4__EXIT]], label [[WHILE_COND_I]] +; CHECK-NEXT: br i1 %arg, label [[_ZST13ADJACENT_FINDIST15_DEQUE_ITERATORIDRDPDEET_S4_S4__EXIT]], label [[WHILE_COND_I]] ; CHECK: _ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit: ; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x ptr> [ [[TMP2]], [[ENTRY:%.*]] ], [ [[TMP1]], [[WHILE_COND_I]] ], [ undef, [[WHILE_BODY_I]] ] ; CHECK-NEXT: store <2 x ptr> [[TMP3]], ptr [[__FIRST]], align 8 -; CHECK-NEXT: br i1 undef, label [[IF_THEN_I55:%.*]], label [[WHILE_COND:%.*]] +; CHECK-NEXT: br i1 %arg, label [[IF_THEN_I55:%.*]], label [[WHILE_COND:%.*]] ; CHECK: if.then.i55: ; CHECK-NEXT: br label [[WHILE_COND]] ; CHECK: while.cond: @@ -34,23 +34,23 @@ entry: %1 = load ptr, ptr %__last, align 8 %_M_first3.i.i83 = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731", ptr 
%__last, i64 0, i32 1 %2 = load ptr, ptr %_M_first3.i.i83, align 8 - br i1 undef, label %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit, label %while.cond.i.preheader + br i1 %arg, label %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit, label %while.cond.i.preheader while.cond.i.preheader: ; preds = %entry br label %while.cond.i while.cond.i: ; preds = %while.body.i, %while.cond.i.preheader - br i1 undef, label %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit, label %while.body.i + br i1 %arg, label %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit, label %while.body.i while.body.i: ; preds = %while.cond.i - br i1 undef, label %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit, label %while.cond.i + br i1 %arg, label %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit, label %while.cond.i _ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit: ; preds = %while.body.i, %while.cond.i, %entry %3 = phi ptr [ %2, %entry ], [ %2, %while.cond.i ], [ undef, %while.body.i ] %4 = phi ptr [ %0, %entry ], [ %1, %while.cond.i ], [ undef, %while.body.i ] store ptr %4, ptr %__first, align 8 store ptr %3, ptr %_M_first3.i.i, align 8 - br i1 undef, label %if.then.i55, label %while.cond + br i1 %arg, label %if.then.i55, label %while.cond if.then.i55: ; preds = %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit br label %while.cond diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_flop7.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_flop7.ll index 371b06869841b..fc1bd856da9c3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_flop7.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_flop7.ll @@ -5,20 +5,20 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 target triple = "x86_64-apple-macosx10.8.0" ; Function Attrs: nounwind ssp uwtable -define void @main() #0 { +define void @main(i1 %arg) #0 { ; CHECK-LABEL: @main( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 undef, label [[WHILE_BODY:%.*]], label [[WHILE_END:%.*]] +; CHECK-NEXT: br i1 %arg, label [[WHILE_BODY:%.*]], label [[WHILE_END:%.*]] ; CHECK: while.body: ; CHECK-NEXT: unreachable ; CHECK: while.end: -; CHECK-NEXT: br i1 undef, label [[FOR_END80:%.*]], label [[FOR_BODY75_LR_PH:%.*]] +; CHECK-NEXT: br i1 %arg, label [[FOR_END80:%.*]], label [[FOR_BODY75_LR_PH:%.*]] ; CHECK: for.body75.lr.ph: ; CHECK-NEXT: br label [[FOR_BODY75:%.*]] ; CHECK: for.body75: ; CHECK-NEXT: br label [[FOR_BODY75]] ; CHECK: for.end80: -; CHECK-NEXT: br i1 undef, label [[FOR_END300:%.*]], label [[FOR_BODY267_LR_PH:%.*]] +; CHECK-NEXT: br i1 %arg, label [[FOR_END300:%.*]], label [[FOR_BODY267_LR_PH:%.*]] ; CHECK: for.body267.lr.ph: ; CHECK-NEXT: br label [[FOR_BODY267:%.*]] ; CHECK: for.body267: @@ -32,18 +32,18 @@ define void @main() #0 { ; CHECK-NEXT: [[ADD295:%.*]] = fadd double undef, [[MUL294]] ; CHECK-NEXT: [[DIV296:%.*]] = fdiv double [[MUL283]], [[ADD295]] ; CHECK-NEXT: [[ADD297]] = fadd double [[S_71010]], [[DIV296]] -; CHECK-NEXT: br i1 undef, label [[FOR_BODY267]], label [[FOR_END300]] +; CHECK-NEXT: br i1 %arg, label [[FOR_BODY267]], label [[FOR_END300]] ; CHECK: for.end300: ; CHECK-NEXT: unreachable ; entry: - br i1 undef, label %while.body, label %while.end + br i1 %arg, label %while.body, label %while.end while.body: ; preds = %entry unreachable while.end: ; preds = %entry - br i1 undef, label %for.end80, label %for.body75.lr.ph + br i1 %arg, label %for.end80, label %for.body75.lr.ph for.body75.lr.ph: ; preds = %while.end br 
label %for.body75 @@ -52,7 +52,7 @@ for.body75: ; preds = %for.body75, %for.bo br label %for.body75 for.end80: ; preds = %while.end - br i1 undef, label %for.end300, label %for.body267.lr.ph + br i1 %arg, label %for.end300, label %for.body267.lr.ph for.body267.lr.ph: ; preds = %for.end80 br label %for.body267 @@ -68,7 +68,7 @@ for.body267: ; preds = %for.body267, %for.b %add295 = fadd double undef, %mul294 %div296 = fdiv double %mul283, %add295 %add297 = fadd double %s.71010, %div296 - br i1 undef, label %for.body267, label %for.end300 + br i1 %arg, label %for.body267, label %for.end300 for.end300: ; preds = %for.body267, %for.end80 unreachable diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod-inseltpoison.ll index 6ac588524f845..d516126d8412d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod-inseltpoison.ll @@ -5,34 +5,34 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 target triple = "x86_64-apple-macosx10.8.0" ; Function Attrs: nounwind ssp uwtable -define void @RCModelEstimator() { +define void @RCModelEstimator(i1 %arg) { ; CHECK-LABEL: @RCModelEstimator( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 undef, label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END_THREAD:%.*]] +; CHECK-NEXT: br i1 %arg, label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END_THREAD:%.*]] ; CHECK: for.end.thread: ; CHECK-NEXT: unreachable ; CHECK: for.body.lr.ph: -; CHECK-NEXT: br i1 undef, label [[FOR_END:%.*]], label [[FOR_BODY:%.*]] +; CHECK-NEXT: br i1 %arg, label [[FOR_END:%.*]], label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]] +; CHECK-NEXT: br i1 %arg, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: -; CHECK-NEXT: br i1 undef, label [[FOR_BODY3:%.*]], label [[IF_END103:%.*]] +; CHECK-NEXT: br i1 %arg, label [[FOR_BODY3:%.*]], label [[IF_END103:%.*]] ; CHECK: for.cond14.preheader: -; CHECK-NEXT: br i1 undef, label [[FOR_BODY16_LR_PH:%.*]], label [[IF_END103]] +; CHECK-NEXT: br i1 %arg, label [[FOR_BODY16_LR_PH:%.*]], label [[IF_END103]] ; CHECK: for.body16.lr.ph: ; CHECK-NEXT: br label [[FOR_BODY16:%.*]] ; CHECK: for.body3: -; CHECK-NEXT: br i1 undef, label [[IF_THEN7:%.*]], label [[FOR_INC11:%.*]] +; CHECK-NEXT: br i1 %arg, label [[IF_THEN7:%.*]], label [[FOR_INC11:%.*]] ; CHECK: if.then7: ; CHECK-NEXT: br label [[FOR_INC11]] ; CHECK: for.inc11: ; CHECK-NEXT: br i1 false, label [[FOR_COND14_PREHEADER:%.*]], label [[FOR_BODY3]] ; CHECK: for.body16: -; CHECK-NEXT: br i1 undef, label [[FOR_END39:%.*]], label [[FOR_BODY16]] +; CHECK-NEXT: br i1 %arg, label [[FOR_END39:%.*]], label [[FOR_BODY16]] ; CHECK: for.end39: -; CHECK-NEXT: br i1 undef, label [[IF_END103]], label [[FOR_COND45_PREHEADER:%.*]] +; CHECK-NEXT: br i1 %arg, label [[IF_END103]], label [[FOR_COND45_PREHEADER:%.*]] ; CHECK: for.cond45.preheader: -; CHECK-NEXT: br i1 undef, label [[IF_THEN88:%.*]], label [[IF_ELSE:%.*]] +; CHECK-NEXT: br i1 %arg, label [[IF_THEN88:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then88: ; CHECK-NEXT: br label [[IF_END103]] ; CHECK: if.else: @@ -41,28 +41,28 @@ define void @RCModelEstimator() { ; CHECK-NEXT: ret void ; entry: - br i1 undef, label %for.body.lr.ph, label %for.end.thread + br i1 %arg, label %for.body.lr.ph, label %for.end.thread for.end.thread: ; preds = %entry unreachable for.body.lr.ph: ; preds = %entry - br i1 undef, label %for.end, label %for.body 
+ br i1 %arg, label %for.end, label %for.body for.body: ; preds = %for.body, %for.body.lr.ph - br i1 undef, label %for.end, label %for.body + br i1 %arg, label %for.end, label %for.body for.end: ; preds = %for.body, %for.body.lr.ph - br i1 undef, label %for.body3, label %if.end103 + br i1 %arg, label %for.body3, label %if.end103 for.cond14.preheader: ; preds = %for.inc11 - br i1 undef, label %for.body16.lr.ph, label %if.end103 + br i1 %arg, label %for.body16.lr.ph, label %if.end103 for.body16.lr.ph: ; preds = %for.cond14.preheader br label %for.body16 for.body3: ; preds = %for.inc11, %for.end - br i1 undef, label %if.then7, label %for.inc11 + br i1 %arg, label %if.then7, label %for.inc11 if.then7: ; preds = %for.body3 br label %for.inc11 @@ -71,13 +71,13 @@ for.inc11: ; preds = %if.then7, %for.body br i1 false, label %for.cond14.preheader, label %for.body3 for.body16: ; preds = %for.body16, %for.body16.lr.ph - br i1 undef, label %for.end39, label %for.body16 + br i1 %arg, label %for.end39, label %for.body16 for.end39: ; preds = %for.body16 - br i1 undef, label %if.end103, label %for.cond45.preheader + br i1 %arg, label %if.end103, label %for.cond45.preheader for.cond45.preheader: ; preds = %for.end39 - br i1 undef, label %if.then88, label %if.else + br i1 %arg, label %if.then88, label %if.else if.then88: ; preds = %for.cond45.preheader %mul89 = fmul double 0.000000e+00, 0.000000e+00 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll index f0d38e1bf269c..3ad0473c84766 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll @@ -5,34 +5,34 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 target triple = "x86_64-apple-macosx10.8.0" ; Function Attrs: nounwind ssp uwtable -define void @RCModelEstimator() { +define void @RCModelEstimator(i1 %arg) { ; CHECK-LABEL: @RCModelEstimator( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 undef, label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END_THREAD:%.*]] +; CHECK-NEXT: br i1 %arg, label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END_THREAD:%.*]] ; CHECK: for.end.thread: ; CHECK-NEXT: unreachable ; CHECK: for.body.lr.ph: -; CHECK-NEXT: br i1 undef, label [[FOR_END:%.*]], label [[FOR_BODY:%.*]] +; CHECK-NEXT: br i1 %arg, label [[FOR_END:%.*]], label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]] +; CHECK-NEXT: br i1 %arg, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: -; CHECK-NEXT: br i1 undef, label [[FOR_BODY3:%.*]], label [[IF_END103:%.*]] +; CHECK-NEXT: br i1 %arg, label [[FOR_BODY3:%.*]], label [[IF_END103:%.*]] ; CHECK: for.cond14.preheader: -; CHECK-NEXT: br i1 undef, label [[FOR_BODY16_LR_PH:%.*]], label [[IF_END103]] +; CHECK-NEXT: br i1 %arg, label [[FOR_BODY16_LR_PH:%.*]], label [[IF_END103]] ; CHECK: for.body16.lr.ph: ; CHECK-NEXT: br label [[FOR_BODY16:%.*]] ; CHECK: for.body3: -; CHECK-NEXT: br i1 undef, label [[IF_THEN7:%.*]], label [[FOR_INC11:%.*]] +; CHECK-NEXT: br i1 %arg, label [[IF_THEN7:%.*]], label [[FOR_INC11:%.*]] ; CHECK: if.then7: ; CHECK-NEXT: br label [[FOR_INC11]] ; CHECK: for.inc11: ; CHECK-NEXT: br i1 false, label [[FOR_COND14_PREHEADER:%.*]], label [[FOR_BODY3]] ; CHECK: for.body16: -; CHECK-NEXT: br i1 undef, label [[FOR_END39:%.*]], label [[FOR_BODY16]] +; CHECK-NEXT: br i1 %arg, label [[FOR_END39:%.*]], label [[FOR_BODY16]] ; CHECK: for.end39: -; CHECK-NEXT: br i1 undef, label [[IF_END103]], 
label [[FOR_COND45_PREHEADER:%.*]] +; CHECK-NEXT: br i1 %arg, label [[IF_END103]], label [[FOR_COND45_PREHEADER:%.*]] ; CHECK: for.cond45.preheader: -; CHECK-NEXT: br i1 undef, label [[IF_THEN88:%.*]], label [[IF_ELSE:%.*]] +; CHECK-NEXT: br i1 %arg, label [[IF_THEN88:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then88: ; CHECK-NEXT: br label [[IF_END103]] ; CHECK: if.else: @@ -41,28 +41,28 @@ define void @RCModelEstimator() { ; CHECK-NEXT: ret void ; entry: - br i1 undef, label %for.body.lr.ph, label %for.end.thread + br i1 %arg, label %for.body.lr.ph, label %for.end.thread for.end.thread: ; preds = %entry unreachable for.body.lr.ph: ; preds = %entry - br i1 undef, label %for.end, label %for.body + br i1 %arg, label %for.end, label %for.body for.body: ; preds = %for.body, %for.body.lr.ph - br i1 undef, label %for.end, label %for.body + br i1 %arg, label %for.end, label %for.body for.end: ; preds = %for.body, %for.body.lr.ph - br i1 undef, label %for.body3, label %if.end103 + br i1 %arg, label %for.body3, label %if.end103 for.cond14.preheader: ; preds = %for.inc11 - br i1 undef, label %for.body16.lr.ph, label %if.end103 + br i1 %arg, label %for.body16.lr.ph, label %if.end103 for.body16.lr.ph: ; preds = %for.cond14.preheader br label %for.body16 for.body3: ; preds = %for.inc11, %for.end - br i1 undef, label %if.then7, label %for.inc11 + br i1 %arg, label %if.then7, label %for.inc11 if.then7: ; preds = %for.body3 br label %for.inc11 @@ -71,13 +71,13 @@ for.inc11: ; preds = %if.then7, %for.body br i1 false, label %for.cond14.preheader, label %for.body3 for.body16: ; preds = %for.body16, %for.body16.lr.ph - br i1 undef, label %for.end39, label %for.body16 + br i1 %arg, label %for.end39, label %for.body16 for.end39: ; preds = %for.body16 - br i1 undef, label %if.end103, label %for.cond45.preheader + br i1 %arg, label %if.end103, label %for.cond45.preheader for.cond45.preheader: ; preds = %for.end39 - br i1 undef, label %if.then88, label %if.else + br i1 %arg, label %if.then88, label %if.else if.then88: ; preds = %for.cond45.preheader %mul89 = fmul double 0.000000e+00, 0.000000e+00 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll index e6c46e1847dac..403a610da8d55 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll @@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" -define void @main() { +define void @main(i1 %arg) { ; CHECK-LABEL: @main( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -27,17 +27,17 @@ define void @main() { ; CHECK-NEXT: [[ADD19]] = fadd double undef, [[MUL18]] ; CHECK-NEXT: [[SUB:%.*]] = fsub double [[MUL13]], [[MUL14]] ; CHECK-NEXT: [[ADD20]] = fadd double undef, [[SUB]] -; CHECK-NEXT: br i1 undef, label [[FOR_BODY12]], label [[FOR_INC21]] +; CHECK-NEXT: br i1 %arg, label [[FOR_BODY12]], label [[FOR_INC21]] ; CHECK: for.inc21: -; CHECK-NEXT: br i1 undef, label [[FOR_END23:%.*]], label [[FOR_BODY6]] +; CHECK-NEXT: br i1 %arg, label [[FOR_END23:%.*]], label [[FOR_BODY6]] ; CHECK: for.end23: -; CHECK-NEXT: br i1 undef, label [[IF_THEN25:%.*]], label [[IF_THEN26:%.*]] +; CHECK-NEXT: br i1 %arg, label [[IF_THEN25:%.*]], label [[IF_THEN26:%.*]] ; CHECK: if.then25: -; CHECK-NEXT: br i1 undef, label [[FOR_END44:%.*]], label 
[[FOR_COND4_PREHEADER]] +; CHECK-NEXT: br i1 %arg, label [[FOR_END44:%.*]], label [[FOR_COND4_PREHEADER]] ; CHECK: if.then26: ; CHECK-NEXT: unreachable ; CHECK: for.end44: -; CHECK-NEXT: br i1 undef, label [[FOR_END48:%.*]], label [[FOR_BODY]] +; CHECK-NEXT: br i1 %arg, label [[FOR_END48:%.*]], label [[FOR_BODY]] ; CHECK: for.end48: ; CHECK-NEXT: ret void ; @@ -67,22 +67,22 @@ if.end: ; preds = %for.body12 %add19 = fadd double undef, %mul18 %sub = fsub double %mul13, %mul14 %add20 = fadd double undef, %sub - br i1 undef, label %for.body12, label %for.inc21 + br i1 %arg, label %for.body12, label %for.inc21 for.inc21: ; preds = %if.end, %for.body12 - br i1 undef, label %for.end23, label %for.body6 + br i1 %arg, label %for.end23, label %for.body6 for.end23: ; preds = %for.inc21 - br i1 undef, label %if.then25, label %if.then26 + br i1 %arg, label %if.then25, label %if.then26 if.then25: ; preds = %for.end23 - br i1 undef, label %for.end44, label %for.cond4.preheader + br i1 %arg, label %for.end44, label %for.cond4.preheader if.then26: ; preds = %for.end23 unreachable for.end44: ; preds = %if.then25 - br i1 undef, label %for.end48, label %for.body + br i1 %arg, label %for.end48, label %for.body for.end48: ; preds = %for.end44 ret void @@ -90,7 +90,7 @@ for.end48: ; preds = %for.end44 %struct.hoge = type { double, double, double} -define void @zot(ptr %arg) { +define void @zot(ptr %arg, i1 %arg2) { ; CHECK-LABEL: @zot( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[TMP:%.*]] = load double, ptr undef, align 8 @@ -102,7 +102,7 @@ define void @zot(ptr %arg) { ; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], undef ; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], undef ; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[TMP7]], align 8 -; CHECK-NEXT: br i1 undef, label [[BB11:%.*]], label [[BB12:%.*]] +; CHECK-NEXT: br i1 %arg2, label [[BB11:%.*]], label [[BB12:%.*]] ; CHECK: bb11: ; CHECK-NEXT: br label [[BB14:%.*]] ; CHECK: bb12: @@ -124,7 +124,7 @@ bb: %tmp9 = fsub double %tmp8, undef %tmp10 = getelementptr inbounds %struct.hoge, ptr %arg, i64 0, i32 2 store double %tmp9, ptr %tmp10, align 8 - br i1 undef, label %bb11, label %bb12 + br i1 %arg2, label %bb11, label %bb12 bb11: ; preds = %bb br label %bb14 @@ -140,15 +140,15 @@ bb14: ; preds = %bb12, %bb11 %struct.rc4_state.0.24 = type { i32, i32, [256 x i32] } -define void @rc4_crypt(ptr nocapture %s) { +define void @rc4_crypt(ptr nocapture %s, i1 %arg) { ; CHECK-LABEL: @rc4_crypt( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[Y2:%.*]] = getelementptr inbounds [[STRUCT_RC4_STATE_0_24:%.*]], ptr [[S:%.*]], i64 0, i32 1 -; CHECK-NEXT: br i1 undef, label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +; CHECK-NEXT: br i1 %arg, label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[CONV4:%.*]] = and i32 undef, 255 ; CHECK-NEXT: [[CONV7:%.*]] = and i32 undef, 255 -; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]] +; CHECK-NEXT: br i1 %arg, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: [[X_0_LCSSA:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[CONV4]], [[FOR_BODY]] ] ; CHECK-NEXT: [[Y_0_LCSSA:%.*]] = phi i32 [ undef, [[ENTRY]] ], [ [[CONV7]], [[FOR_BODY]] ] @@ -158,14 +158,14 @@ define void @rc4_crypt(ptr nocapture %s) { ; entry: %y2 = getelementptr inbounds %struct.rc4_state.0.24, ptr %s, i64 0, i32 1 - br i1 undef, label %for.body, label %for.end + br i1 %arg, label %for.body, label %for.end for.body: ; preds = %for.body, %entry %x.045 = phi i32 [ %conv4, %for.body ], [ undef, %entry ] %conv4 = and i32 
undef, 255 %conv7 = and i32 undef, 255 %idxprom842 = zext i32 %conv7 to i64 - br i1 undef, label %for.end, label %for.body + br i1 %arg, label %for.end, label %for.body for.end: ; preds = %for.body, %entry %x.0.lcssa = phi i32 [ undef, %entry ], [ %conv4, %for.body ] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll index a9f92f324d6f5..d434035051f5e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll @@ -6,28 +6,28 @@ target triple = "x86_64-apple-macosx10.8.0" %struct._exon_t.12.103.220.363.480.649.740.857.1039.1065.1078.1091.1117.1130.1156.1169.1195.1221.1234.1286.1299.1312.1338.1429.1455.1468.1494.1520.1884.1897.1975.2066.2105.2170.2171 = type { i32, i32, i32, i32, i32, i32, [8 x i8] } -define void @SIM4() { +define void @SIM4(i1 %arg) { ; CHECK-LABEL: @SIM4( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 undef, label [[RETURN:%.*]], label [[LOR_LHS_FALSE:%.*]] +; CHECK-NEXT: br i1 %arg, label [[RETURN:%.*]], label [[LOR_LHS_FALSE:%.*]] ; CHECK: lor.lhs.false: -; CHECK-NEXT: br i1 undef, label [[RETURN]], label [[IF_END:%.*]] +; CHECK-NEXT: br i1 %arg, label [[RETURN]], label [[IF_END:%.*]] ; CHECK: if.end: -; CHECK-NEXT: br i1 undef, label [[FOR_END605:%.*]], label [[FOR_BODY_LR_PH:%.*]] +; CHECK-NEXT: br i1 %arg, label [[FOR_END605:%.*]], label [[FOR_BODY_LR_PH:%.*]] ; CHECK: for.body.lr.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: br i1 undef, label [[FOR_INC603:%.*]], label [[IF_END12:%.*]] +; CHECK-NEXT: br i1 %arg, label [[FOR_INC603:%.*]], label [[IF_END12:%.*]] ; CHECK: if.end12: -; CHECK-NEXT: br i1 undef, label [[LAND_LHS_TRUE:%.*]], label [[LAND_LHS_TRUE167:%.*]] +; CHECK-NEXT: br i1 %arg, label [[LAND_LHS_TRUE:%.*]], label [[LAND_LHS_TRUE167:%.*]] ; CHECK: land.lhs.true: -; CHECK-NEXT: br i1 undef, label [[IF_THEN17:%.*]], label [[LAND_LHS_TRUE167]] +; CHECK-NEXT: br i1 %arg, label [[IF_THEN17:%.*]], label [[LAND_LHS_TRUE167]] ; CHECK: if.then17: -; CHECK-NEXT: br i1 undef, label [[IF_END98:%.*]], label [[LAND_RHS_LR_PH:%.*]] +; CHECK-NEXT: br i1 %arg, label [[IF_END98:%.*]], label [[LAND_RHS_LR_PH:%.*]] ; CHECK: land.rhs.lr.ph: ; CHECK-NEXT: unreachable ; CHECK: if.end98: -; CHECK-NEXT: br i1 undef, label [[LAND_LHS_TRUE167]], label [[IF_THEN103:%.*]] +; CHECK-NEXT: br i1 %arg, label [[LAND_LHS_TRUE167]], label [[IF_THEN103:%.*]] ; CHECK: if.then103: ; CHECK-NEXT: [[DOTSUB100:%.*]] = select i1 undef, i32 250, i32 undef ; CHECK-NEXT: [[MUL114:%.*]] = shl nsw i32 [[DOTSUB100]], 2 @@ -37,11 +37,11 @@ define void @SIM4() { ; CHECK-NEXT: br label [[FOR_COND_I:%.*]] ; CHECK: for.cond.i: ; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x i32> [ undef, [[LAND_RHS_I874:%.*]] ], [ [[TMP1]], [[IF_THEN103]] ] -; CHECK-NEXT: br i1 undef, label [[LAND_RHS_I874]], label [[FOR_END_I:%.*]] +; CHECK-NEXT: br i1 %arg, label [[LAND_RHS_I874]], label [[FOR_END_I:%.*]] ; CHECK: land.rhs.i874: -; CHECK-NEXT: br i1 undef, label [[FOR_COND_I]], label [[FOR_END_I]] +; CHECK-NEXT: br i1 %arg, label [[FOR_COND_I]], label [[FOR_END_I]] ; CHECK: for.end.i: -; CHECK-NEXT: br i1 undef, label [[IF_THEN_I:%.*]], label [[IF_END_I:%.*]] +; CHECK-NEXT: br i1 %arg, label [[IF_THEN_I:%.*]], label [[IF_END_I:%.*]] ; CHECK: if.then.i: ; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], undef ; CHECK-NEXT: br label [[EXTEND_BW_EXIT:%.*]] @@ -52,15 +52,15 @@ define void @SIM4() { ; CHECK: for.body28.lr.ph.i: ; CHECK-NEXT: br label [[FOR_END33_I]] 
; CHECK: for.end33.i: -; CHECK-NEXT: br i1 undef, label [[FOR_END58_I:%.*]], label [[FOR_BODY52_LR_PH_I:%.*]] +; CHECK-NEXT: br i1 %arg, label [[FOR_END58_I:%.*]], label [[FOR_BODY52_LR_PH_I:%.*]] ; CHECK: for.body52.lr.ph.i: ; CHECK-NEXT: br label [[FOR_END58_I]] ; CHECK: for.end58.i: ; CHECK-NEXT: br label [[WHILE_COND260_I:%.*]] ; CHECK: while.cond260.i: -; CHECK-NEXT: br i1 undef, label [[LAND_RHS263_I:%.*]], label [[WHILE_END275_I:%.*]] +; CHECK-NEXT: br i1 %arg, label [[LAND_RHS263_I:%.*]], label [[WHILE_END275_I:%.*]] ; CHECK: land.rhs263.i: -; CHECK-NEXT: br i1 undef, label [[WHILE_COND260_I]], label [[WHILE_END275_I]] +; CHECK-NEXT: br i1 %arg, label [[WHILE_COND260_I]], label [[WHILE_END275_I]] ; CHECK: while.end275.i: ; CHECK-NEXT: br label [[EXTEND_BW_EXIT]] ; CHECK: extend_bw.exit: @@ -73,42 +73,42 @@ define void @SIM4() { ; CHECK: land.lhs.true167: ; CHECK-NEXT: unreachable ; CHECK: for.inc603: -; CHECK-NEXT: br i1 undef, label [[FOR_BODY]], label [[FOR_END605]] +; CHECK-NEXT: br i1 %arg, label [[FOR_BODY]], label [[FOR_END605]] ; CHECK: for.end605: ; CHECK-NEXT: unreachable ; CHECK: return: ; CHECK-NEXT: ret void ; entry: - br i1 undef, label %return, label %lor.lhs.false + br i1 %arg, label %return, label %lor.lhs.false lor.lhs.false: ; preds = %entry - br i1 undef, label %return, label %if.end + br i1 %arg, label %return, label %if.end if.end: ; preds = %lor.lhs.false - br i1 undef, label %for.end605, label %for.body.lr.ph + br i1 %arg, label %for.end605, label %for.body.lr.ph for.body.lr.ph: ; preds = %if.end br label %for.body for.body: ; preds = %for.inc603, %for.body.lr.ph - br i1 undef, label %for.inc603, label %if.end12 + br i1 %arg, label %for.inc603, label %if.end12 if.end12: ; preds = %for.body - br i1 undef, label %land.lhs.true, label %land.lhs.true167 + br i1 %arg, label %land.lhs.true, label %land.lhs.true167 land.lhs.true: ; preds = %if.end12 - br i1 undef, label %if.then17, label %land.lhs.true167 + br i1 %arg, label %if.then17, label %land.lhs.true167 if.then17: ; preds = %land.lhs.true - br i1 undef, label %if.end98, label %land.rhs.lr.ph + br i1 %arg, label %if.end98, label %land.rhs.lr.ph land.rhs.lr.ph: ; preds = %if.then17 unreachable if.end98: ; preds = %if.then17 %from299 = getelementptr inbounds %struct._exon_t.12.103.220.363.480.649.740.857.1039.1065.1078.1091.1117.1130.1156.1169.1195.1221.1234.1286.1299.1312.1338.1429.1455.1468.1494.1520.1884.1897.1975.2066.2105.2170.2171, ptr undef, i64 0, i32 1 - br i1 undef, label %land.lhs.true167, label %if.then103 + br i1 %arg, label %land.lhs.true167, label %if.then103 if.then103: ; preds = %if.end98 %.sub100 = select i1 undef, i32 250, i32 undef @@ -119,13 +119,13 @@ if.then103: ; preds = %if.end98 for.cond.i: ; preds = %land.rhs.i874, %if.then103 %row.0.i = phi i32 [ undef, %land.rhs.i874 ], [ %.sub100, %if.then103 ] %col.0.i = phi i32 [ undef, %land.rhs.i874 ], [ %cond125, %if.then103 ] - br i1 undef, label %land.rhs.i874, label %for.end.i + br i1 %arg, label %land.rhs.i874, label %for.end.i land.rhs.i874: ; preds = %for.cond.i - br i1 undef, label %for.cond.i, label %for.end.i + br i1 %arg, label %for.cond.i, label %for.end.i for.end.i: ; preds = %land.rhs.i874, %for.cond.i - br i1 undef, label %if.then.i, label %if.end.i + br i1 %arg, label %if.then.i, label %if.end.i if.then.i: ; preds = %for.end.i %add14.i = add nsw i32 %row.0.i, undef @@ -141,7 +141,7 @@ for.body28.lr.ph.i: ; preds = %if.end.i br label %for.end33.i for.end33.i: ; preds = %for.body28.lr.ph.i, %if.end.i - br i1 undef, label 
%for.end58.i, label %for.body52.lr.ph.i + br i1 %arg, label %for.end58.i, label %for.body52.lr.ph.i for.body52.lr.ph.i: ; preds = %for.end33.i br label %for.end58.i @@ -150,10 +150,10 @@ for.end58.i: ; preds = %for.body52.lr.ph.i, br label %while.cond260.i while.cond260.i: ; preds = %land.rhs263.i, %for.end58.i - br i1 undef, label %land.rhs263.i, label %while.end275.i + br i1 %arg, label %land.rhs263.i, label %while.end275.i land.rhs263.i: ; preds = %while.cond260.i - br i1 undef, label %while.cond260.i, label %while.end275.i + br i1 %arg, label %while.cond260.i, label %while.end275.i while.end275.i: ; preds = %land.rhs263.i, %while.cond260.i br label %extend_bw.exit @@ -174,7 +174,7 @@ land.lhs.true167: ; preds = %if.then157, %extend unreachable for.inc603: ; preds = %for.body - br i1 undef, label %for.body, label %for.end605 + br i1 %arg, label %for.body, label %for.end605 for.end605: ; preds = %for.inc603, %if.end unreachable diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll index 42ad20ff578c1..d13a8578d1e00 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll @@ -4,25 +4,25 @@ %struct.Ray = type { %struct.Vec, %struct.Vec } %struct.Vec = type { double, double, double } -define void @main() { +define void @main(i1 %arg) { ; CHECK-LABEL: @main( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 undef, label [[COND_TRUE:%.*]], label [[COND_END:%.*]] +; CHECK-NEXT: br i1 %arg, label [[COND_TRUE:%.*]], label [[COND_END:%.*]] ; CHECK: cond.true: ; CHECK-NEXT: unreachable ; CHECK: cond.end: ; CHECK-NEXT: br label [[INVOKE_CONT:%.*]] ; CHECK: invoke.cont: -; CHECK-NEXT: br i1 undef, label [[ARRAYCTOR_CONT:%.*]], label [[INVOKE_CONT]] +; CHECK-NEXT: br i1 %arg, label [[ARRAYCTOR_CONT:%.*]], label [[INVOKE_CONT]] ; CHECK: arrayctor.cont: ; CHECK-NEXT: [[AGG_TMP101211_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY:%.*]], ptr undef, i64 0, i32 1, i32 0 ; CHECK-NEXT: br label [[FOR_COND36_PREHEADER:%.*]] ; CHECK: for.cond36.preheader: -; CHECK-NEXT: br i1 undef, label [[FOR_BODY42_LR_PH_US:%.*]], label [[_Z5CLAMPD_EXIT_1:%.*]] +; CHECK-NEXT: br i1 %arg, label [[FOR_BODY42_LR_PH_US:%.*]], label [[_Z5CLAMPD_EXIT_1:%.*]] ; CHECK: cond.false51.us: ; CHECK-NEXT: unreachable ; CHECK: cond.true48.us: -; CHECK-NEXT: br i1 undef, label [[COND_TRUE63_US:%.*]], label [[COND_FALSE66_US:%.*]] +; CHECK-NEXT: br i1 %arg, label [[COND_TRUE63_US:%.*]], label [[COND_FALSE66_US:%.*]] ; CHECK: cond.false66.us: ; CHECK-NEXT: [[ADD_I276_US:%.*]] = fadd double 0.000000e+00, 0x3EB0C6F7A0B5ED8D ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> , double [[ADD_I276_US]], i32 0 @@ -36,12 +36,12 @@ define void @main() { ; CHECK: cond.true63.us: ; CHECK-NEXT: unreachable ; CHECK: for.body42.lr.ph.us: -; CHECK-NEXT: br i1 undef, label [[COND_TRUE48_US:%.*]], label [[COND_FALSE51_US:%.*]] +; CHECK-NEXT: br i1 %arg, label [[COND_TRUE48_US:%.*]], label [[COND_FALSE51_US:%.*]] ; CHECK: _Z5clampd.exit.1: ; CHECK-NEXT: br label [[FOR_COND36_PREHEADER]] ; entry: - br i1 undef, label %cond.true, label %cond.end + br i1 %arg, label %cond.true, label %cond.end cond.true: unreachable @@ -50,7 +50,7 @@ cond.end: br label %invoke.cont invoke.cont: - br i1 undef, label %arrayctor.cont, label %invoke.cont + br i1 %arg, label %arrayctor.cont, label %invoke.cont arrayctor.cont: %agg.tmp99208.sroa.1.8.idx388 = getelementptr inbounds %struct.Ray, ptr undef, i64 0, i32 0, i32 1 @@ -59,13 
+59,13 @@ arrayctor.cont: br label %for.cond36.preheader for.cond36.preheader: - br i1 undef, label %for.body42.lr.ph.us, label %_Z5clampd.exit.1 + br i1 %arg, label %for.body42.lr.ph.us, label %_Z5clampd.exit.1 cond.false51.us: unreachable cond.true48.us: - br i1 undef, label %cond.true63.us, label %cond.false66.us + br i1 %arg, label %cond.true63.us, label %cond.false66.us cond.false66.us: %add.i276.us = fadd double 0.000000e+00, 0.000001e+00 @@ -87,16 +87,16 @@ cond.true63.us: unreachable for.body42.lr.ph.us: - br i1 undef, label %cond.true48.us, label %cond.false51.us + br i1 %arg, label %cond.true48.us, label %cond.false51.us _Z5clampd.exit.1: br label %for.cond36.preheader } -define void @test() { +define void @test(i1 %arg) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 undef, label [[IF_THEN78:%.*]], label [[IF_THEN38:%.*]] +; CHECK-NEXT: br i1 %arg, label [[IF_THEN78:%.*]], label [[IF_THEN38:%.*]] ; CHECK: if.then38: ; CHECK-NEXT: [[AGG_TMP74663_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY:%.*]], ptr undef, i64 0, i32 1, i32 0 ; CHECK-NEXT: store <2 x double> , ptr [[AGG_TMP74663_SROA_0_0_IDX]], align 8 @@ -105,7 +105,7 @@ define void @test() { ; CHECK-NEXT: ret void ; entry: - br i1 undef, label %if.then78, label %if.then38 + br i1 %arg, label %if.then78, label %if.then38 if.then38: %mul.i.i790 = fmul double 0.0, 0.1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll index e3a860a4c6f06..c7c4b06be2d19 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll @@ -16,7 +16,7 @@ target triple = "x86_64-apple-macosx10.9.0" ;define fastcc void @bar() { -define void @bar() { +define void @bar(i1 %arg) { ; CHECK-LABEL: @bar( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds [[TMP0:%.*]], ptr undef, i64 0, i32 1, i32 0 @@ -32,16 +32,16 @@ define void @bar() { ; CHECK-NEXT: store double [[I7]], ptr [[I1]], align 8 ; CHECK-NEXT: [[I10]] = load double, ptr [[I3]], align 8 ; CHECK-NEXT: [[TMP0]] = load <2 x double>, ptr [[I2]], align 8 -; CHECK-NEXT: br i1 undef, label [[BB11:%.*]], label [[BB12:%.*]] +; CHECK-NEXT: br i1 %arg, label [[BB11:%.*]], label [[BB12:%.*]] ; CHECK: bb11: ; CHECK-NEXT: ret void ; CHECK: bb12: ; CHECK-NEXT: store <2 x double> [[TMP0]], ptr [[I4]], align 8 -; CHECK-NEXT: br i1 undef, label [[BB13:%.*]], label [[BB14:%.*]] +; CHECK-NEXT: br i1 %arg, label [[BB13:%.*]], label [[BB14:%.*]] ; CHECK: bb13: ; CHECK-NEXT: br label [[BB14]] ; CHECK: bb14: -; CHECK-NEXT: br i1 undef, label [[BB15:%.*]], label [[BB16]] +; CHECK-NEXT: br i1 %arg, label [[BB15:%.*]], label [[BB16]] ; CHECK: bb15: ; CHECK-NEXT: unreachable ; CHECK: bb16: @@ -51,7 +51,7 @@ define void @bar() { ; CHECK-NEXT: i32 103, label [[BB6]] ; CHECK-NEXT: ] ; CHECK: bb17: -; CHECK-NEXT: br i1 undef, label [[BB6]], label [[BB18:%.*]] +; CHECK-NEXT: br i1 %arg, label [[BB6]], label [[BB18:%.*]] ; CHECK: bb18: ; CHECK-NEXT: unreachable ; @@ -71,7 +71,7 @@ bb6: ; preds = %bb17, %bb16, %bb16, store double %i7, ptr %i1, align 8 %i9 = load double, ptr %i2, align 8 %i10 = load double, ptr %i3, align 8 - br i1 undef, label %bb11, label %bb12 + br i1 %arg, label %bb11, label %bb12 bb11: ; preds = %bb6 ret void @@ -79,13 +79,13 @@ bb11: ; preds = %bb6 bb12: ; preds = %bb6 store double %i9, ptr %i4, align 8 store double %i10, ptr %i5, align 8 - br i1 undef, label %bb13, label %bb14 + br i1 %arg, label 
%bb13, label %bb14 bb13: ; preds = %bb12 br label %bb14 bb14: ; preds = %bb13, %bb12 - br i1 undef, label %bb15, label %bb16 + br i1 %arg, label %bb15, label %bb16 bb15: ; preds = %bb14 unreachable @@ -97,7 +97,7 @@ bb16: ; preds = %bb14 ] bb17: ; preds = %bb16 - br i1 undef, label %bb6, label %bb18 + br i1 %arg, label %bb6, label %bb18 bb18: ; preds = %bb17 unreachable diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll index 739e3964c2685..7510b8fb83e34 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll @@ -280,10 +280,10 @@ return: ; preds = %entry, %if.end @a = external global double, align 8 -define void @PR19646(ptr %this) { +define void @PR19646(ptr %this, i1 %arg) { ; CHECK-LABEL: @PR19646( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 undef, label [[IF_END13:%.*]], label [[IF_END13]] +; CHECK-NEXT: br i1 %arg, label [[IF_END13:%.*]], label [[IF_END13]] ; CHECK: sw.epilog7: ; CHECK-NEXT: [[DOTIN:%.*]] = getelementptr inbounds [[CLASS_B_53_55:%.*]], ptr [[THIS:%.*]], i64 0, i32 0, i32 1 ; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[DOTIN]], align 8 @@ -294,7 +294,7 @@ define void @PR19646(ptr %this) { ; CHECK-NEXT: [[_DY:%.*]] = getelementptr inbounds [[CLASS_B_53_55]], ptr [[THIS]], i64 0, i32 0, i32 2 ; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[_DY]], align 8 ; CHECK-NEXT: [[ADD10:%.*]] = fadd double [[ADD8]], [[TMP2]] -; CHECK-NEXT: br i1 undef, label [[IF_THEN12:%.*]], label [[IF_END13]] +; CHECK-NEXT: br i1 %arg, label [[IF_THEN12:%.*]], label [[IF_END13]] ; CHECK: if.then12: ; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr undef, align 8 ; CHECK-NEXT: br label [[IF_END13]] @@ -304,7 +304,7 @@ define void @PR19646(ptr %this) { ; CHECK-NEXT: unreachable ; entry: - br i1 undef, label %if.end13, label %if.end13 + br i1 %arg, label %if.end13, label %if.end13 sw.epilog7: ; No predecessors! %.in = getelementptr inbounds %class.B.53.55, ptr %this, i64 0, i32 0, i32 1 @@ -316,7 +316,7 @@ sw.epilog7: ; No predecessors! 
%_dy = getelementptr inbounds %class.B.53.55, ptr %this, i64 0, i32 0, i32 2 %2 = load double, ptr %_dy, align 8 %add10 = fadd double %add8, %2 - br i1 undef, label %if.then12, label %if.end13 + br i1 %arg, label %if.then12, label %if.end13 if.then12: ; preds = %sw.epilog7 %3 = load double, ptr undef, align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll b/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll index f614796916baa..d474218e84cca 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll @@ -116,7 +116,7 @@ then: %struct.wombat.0 = type { %struct.bar } %struct.bar = type { [3 x double], [3 x double], double, double, i16, ptr, i32, [3 x double] } -define double @preserve_loop_info(ptr %arg) { +define double @preserve_loop_info(ptr %arg, i1 %arg2) { ; CHECK-LABEL: @preserve_loop_info( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP:%.*]] = alloca [3 x double], align 16 @@ -124,15 +124,15 @@ define double @preserve_loop_info(ptr %arg) { ; CHECK: outer.header: ; CHECK-NEXT: br label [[INNER:%.*]] ; CHECK: inner: -; CHECK-NEXT: br i1 undef, label [[OUTER_LATCH:%.*]], label [[INNER]] +; CHECK-NEXT: br i1 %arg2, label [[OUTER_LATCH:%.*]], label [[INNER]] ; CHECK: outer.latch: -; CHECK-NEXT: br i1 undef, label [[BB:%.*]], label [[OUTER_HEADER]] +; CHECK-NEXT: br i1 %arg2, label [[BB:%.*]], label [[OUTER_HEADER]] ; CHECK: bb: ; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr undef, align 8 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x double], ptr [[TMP]], i64 0, i64 1 ; CHECK-NEXT: br label [[LOOP_3HEADER:%.*]] ; CHECK: loop.3header: -; CHECK-NEXT: br i1 undef, label [[LOOP_3LATCH:%.*]], label [[BB9:%.*]] +; CHECK-NEXT: br i1 %arg2, label [[LOOP_3LATCH:%.*]], label [[BB9:%.*]] ; CHECK: bb9: ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x double], ptr [[TMP5]], i64 undef, i64 1 ; CHECK-NEXT: store double undef, ptr [[TMP]], align 16 @@ -140,7 +140,7 @@ define double @preserve_loop_info(ptr %arg) { ; CHECK-NEXT: store double [[TMP12]], ptr [[TMP7]], align 8 ; CHECK-NEXT: br label [[LOOP_3LATCH]] ; CHECK: loop.3latch: -; CHECK-NEXT: br i1 undef, label [[BB14:%.*]], label [[LOOP_3HEADER]] +; CHECK-NEXT: br i1 %arg2, label [[BB14:%.*]], label [[LOOP_3HEADER]] ; CHECK: bb14: ; CHECK-NEXT: [[TMP15:%.*]] = call double undef(ptr [[TMP]], ptr [[ARG:%.*]]) ; CHECK-NEXT: ret double undef @@ -153,10 +153,10 @@ outer.header: ; preds = %bb3, %bb br label %inner inner: - br i1 undef, label %outer.latch, label %inner + br i1 %arg2, label %outer.latch, label %inner outer.latch: ; preds = %bb16 - br i1 undef, label %bb, label %outer.header + br i1 %arg2, label %bb, label %outer.header bb: ; preds = %bb3 %tmp5 = load ptr, ptr undef, align 8 @@ -164,7 +164,7 @@ bb: ; preds = %bb3 br label %loop.3header loop.3header: ; preds = %bb13, %bb4 - br i1 undef, label %loop.3latch, label %bb9 + br i1 %arg2, label %loop.3latch, label %bb9 bb9: ; preds = %bb8 %tmp10 = getelementptr inbounds [3 x double], ptr %tmp5, i64 undef, i64 1 @@ -174,7 +174,7 @@ bb9: ; preds = %bb8 br label %loop.3latch loop.3latch: ; preds = %bb11, %bb8 - br i1 undef, label %bb14, label %loop.3header + br i1 %arg2, label %bb14, label %loop.3header bb14: ; preds = %bb13 %tmp15 = call double undef(ptr %tmp, ptr %arg) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/no-scheduled-instructions.ll b/llvm/test/Transforms/SLPVectorizer/X86/no-scheduled-instructions.ll index 43d8c1342cbbc..eea22c1861b44 100644 --- 
a/llvm/test/Transforms/SLPVectorizer/X86/no-scheduled-instructions.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/no-scheduled-instructions.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -passes=slp-vectorizer -mattr=+avx -mtriple=x86_64 < %s | FileCheck %s -define void @test() { +define void @test(i1 %arg) { ; CHECK-LABEL: define void @test -; CHECK-SAME: () #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: br i1 undef, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK-SAME: (i1 %arg) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: br i1 %arg, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> ) ; CHECK-NEXT: call void @f(i32 noundef [[TMP1]]) @@ -20,7 +20,7 @@ define void @test() { %i32 = extractelement <4 x i32> zeroinitializer, i64 1 %i33 = extractelement <4 x i32> zeroinitializer, i64 2 %i34 = extractelement <4 x i32> zeroinitializer, i64 3 - br i1 undef, label %bb1, label %bb2 + br i1 %arg, label %bb1, label %bb2 bb1: %i11 = mul nsw i32 %i28, %i27 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/ordering.ll b/llvm/test/Transforms/SLPVectorizer/X86/ordering.ll index ad3ebf57ab7a4..f9d6c3eab350f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/ordering.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/ordering.ll @@ -26,10 +26,10 @@ entry: declare ptr @objc_msgSend(ptr, ptr, ...) declare i32 @personality_v0(...) -define void @invoketest() personality ptr @personality_v0 { +define void @invoketest(i1 %arg) personality ptr @personality_v0 { ; CHECK-LABEL: @invoketest( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 undef, label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK-NEXT: br i1 %arg, label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] ; CHECK: cond.true: ; CHECK-NEXT: [[CALL49:%.*]] = invoke double @objc_msgSend(ptr undef, ptr undef) ; CHECK-NEXT: to label [[COND_TRUE54:%.*]] unwind label [[LPAD:%.*]] @@ -43,7 +43,7 @@ define void @invoketest() personality ptr @personality_v0 { ; CHECK-NEXT: [[CALL59:%.*]] = invoke double @objc_msgSend(ptr undef, ptr undef) ; CHECK-NEXT: to label [[COND_END60]] unwind label [[LPAD]] ; CHECK: cond.end60: -; CHECK-NEXT: br i1 undef, label [[IF_END98:%.*]], label [[IF_THEN63:%.*]] +; CHECK-NEXT: br i1 %arg, label [[IF_END98:%.*]], label [[IF_THEN63:%.*]] ; CHECK: if.then63: ; CHECK-NEXT: br label [[IF_END98]] ; CHECK: lpad: @@ -56,7 +56,7 @@ define void @invoketest() personality ptr @personality_v0 { ; CHECK-NEXT: ret void ; entry: - br i1 undef, label %cond.true, label %cond.false + br i1 %arg, label %cond.true, label %cond.false cond.true: %call49 = invoke double @objc_msgSend(ptr undef, ptr undef) @@ -77,7 +77,7 @@ cond.false57: cond.end60: %cond126 = phi double [ %call49, %cond.true54 ], [ %call51, %cond.false57 ] %cond61 = phi double [ %call56, %cond.true54 ], [ %call59, %cond.false57 ] - br i1 undef, label %if.end98, label %if.then63 + br i1 %arg, label %if.end98, label %if.then63 if.then63: %conv69 = fptrunc double undef to float diff --git a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll index 20ad09a632826..4cdf9670394f1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll @@ -2,12 +2,12 @@ ; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 < %s | FileCheck %s ; Function Attrs: nounwind uwtable -define void @get_block(i32 %y_pos) local_unnamed_addr #0 { 
+define void @get_block(i32 %y_pos, i1 %arg) local_unnamed_addr #0 { ; CHECK-LABEL: @get_block( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LAND_LHS_TRUE:%.*]] ; CHECK: land.lhs.true: -; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK-NEXT: br i1 %arg, label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK: if.then: ; CHECK-NEXT: unreachable ; CHECK: if.end: @@ -43,7 +43,7 @@ entry: br label %land.lhs.true land.lhs.true: ; preds = %entry - br i1 undef, label %if.then, label %if.end + br i1 %arg, label %if.then, label %if.end if.then: ; preds = %land.lhs.true unreachable diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll index 78bfb8df51aeb..df85656800aac 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll @@ -277,13 +277,13 @@ for.end: ; preds = %for.body ret float %add31 } -define void @test(ptr %i1, ptr %i2, ptr %o) { +define void @test(ptr %i1, ptr %i2, ptr %o, i1 %arg) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[I1_0:%.*]] = load x86_fp80, ptr [[I1:%.*]], align 16 ; CHECK-NEXT: [[I1_GEP1:%.*]] = getelementptr x86_fp80, ptr [[I1]], i64 1 ; CHECK-NEXT: [[I1_1:%.*]] = load x86_fp80, ptr [[I1_GEP1]], align 16 -; CHECK-NEXT: br i1 undef, label [[THEN:%.*]], label [[END:%.*]] +; CHECK-NEXT: br i1 %arg, label [[THEN:%.*]], label [[END:%.*]] ; CHECK: then: ; CHECK-NEXT: [[I2_0:%.*]] = load x86_fp80, ptr [[I2:%.*]], align 16 ; CHECK-NEXT: [[I2_GEP1:%.*]] = getelementptr inbounds x86_fp80, ptr [[I2]], i64 1 @@ -305,7 +305,7 @@ entry: %i1.0 = load x86_fp80, ptr %i1, align 16 %i1.gep1 = getelementptr x86_fp80, ptr %i1, i64 1 %i1.1 = load x86_fp80, ptr %i1.gep1, align 16 - br i1 undef, label %then, label %end + br i1 %arg, label %then, label %end then: %i2.0 = load x86_fp80, ptr %i2, align 16 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll index 88ac2d9dc42d1..ddbe943e32446 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll @@ -6,7 +6,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 target triple = "i386-apple-macosx10.9.0" -define void @test(ptr %i1, ptr %i2, ptr %o) { +define void @test(ptr %i1, ptr %i2, ptr %o, i1 %arg) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[I1_0:%.*]] = load double, ptr [[I1:%.*]], align 16 @@ -14,7 +14,7 @@ define void @test(ptr %i1, ptr %i2, ptr %o) { ; CHECK-NEXT: [[I1_1:%.*]] = load double, ptr [[I1_GEP1]], align 16 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[I1_0]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[I1_1]], i32 1 -; CHECK-NEXT: br i1 undef, label [[THEN:%.*]], label [[END:%.*]] +; CHECK-NEXT: br i1 %arg, label [[THEN:%.*]], label [[END:%.*]] ; CHECK: then: ; CHECK-NEXT: [[I2_0:%.*]] = load double, ptr [[I2:%.*]], align 16 ; CHECK-NEXT: [[I2_GEP1:%.*]] = getelementptr inbounds double, ptr [[I2]], i64 1 @@ -38,7 +38,7 @@ entry: %i1.0 = load double, ptr %i1, align 16 %i1.gep1 = getelementptr double, ptr %i1, i64 1 %i1.1 = load double, ptr %i1.gep1, align 16 - br i1 undef, label %then, label %end + br i1 %arg, label %then, label %end then: %i2.0 = load double, ptr %i2, align 16 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr16571.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr16571.ll index 
b61b004797959..733033efa3bfe 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr16571.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr16571.ll @@ -3,9 +3,9 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32" target triple = "i686-pc-win32" -define hidden fastcc void @"System.PrimitiveTypesParser.TryParseIEEE754(char*,uint,double&)"() unnamed_addr { +define hidden fastcc void @"System.PrimitiveTypesParser.TryParseIEEE754(char*,uint,double&)"(i1 %arg) unnamed_addr { "@0": - br i1 undef, label %"@38.lr.ph", label %"@37" + br i1 %arg, label %"@38.lr.ph", label %"@37" "@37": ; preds = %"@38.lr.ph", %"@44", %"@0" ret void @@ -13,10 +13,10 @@ define hidden fastcc void @"System.PrimitiveTypesParser.TryParseIEEE754(ch "@44": ; preds = %"@38.lr.ph" %0 = add i64 undef, undef %1 = add i32 %mainPartDigits.loc.0.ph45, 1 - br i1 undef, label %"@38.lr.ph", label %"@37" + br i1 %arg, label %"@38.lr.ph", label %"@37" "@38.lr.ph": ; preds = %"@44", %"@0" %mainDoublePart.loc.0.ph46 = phi i64 [ %0, %"@44" ], [ 0, %"@0" ] %mainPartDigits.loc.0.ph45 = phi i32 [ %1, %"@44" ], [ 0, %"@0" ] - br i1 undef, label %"@44", label %"@37" + br i1 %arg, label %"@44", label %"@37" } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_unsupported.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_unsupported.ll index 540ea4eb659fe..ac8b2428a3dea 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/remark_unsupported.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_unsupported.ll @@ -3,13 +3,13 @@ ; RUN: FileCheck --input-file=%t --check-prefix=YAML %s ; This type is not supported by SLP -define i1 @test(ptr %i1, ptr %i2) { +define i1 @test(ptr %i1, ptr %i2, i1 %arg) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[I1_0:%.*]] = load x86_fp80, ptr [[I1:%.*]], align 16 ; CHECK-NEXT: [[I1_GEP1:%.*]] = getelementptr x86_fp80, ptr [[I1]], i64 1 ; CHECK-NEXT: [[I1_1:%.*]] = load x86_fp80, ptr [[I1_GEP1]], align 16 -; CHECK-NEXT: br i1 undef, label [[THEN:%.*]], label [[END:%.*]] +; CHECK-NEXT: br i1 %arg, label [[THEN:%.*]], label [[END:%.*]] ; CHECK: then: ; CHECK-NEXT: [[I2_0:%.*]] = load x86_fp80, ptr [[I2:%.*]], align 16 ; CHECK-NEXT: [[I2_GEP1:%.*]] = getelementptr inbounds x86_fp80, ptr [[I2]], i64 1 @@ -25,7 +25,7 @@ entry: %i1.0 = load x86_fp80, ptr %i1, align 16 %i1.gep1 = getelementptr x86_fp80, ptr %i1, i64 1 %i1.1 = load x86_fp80, ptr %i1.gep1, align 16 - br i1 undef, label %then, label %end + br i1 %arg, label %then, label %end then: %i2.0 = load x86_fp80, ptr %i2, align 16 %i2.gep1 = getelementptr inbounds x86_fp80, ptr %i2, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll index f9815529a2375..5f2199aef17ee 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll @@ -3,10 +3,10 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -define void @hoge() { +define void @hoge(i1 %arg) { ; CHECK-LABEL: @hoge( ; CHECK-NEXT: bb: -; CHECK-NEXT: br i1 undef, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK-NEXT: br i1 %arg, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: ret void ; CHECK: bb2: @@ -57,7 +57,7 @@ define void @hoge() { ; CHECK-NEXT: unreachable ; bb: - br i1 undef, label %bb1, label %bb2 + br i1 %arg, label %bb1, label %bb2 bb1: ; preds = %bb ret void diff --git 
a/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_reordered_users.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_reordered_users.ll index 69b4639d9c131..2e6df0007e08b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_reordered_users.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_reordered_users.ll @@ -42,16 +42,16 @@ ; comment out reorderTopToBottom() and remove the stores. -define void @reorder_crash(ptr %ptr) { +define void @reorder_crash(ptr %ptr, i1 %arg) { ; CHECK-LABEL: @reorder_crash( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 undef, label [[BB0:%.*]], label [[BB12:%.*]] +; CHECK-NEXT: br i1 %arg, label [[BB0:%.*]], label [[BB12:%.*]] ; CHECK: bb0: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[PTR:%.*]], align 4 ; CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[PTR]], align 4 ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb12: -; CHECK-NEXT: br i1 undef, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK-NEXT: br i1 %arg, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[PTR]], align 4 ; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[PTR]], align 4 @@ -69,7 +69,7 @@ entry: %gep1 = getelementptr inbounds float, ptr %ptr, i64 1 %gep2 = getelementptr inbounds float, ptr %ptr, i64 2 %gep3 = getelementptr inbounds float, ptr %ptr, i64 3 - br i1 undef, label %bb0, label %bb12 + br i1 %arg, label %bb0, label %bb12 bb0: ; Used by phi in this order: 1, 0, 2, 3 @@ -86,7 +86,7 @@ bb0: br label %bb3 bb12: - br i1 undef, label %bb1, label %bb2 + br i1 %arg, label %bb1, label %bb2 bb1: ; Used by phi in this order: 1, 0, 2, 3 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll index cb955ff91ed81..ef1149a108e29 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll @@ -4,10 +4,10 @@ %"struct.std::array" = type { [32 x i8] } ; Function Attrs: nounwind uwtable -define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv() unnamed_addr #0 align 2 { +define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv(i1 %arg) unnamed_addr #0 align 2 { ; CHECK-LABEL: @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 undef, label [[IF_END50_I:%.*]], label [[IF_THEN22_I:%.*]] +; CHECK-NEXT: br i1 %arg, label [[IF_END50_I:%.*]], label [[IF_THEN22_I:%.*]] ; CHECK: if.then22.i: ; CHECK-NEXT: [[SUB_I:%.*]] = add nsw i32 undef, -1 ; CHECK-NEXT: [[CONV31_I:%.*]] = and i32 undef, [[SUB_I]] @@ -36,7 +36,7 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv() ; CHECK-NEXT: ret void ; entry: - br i1 undef, label %if.end50.i, label %if.then22.i + br i1 %arg, label %if.end50.i, label %if.then22.i if.then22.i: ; preds = %entry %sub.i = add nsw i32 undef, -1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll b/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll index 79ce74bd21dbc..b900bd3a8c331 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll @@ -4,7 +4,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 %struct.S = type { [3 x float], [3 x float], [4 x float] } -define i32 @foo(i32 %0, ptr %1, ptr %2) { +define i32 @foo(i32 %0, ptr %1, ptr %2, i1 %arg) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: 
[[T4:%.*]] = alloca [[STRUCT_S:%.*]], align 8 ; CHECK-NEXT: [[T8:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[T4]], i64 0, i32 1 @@ -19,7 +19,7 @@ define i32 @foo(i32 %0, ptr %1, ptr %2) { ; CHECK-NEXT: [[T21:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[T4]], i64 0, i32 2, i64 0 ; CHECK-NEXT: store <4 x float> [[SHUFFLE]], ptr [[T21]], align 4 ; CHECK-NEXT: [[T89]] = load <2 x float>, ptr [[T9]], align 4 -; CHECK-NEXT: br i1 undef, label [[T37]], label [[T55:%.*]] +; CHECK-NEXT: br i1 %arg, label [[T37]], label [[T55:%.*]] ; CHECK: t55: ; CHECK-NEXT: ret i32 0 ; @@ -49,7 +49,7 @@ t37: %t89 = load <2 x float>, ptr %t9, align 4 %x23 = extractelement <2 x float> %t89, i32 0 %x24 = extractelement <2 x float> %t89, i32 1 - br i1 undef, label %t37, label %t55 + br i1 %arg, label %t37, label %t55 t55: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revectorized_rdx_crash.ll b/llvm/test/Transforms/SLPVectorizer/X86/revectorized_rdx_crash.ll index edd1a2a3a2fff..364b0f4c1a3a7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/revectorized_rdx_crash.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/revectorized_rdx_crash.ll @@ -12,10 +12,10 @@ ; iteration (it was matched and vectorized, which added a use of a deleted ; instruction) -define void @test() { +define void @test(i1 %arg) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 undef, label [[IF_END:%.*]], label [[FOR_COND_PREHEADER:%.*]] +; CHECK-NEXT: br i1 %arg, label [[IF_END:%.*]], label [[FOR_COND_PREHEADER:%.*]] ; CHECK: for.cond.preheader: ; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds [100 x i32], ptr undef, i64 0, i64 2 ; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds [100 x i32], ptr undef, i64 0, i64 3 @@ -35,7 +35,7 @@ define void @test() { ; CHECK-NEXT: ret void ; entry: - br i1 undef, label %if.end, label %for.cond.preheader + br i1 %arg, label %if.end, label %for.cond.preheader for.cond.preheader: ; preds = %entry %i = getelementptr inbounds [100 x i32], ptr undef, i64 0, i64 2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/value-bug-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/value-bug-inseltpoison.ll index eb5e218f057ce..ac9454967719e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/value-bug-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/value-bug-inseltpoison.ll @@ -8,7 +8,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; as elements of the vectorized tree. 
; PR19621 -define void @test() { +define void @test(i1 %arg) { ; CHECK-LABEL: @test( ; CHECK-NEXT: bb279: ; CHECK-NEXT: br label [[BB283:%.*]] @@ -22,13 +22,13 @@ define void @test() { ; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], undef ; CHECK-NEXT: br label [[BB21_I:%.*]] ; CHECK: bb21.i: -; CHECK-NEXT: br i1 undef, label [[BB22_I:%.*]], label [[EXIT]] +; CHECK-NEXT: br i1 %arg, label [[BB22_I:%.*]], label [[EXIT]] ; CHECK: bb22.i: ; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]] ; CHECK-NEXT: br label [[BB32_I:%.*]] ; CHECK: bb32.i: ; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x double> [ [[TMP5]], [[BB22_I]] ], [ zeroinitializer, [[BB32_I]] ] -; CHECK-NEXT: br i1 undef, label [[BB32_I]], label [[BB21_I]] +; CHECK-NEXT: br i1 %arg, label [[BB32_I]], label [[BB21_I]] ; CHECK: exit: ; CHECK-NEXT: [[TMP7:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double> ; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], @@ -57,7 +57,7 @@ bb284: br label %bb21.i bb21.i: - br i1 undef, label %bb22.i, label %exit + br i1 %arg, label %bb22.i, label %exit bb22.i: %tmp24.i = fadd double undef, %tmp9.i @@ -67,7 +67,7 @@ bb22.i: bb32.i: %xs.0.i = phi double [ %tmp24.i, %bb22.i ], [ 0.000000e+00, %bb32.i ] %ys.0.i = phi double [ %tmp26.i, %bb22.i ], [ 0.000000e+00, %bb32.i ] - br i1 undef, label %bb32.i, label %bb21.i + br i1 %arg, label %bb32.i, label %bb21.i exit: %tmp303 = fpext float %Av.sroa.0.0 to double diff --git a/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll b/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll index f870cb44f4e5f..c5cdcdc1eb1a5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll @@ -8,7 +8,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; as elements of the vectorized tree. 
; PR19621 -define void @test() { +define void @test(i1 %arg) { ; CHECK-LABEL: @test( ; CHECK-NEXT: bb279: ; CHECK-NEXT: br label [[BB283:%.*]] @@ -22,13 +22,13 @@ define void @test() { ; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], undef ; CHECK-NEXT: br label [[BB21_I:%.*]] ; CHECK: bb21.i: -; CHECK-NEXT: br i1 undef, label [[BB22_I:%.*]], label [[EXIT]] +; CHECK-NEXT: br i1 %arg, label [[BB22_I:%.*]], label [[EXIT]] ; CHECK: bb22.i: ; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]] ; CHECK-NEXT: br label [[BB32_I:%.*]] ; CHECK: bb32.i: ; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x double> [ [[TMP5]], [[BB22_I]] ], [ zeroinitializer, [[BB32_I]] ] -; CHECK-NEXT: br i1 undef, label [[BB32_I]], label [[BB21_I]] +; CHECK-NEXT: br i1 %arg, label [[BB32_I]], label [[BB21_I]] ; CHECK: exit: ; CHECK-NEXT: [[TMP7:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double> ; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], @@ -57,7 +57,7 @@ bb284: br label %bb21.i bb21.i: - br i1 undef, label %bb22.i, label %exit + br i1 %arg, label %bb22.i, label %exit bb22.i: %tmp24.i = fadd double undef, %tmp9.i @@ -67,7 +67,7 @@ bb22.i: bb32.i: %xs.0.i = phi double [ %tmp24.i, %bb22.i ], [ 0.000000e+00, %bb32.i ] %ys.0.i = phi double [ %tmp26.i, %bb22.i ], [ 0.000000e+00, %bb32.i ] - br i1 undef, label %bb32.i, label %bb21.i + br i1 %arg, label %bb32.i, label %bb21.i exit: %tmp303 = fpext float %Av.sroa.0.0 to double diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll index 17f9f371ff6ef..a69849fabcef6 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -passes=slp-vectorizer -S -mcpu=cascadelake -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s -define void @foo() { +define void @foo(i1 %arg) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CONV:%.*]] = uitofp i16 undef to float @@ -14,7 +14,7 @@ define void @foo() { ; CHECK: bb2: ; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP14:%.*]], [[BB3:%.*]] ] ; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr undef, align 8 -; CHECK-NEXT: br i1 undef, label [[BB3]], label [[BB4:%.*]] +; CHECK-NEXT: br i1 %arg, label [[BB3]], label [[BB4:%.*]] ; CHECK: bb4: ; CHECK-NEXT: [[TMP4:%.*]] = fpext <4 x float> [[TMP2]] to <4 x double> ; CHECK-NEXT: [[CONV2:%.*]] = uitofp i16 undef to double @@ -46,7 +46,7 @@ bb2: %2 = phi float [ undef, %bb1 ], [ %11, %bb3 ] %3 = phi float [ undef, %bb1 ], [ %12, %bb3 ] %4 = load double, ptr undef, align 8 - br i1 undef, label %bb3, label %bb4 + br i1 %arg, label %bb3, label %bb4 bb4: %ext = fpext float %3 to double diff --git a/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll b/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll index 9d6371b13e08a..a728515f0fdc3 100644 --- a/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll +++ b/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll @@ -7,10 +7,10 @@ ; to UMax and thus same reduction kind is returned. ; The routine's later code merely assumes the instruction to be a select. 
-define dso_local void @test() { +define dso_local void @test(i1 %arg) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 undef, label [[NEXT:%.*]], label [[THEN:%.*]] +; CHECK-NEXT: br i1 %arg, label [[NEXT:%.*]], label [[THEN:%.*]] ; CHECK: then: ; CHECK-NEXT: [[UM:%.*]] = call i8 @llvm.umax.i8(i8 0, i8 undef) ; CHECK-NEXT: [[SELCMP:%.*]] = icmp ult i8 [[UM]], undef @@ -21,7 +21,7 @@ define dso_local void @test() { ; CHECK-NEXT: ret void ; entry: - br i1 undef, label %next, label %then + br i1 %arg, label %next, label %then then: %um = call i8 @llvm.umax.i8(i8 0, i8 undef) diff --git a/llvm/test/Transforms/Scalarizer/crash-bug.ll b/llvm/test/Transforms/Scalarizer/crash-bug.ll index 2195a37564c6a..692fc07e3ea5c 100644 --- a/llvm/test/Transforms/Scalarizer/crash-bug.ll +++ b/llvm/test/Transforms/Scalarizer/crash-bug.ll @@ -3,14 +3,14 @@ ; Don't crash -define void @foo() { -; CHECK-LABEL: define void @foo() { +define void @foo(i1 %arg) { +; CHECK-LABEL: define void @foo(i1 %arg) { ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb2: ; CHECK-NEXT: br label [[BB1]] ; CHECK: bb1: ; CHECK-NEXT: [[BB2_VEC_I1:%.*]] = phi i16 [ 200, [[TMP0:%.*]] ], [ [[BB2_VEC_I1]], [[BB2:%.*]] ] -; CHECK-NEXT: br i1 undef, label [[BB3:%.*]], label [[BB2]] +; CHECK-NEXT: br i1 %arg, label [[BB3:%.*]], label [[BB2]] ; CHECK: bb3: ; CHECK-NEXT: ret void ; @@ -22,7 +22,7 @@ bb2: ; preds = %bb1 bb1: ; preds = %bb2, %0 %bb1_vec = phi <2 x i16> [ , %0 ], [ %bb2_vec, %bb2 ] - br i1 undef, label %bb3, label %bb2 + br i1 %arg, label %bb3, label %bb2 bb3: ret void diff --git a/llvm/test/Transforms/Scalarizer/dbgloc-bug-inseltpoison.ll b/llvm/test/Transforms/Scalarizer/dbgloc-bug-inseltpoison.ll index e5ba35ca2c4ee..4f4dff84e1294 100644 --- a/llvm/test/Transforms/Scalarizer/dbgloc-bug-inseltpoison.ll +++ b/llvm/test/Transforms/Scalarizer/dbgloc-bug-inseltpoison.ll @@ -7,7 +7,7 @@ define i16 @f1() !dbg !5 { ret i16 undef, !dbg !9 } -define void @f2() !dbg !10 { +define void @f2(i1 %arg) !dbg !10 { bb1: %_tmp7 = tail call i16 @f1(), !dbg !13 ; CHECK: call i16 @f1(), !dbg !13 @@ -16,7 +16,7 @@ bb1: br label %vector.body vector.body: - br i1 undef, label %middle.block, label %vector.body + br i1 %arg, label %middle.block, label %vector.body middle.block: ret void, !dbg !15 diff --git a/llvm/test/Transforms/Scalarizer/dbgloc-bug.ll b/llvm/test/Transforms/Scalarizer/dbgloc-bug.ll index 639fd3d7c0957..3dd768e593eb4 100644 --- a/llvm/test/Transforms/Scalarizer/dbgloc-bug.ll +++ b/llvm/test/Transforms/Scalarizer/dbgloc-bug.ll @@ -7,7 +7,7 @@ define i16 @f1() !dbg !5 { ret i16 undef, !dbg !9 } -define void @f2() !dbg !10 { +define void @f2(i1 %arg) !dbg !10 { bb1: %_tmp7 = tail call i16 @f1(), !dbg !13 ; CHECK: call i16 @f1(), !dbg !13 @@ -16,7 +16,7 @@ bb1: br label %vector.body vector.body: - br i1 undef, label %middle.block, label %vector.body + br i1 %arg, label %middle.block, label %vector.body middle.block: ret void, !dbg !15 diff --git a/llvm/test/Transforms/Scalarizer/phi-unreachable-pred.ll b/llvm/test/Transforms/Scalarizer/phi-unreachable-pred.ll index 3b6f5ccf3d3ea..8ce912ff55d6e 100644 --- a/llvm/test/Transforms/Scalarizer/phi-unreachable-pred.ll +++ b/llvm/test/Transforms/Scalarizer/phi-unreachable-pred.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -passes='function(scalarizer)' -S -o - | FileCheck %s -define i16 @f1() { +define i16 @f1(i1 %arg) { ; CHECK-LABEL: @f1( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_END:%.*]] @@ -9,7 +9,7 @@ define 
i16 @f1() { ; CHECK-NEXT: [[INSERT:%.*]] = insertelement <4 x i16> [[INSERT]], i16 ptrtoint (ptr @f1 to i16), i32 0 ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: -; CHECK-NEXT: br i1 undef, label [[FOR_BODY:%.*]], label [[FOR_END]] +; CHECK-NEXT: br i1 %arg, label [[FOR_BODY:%.*]], label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: [[PHI_I0:%.*]] = phi i16 [ 1, [[ENTRY:%.*]] ], [ poison, [[FOR_COND]] ] ; CHECK-NEXT: ret i16 [[PHI_I0]] @@ -22,7 +22,7 @@ for.body: br label %for.cond for.cond: - br i1 undef, label %for.body, label %for.end + br i1 %arg, label %for.body, label %for.end for.end: ; opt used to hang when scalarizing this code. When scattering %insert we @@ -34,22 +34,22 @@ for.end: ret i16 %extract } -define void @f2() { +define void @f2(i1 %arg) { ; CHECK-LABEL: @f2( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_END8:%.*]] +; CHECK-NEXT: br i1 %arg, label [[IF_THEN:%.*]], label [[IF_END8:%.*]] ; CHECK: if.then: ; CHECK-NEXT: br label [[IF_END8]] ; CHECK: for.body2: -; CHECK-NEXT: br i1 undef, label [[FOR_END:%.*]], label [[FOR_INC:%.*]] +; CHECK-NEXT: br i1 %arg, label [[FOR_END:%.*]], label [[FOR_INC:%.*]] ; CHECK: for.end: ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[E_SROA_3_2:%.*]] = phi <2 x i64> [ splat (i64 1), [[FOR_END]] ], [ [[E_SROA_3_2]], [[FOR_BODY2:%.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ 6, [[FOR_END]] ], [ [[TMP0]], [[FOR_BODY2]] ] -; CHECK-NEXT: br i1 undef, label [[FOR_BODY2]], label [[FOR_COND1_FOR_END7_CRIT_EDGE:%.*]] +; CHECK-NEXT: br i1 %arg, label [[FOR_BODY2]], label [[FOR_COND1_FOR_END7_CRIT_EDGE:%.*]] ; CHECK: for.cond1.for.end7_crit_edge: ; CHECK-NEXT: br label [[IF_END8]] ; CHECK: if.end8: @@ -61,13 +61,13 @@ entry: br label %for.body for.body: ; preds = %if.end8, %entry - br i1 undef, label %if.then, label %if.end8 + br i1 %arg, label %if.then, label %if.end8 if.then: ; preds = %for.body br label %if.end8 for.body2: ; preds = %for.inc - br i1 undef, label %for.end, label %for.inc + br i1 %arg, label %for.end, label %for.inc for.end: ; preds = %for.body2 br label %for.inc @@ -75,7 +75,7 @@ for.end: ; preds = %for.body2 for.inc: ; preds = %for.end, %for.body2 %e.sroa.3.2 = phi <2 x i64> [ , %for.end ], [ %e.sroa.3.2, %for.body2 ] %0 = phi i32 [ 6, %for.end ], [ %0, %for.body2 ] - br i1 undef, label %for.body2, label %for.cond1.for.end7_crit_edge + br i1 %arg, label %for.body2, label %for.cond1.for.end7_crit_edge for.cond1.for.end7_crit_edge: ; preds = %for.inc br label %if.end8 diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2011-06-02-CritSwitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2011-06-02-CritSwitch.ll index d800fca4727e3..ab95b523744be 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/2011-06-02-CritSwitch.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/2011-06-02-CritSwitch.ll @@ -1,12 +1,12 @@ ; RUN: opt -passes="loop-mssa(simple-loop-unswitch)" -verify-memoryssa -disable-output < %s ; PR10031 -define i32 @test(i32 %command) { +define i32 @test(i32 %command, i1 %arg) { entry: br label %tailrecurse tailrecurse: ; preds = %if.then14, %tailrecurse, %entry - br i1 undef, label %if.then, label %tailrecurse + br i1 %arg, label %if.then, label %tailrecurse if.then: ; preds = %tailrecurse switch i32 %command, label %sw.bb [ @@ -15,7 +15,7 @@ if.then: ; preds = %tailrecurse ] land.lhs.true: ; preds = %if.then, %if.then - br i1 undef, label %sw.bb, label %if.then14 + br i1 %arg, label %sw.bb, label 
%if.then14 if.then14: ; preds = %land.lhs.true switch i32 %command, label %tailrecurse [ diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll index 8ad869015f44e..229bbb2f3929f 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll @@ -8,7 +8,7 @@ target triple = "x86_64-apple-macosx10.7.0" %class.B.21.41.65.101.137.157.177.197.237.241.245.249.261.293.301.337.345.378 = type { ptr } %class.A.20.40.64.100.136.156.176.196.236.240.244.248.260.292.300.336.344.377 = type { i8 } -define void @_Z23get_reconstruction_pathv() uwtable ssp personality ptr @__gxx_personality_v0 { +define void @_Z23get_reconstruction_pathv(i1 %arg) uwtable ssp personality ptr @__gxx_personality_v0 { entry: %c = alloca %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379, align 8 br label %for.cond @@ -30,7 +30,7 @@ invoke.cont4: ; preds = %for.cond3 to label %invoke.cont6 unwind label %lpad invoke.cont6: ; preds = %invoke.cont4 - br i1 undef, label %for.cond3, label %for.end + br i1 %arg, label %for.cond3, label %for.end lpad: ; preds = %for.end, %invoke.cont4, %for.cond3, %invoke.cont, %for.cond %0 = landingpad { ptr, i32 } diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2015-09-18-Addrspace.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2015-09-18-Addrspace.ll index 60608e8df0868..2d24ef4afab07 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/2015-09-18-Addrspace.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/2015-09-18-Addrspace.ll @@ -6,12 +6,12 @@ target datalayout = "e-m:e-p:16:16-p1:32:16-i32:16-i64:16-n8:16" -define void @foo() { +define void @foo(i1 %arg) { ; CHECK-LABEL: @foo entry: %arrayidx.i1 = getelementptr inbounds i16, ptr undef, i16 undef %arrayidx.i = addrspacecast ptr %arrayidx.i1 to ptr addrspace(1) - br i1 undef, label %for.body.i, label %bar.exit + br i1 %arg, label %for.body.i, label %bar.exit for.body.i: ; preds = %for.body.i, %entry ; When we call makeLoopInvariant (i.e. 
trivial LICM) on this load, it diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/delete-dead-blocks.ll b/llvm/test/Transforms/SimpleLoopUnswitch/delete-dead-blocks.ll index 9ca554023a8cf..c120eeb440d19 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/delete-dead-blocks.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/delete-dead-blocks.ll @@ -47,7 +47,7 @@ get_out2: ; ; This comes from PR38778 ; CHECK-LABEL: @Test2 -define void @Test2(i32) { +define void @Test2(i32, i1 %arg) { header: br label %loop loop: @@ -58,12 +58,12 @@ loop: ] ; CHECK-NOT: {{^}}guarded1: guarded1: - br i1 undef, label %continue, label %leave + br i1 %arg, label %continue, label %leave guarded2: br label %continue check: %val = add i32 0, 1 - br i1 undef, label %continue, label %leave + br i1 %arg, label %continue, label %leave continue: br label %loop leave: @@ -75,7 +75,7 @@ leave: ; Yet another test from PR38778 ; ; CHECK-LABEL: @Test3 -define void @Test3(i32) { +define void @Test3(i32, i1 %arg) { header: br label %outer outer: @@ -95,7 +95,7 @@ case2: br label %continue continue: %local_11_92 = phi i32 [ 0, %switchme ], [ 18, %case2 ], [ 0, %overflow ] - br i1 undef, label %outer, label %inner + br i1 %arg, label %outer, label %inner go_out: unreachable } diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial1.ll b/llvm/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial1.ll index 19a2bc3ad0449..45da85299051f 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial1.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial1.ll @@ -2,7 +2,7 @@ ; PR38283 ; PR38737 -define void @f1() { +define void @f1(i1 %arg) { for.cond1thread-pre-split.lr.ph.lr.ph: %tobool4 = icmp eq i16 undef, 0 br label %for.cond1thread-pre-split @@ -18,7 +18,7 @@ if.end: ; preds = %for.body2 br i1 %tobool4, label %if.end6, label %for.cond1thread-pre-split if.end6: ; preds = %if.end - br i1 undef, label %for.body2, label %for.end + br i1 %arg, label %for.body2, label %for.end for.end: ; preds = %if.end6, %for.body2 ret void diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll b/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll index 07668616ff86d..533b1f691f5ad 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --version 5 ; RUN: opt -passes='loop(simple-loop-unswitch),verify' -simple-loop-unswitch-guards -S < %s | FileCheck %s ; RUN: opt -passes='simple-loop-unswitch' -simple-loop-unswitch-guards -S < %s | FileCheck %s ; RUN: opt -passes='loop-mssa(simple-loop-unswitch),verify' -simple-loop-unswitch-guards -verify-memoryssa -verify-loop-info -S < %s | FileCheck %s @@ -131,30 +132,55 @@ exit: ret void } -define void @test_nested_loop(i1 %cond, i32 %N) { -; CHECK-LABEL: @test_nested_loop( +define void @test_nested_loop(i1 %cond, i32 %N, i1 %arg) { +; CHECK-LABEL: define void @test_nested_loop(i1 %cond, i32 %N, i1 %arg) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[COND:%.*]], label [[ENTRY_SPLIT:%.*]], label [[OUTER_LOOP_SPLIT:%.*]] +; CHECK-NEXT: br i1 %cond, label %entry.split, label %outer_loop.split ; CHECK: entry.split: -; CHECK-NEXT: br label [[OUTER_LOOP:%.*]] +; CHECK-NEXT: br i1 %arg, label %entry.split.split.us, label %entry.split.split +; CHECK: entry.split.split.us: +; CHECK-NEXT: br label %outer_loop.us +; CHECK: outer_loop.us: +; CHECK-NEXT: br label %outer_loop.split.us.us +; 
CHECK: outer_backedge.us: +; CHECK-NEXT: br label %outer_loop.us +; CHECK: outer_loop.split.us.us: +; CHECK-NEXT: br label %loop.us.us +; CHECK: loop.us.us: +; CHECK-NEXT: %iv.us.us = phi i32 [ 0, %outer_loop.split.us.us ], [ %iv.next.us.us, %guarded.us.us ] +; CHECK-NEXT: br label %guarded.us.us +; CHECK: guarded.us.us: +; CHECK-NEXT: %iv.next.us.us = add i32 %iv.us.us, 1 +; CHECK-NEXT: %loop.cond.us.us = icmp slt i32 %iv.next.us.us, %N +; CHECK-NEXT: br i1 %loop.cond.us.us, label %loop.us.us, label %outer_backedge.split.us.us +; CHECK: outer_backedge.split.us.us: +; CHECK-NEXT: br label %outer_backedge.us +; CHECK: entry.split.split: +; CHECK-NEXT: br label %outer_loop ; CHECK: outer_loop: -; CHECK-NEXT: br label [[OUTER_LOOP_SPLIT_US:%.*]] +; CHECK-NEXT: br label %outer_loop.split.us ; CHECK: outer_loop.split.us: -; CHECK-NEXT: br label [[LOOP_US:%.*]] +; CHECK-NEXT: br label %loop.us ; CHECK: loop.us: -; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ 0, [[OUTER_LOOP_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]] ] -; CHECK-NEXT: br label [[GUARDED_US]] +; CHECK-NEXT: %iv.us = phi i32 [ 0, %outer_loop.split.us ], [ %iv.next.us, %guarded.us ] +; CHECK-NEXT: br label %guarded.us ; CHECK: guarded.us: -; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1 -; CHECK-NEXT: [[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]] -; CHECK-NEXT: br i1 [[LOOP_COND_US]], label [[LOOP_US]], label [[OUTER_BACKEDGE_SPLIT_US:%.*]] +; CHECK-NEXT: %iv.next.us = add i32 %iv.us, 1 +; CHECK-NEXT: %loop.cond.us = icmp slt i32 %iv.next.us, %N +; CHECK-NEXT: br i1 %loop.cond.us, label %loop.us, label %outer_backedge.split.us ; CHECK: outer_backedge.split.us: -; CHECK-NEXT: br label [[OUTER_BACKEDGE:%.*]] +; CHECK-NEXT: br label %outer_backedge +; CHECK: outer_loop.split: +; CHECK-NEXT: br label %loop +; CHECK: loop: +; CHECK-NEXT: br label %deopt ; CHECK: deopt: ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ] ; CHECK-NEXT: unreachable ; CHECK: outer_backedge: -; CHECK-NEXT: br i1 false, label [[OUTER_LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: br label %exit +; CHECK: exit: +; CHECK-NEXT: ret void ; entry: @@ -171,7 +197,7 @@ loop: br i1 %loop.cond, label %loop, label %outer_backedge outer_backedge: - br i1 undef, label %outer_loop, label %exit + br i1 %arg, label %outer_loop, label %exit exit: ret void diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/pr37888.ll b/llvm/test/Transforms/SimpleLoopUnswitch/pr37888.ll index 099d6a5456e8e..7c9bb1b8520d6 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/pr37888.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/pr37888.ll @@ -29,7 +29,7 @@ if.end: br label %for.inc for.inc: - br i1 undef, label %for.body, label %for.end + br i1 false, label %for.body, label %for.end for.end: ret void diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/preserve-analyses.ll b/llvm/test/Transforms/SimpleLoopUnswitch/preserve-analyses.ll index cde0d9baf7599..51cce1c3e571b 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/preserve-analyses.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/preserve-analyses.ll @@ -74,13 +74,13 @@ bb10: ; preds = %bb8, %bb ; This is a simplified form of ineqn from above. It triggers some ; different cases in the loop-unswitch code. 
-define void @simplified_ineqn() nounwind readonly { +define void @simplified_ineqn(i1 %arg) nounwind readonly { entry: br label %bb8.outer bb8.outer: ; preds = %bb6, %bb2, %entry %x = phi i32 [ 0, %entry ], [ 0, %bb6 ], [ 1, %bb2 ] ; [#uses=1] - br i1 undef, label %return, label %bb2 + br i1 %arg, label %return, label %bb2 bb2: ; preds = %bb switch i32 %x, label %bb6 [ @@ -88,7 +88,7 @@ bb2: ; preds = %bb ] bb6: ; preds = %bb2 - br i1 undef, label %bb8.outer, label %bb2 + br i1 %arg, label %bb8.outer, label %bb2 return: ; preds = %bb8, %bb ret void @@ -97,17 +97,17 @@ return: ; preds = %bb8, %bb ; This function requires special handling to preserve LCSSA form. ; PR4934 -define void @pnp_check_irq() nounwind noredzone { +define void @pnp_check_irq(i1 %arg) nounwind noredzone { entry: %conv56 = trunc i64 undef to i32 ; [#uses=1] br label %while.cond.i while.cond.i: ; preds = %while.cond.i.backedge, %entry %call.i25 = call ptr @pci_get_device() nounwind noredzone ; [#uses=2] - br i1 undef, label %if.then65, label %while.body.i + br i1 %arg, label %if.then65, label %while.body.i while.body.i: ; preds = %while.cond.i - br i1 undef, label %if.then31.i.i, label %while.cond.i.backedge + br i1 %arg, label %if.then31.i.i, label %while.cond.i.backedge while.cond.i.backedge: ; preds = %if.then31.i.i, %while.body.i br label %while.cond.i diff --git a/llvm/test/Transforms/SimplifyCFG/2004-12-10-SimplifyCFGCrash.ll b/llvm/test/Transforms/SimplifyCFG/2004-12-10-SimplifyCFGCrash.ll index dbd8f7ab862d0..0656811b39a9e 100644 --- a/llvm/test/Transforms/SimplifyCFG/2004-12-10-SimplifyCFGCrash.ll +++ b/llvm/test/Transforms/SimplifyCFG/2004-12-10-SimplifyCFGCrash.ll @@ -1,36 +1,36 @@ ; RUN: opt < %s -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -disable-output -define void @symhash_add() { +define void @symhash_add(i1 %arg) { entry: - br i1 undef, label %then.0, label %UnifiedReturnBlock + br i1 %arg, label %then.0, label %UnifiedReturnBlock then.0: ; preds = %entry - br i1 undef, label %loopentry.2, label %loopentry.1.preheader + br i1 %arg, label %loopentry.2, label %loopentry.1.preheader loopentry.1.preheader: ; preds = %then.0 br label %loopentry.1.outer loopentry.1.outer: ; preds = %loopexit.1, %loopentry.1.preheader br label %loopentry.1 loopentry.1: ; preds = %endif.1, %then.4, %then.3, %then.1, %loopentry.1.outer - br i1 undef, label %loopexit.1, label %no_exit.1 + br i1 %arg, label %loopexit.1, label %no_exit.1 no_exit.1: ; preds = %loopentry.1 - br i1 undef, label %then.1, label %else.0 + br i1 %arg, label %then.1, label %else.0 then.1: ; preds = %no_exit.1 br label %loopentry.1 else.0: ; preds = %no_exit.1 - br i1 undef, label %then.2, label %else.1 + br i1 %arg, label %then.2, label %else.1 then.2: ; preds = %else.0 - br i1 undef, label %then.3, label %endif.1 + br i1 %arg, label %then.3, label %endif.1 then.3: ; preds = %then.2 br label %loopentry.1 else.1: ; preds = %else.0 - br i1 undef, label %endif.1, label %then.4 + br i1 %arg, label %endif.1, label %then.4 then.4: ; preds = %else.1 br label %loopentry.1 endif.1: ; preds = %else.1, %then.2 br label %loopentry.1 loopexit.1: ; preds = %loopentry.1 - br i1 undef, label %loopentry.1.outer, label %loopentry.2 + br i1 %arg, label %loopentry.1.outer, label %loopentry.2 loopentry.2: ; preds = %no_exit.2, %loopexit.1, %then.0 - br i1 undef, label %loopexit.2, label %no_exit.2 + br i1 %arg, label %loopexit.2, label %no_exit.2 no_exit.2: ; preds = %loopentry.2 br label %loopentry.2 loopexit.2: ; preds = %loopentry.2 diff --git 
a/llvm/test/Transforms/SimplifyCFG/2006-06-12-InfLoop.ll b/llvm/test/Transforms/SimplifyCFG/2006-06-12-InfLoop.ll index 0ec88ed071fc2..20c03e05ba0b9 100644 --- a/llvm/test/Transforms/SimplifyCFG/2006-06-12-InfLoop.ll +++ b/llvm/test/Transforms/SimplifyCFG/2006-06-12-InfLoop.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -disable-output ; END. -define void @main(i32 %c) { +define void @main(i32 %c, i1 %arg) { entry: %tmp.9 = icmp eq i32 %c, 2 ; [#uses=1] br i1 %tmp.9, label %endif.0, label %then.0 @@ -186,7 +186,7 @@ no_exit.0.i31.preheader: ; preds = %loopentry.1.i30 no_exit.0.i31: ; preds = %loopexit.3.i, %no_exit.0.i31.preheader br i1 false, label %then.1.i, label %else.0.i then.1.i: ; preds = %no_exit.0.i31 - br i1 undef, label %then.0.i29, label %loopentry.0.i31 + br i1 %arg, label %then.0.i29, label %loopentry.0.i31 then.0.i29: ; preds = %then.1.i unreachable loopentry.0.i31: ; preds = %then.1.i @@ -194,13 +194,13 @@ loopentry.0.i31: ; preds = %then.1.i no_exit.0.i38.preheader: ; preds = %loopentry.0.i31 br label %no_exit.0.i38 no_exit.0.i38: ; preds = %no_exit.0.i38, %no_exit.0.i38.preheader - br i1 undef, label %no_exit.0.i38, label %loopentry.1.i.preheader.loopexit + br i1 %arg, label %no_exit.0.i38, label %loopentry.1.i.preheader.loopexit loopentry.1.i.preheader.loopexit: ; preds = %no_exit.0.i38 br label %loopentry.1.i.preheader loopentry.1.i.preheader: ; preds = %loopentry.1.i.preheader.loopexit, %loopentry.0.i31 br label %loopentry.1.i loopentry.1.i: ; preds = %endif.2.i, %loopentry.1.i.preheader - br i1 undef, label %loopentry.2.i39.preheader, label %loopexit.1.i79.loopexit2 + br i1 %arg, label %loopentry.2.i39.preheader, label %loopexit.1.i79.loopexit2 loopentry.2.i39.preheader: ; preds = %loopentry.1.i br label %loopentry.2.i39 loopentry.2.i39: ; preds = %loopexit.5.i77, %loopentry.2.i39.preheader @@ -212,45 +212,45 @@ loopentry.3.i40: ; preds = %loopexit.3.i51, %loopentry.3.i40.preheader no_exit.3.preheader.i42: ; preds = %loopentry.3.i40 br label %no_exit.3.i49 no_exit.3.i49: ; preds = %no_exit.3.i49, %no_exit.3.preheader.i42 - br i1 undef, label %no_exit.3.i49, label %loopexit.3.i51.loopexit + br i1 %arg, label %no_exit.3.i49, label %loopexit.3.i51.loopexit loopexit.3.i51.loopexit: ; preds = %no_exit.3.i49 br label %loopexit.3.i51 loopexit.3.i51: ; preds = %loopexit.3.i51.loopexit, %loopentry.3.i40 - br i1 undef, label %loopentry.3.i40, label %loopentry.4.i52 + br i1 %arg, label %loopentry.3.i40, label %loopentry.4.i52 loopentry.4.i52: ; preds = %loopexit.3.i51 br i1 false, label %no_exit.4.i54.preheader, label %hamming.exit.i71 no_exit.4.i54.preheader: ; preds = %loopentry.4.i52 br label %no_exit.4.i54 no_exit.4.i54: ; preds = %no_exit.4.backedge.i, %no_exit.4.i54.preheader - br i1 undef, label %then.1.i55, label %endif.1.i56 + br i1 %arg, label %then.1.i55, label %endif.1.i56 then.1.i55: ; preds = %no_exit.4.i54 - br i1 undef, label %no_exit.4.backedge.i, label %loopexit.4.i57 + br i1 %arg, label %no_exit.4.backedge.i, label %loopexit.4.i57 no_exit.4.backedge.i: ; preds = %endif.1.i56, %then.1.i55 br label %no_exit.4.i54 endif.1.i56: ; preds = %no_exit.4.i54 - br i1 undef, label %no_exit.4.backedge.i, label %loopexit.4.i57 + br i1 %arg, label %no_exit.4.backedge.i, label %loopexit.4.i57 loopexit.4.i57: ; preds = %endif.1.i56, %then.1.i55 br i1 false, label %no_exit.i.i69.preheader, label %hamming.exit.i71 no_exit.i.i69.preheader: ; preds = %loopexit.4.i57 br label %no_exit.i.i69 no_exit.i.i69: ; preds = %no_exit.i.i69, 
%no_exit.i.i69.preheader - br i1 undef, label %no_exit.i.i69, label %hamming.exit.i71.loopexit + br i1 %arg, label %no_exit.i.i69, label %hamming.exit.i71.loopexit hamming.exit.i71.loopexit: ; preds = %no_exit.i.i69 br label %hamming.exit.i71 hamming.exit.i71: ; preds = %hamming.exit.i71.loopexit, %loopexit.4.i57, %loopentry.4.i52, %loopentry.2.i39 - br i1 undef, label %endif.2.i, label %loopentry.5.i72 + br i1 %arg, label %endif.2.i, label %loopentry.5.i72 loopentry.5.i72: ; preds = %hamming.exit.i71 br i1 false, label %shortcirc_next.i74.preheader, label %loopexit.5.i77 shortcirc_next.i74.preheader: ; preds = %loopentry.5.i72 br label %shortcirc_next.i74 shortcirc_next.i74: ; preds = %no_exit.5.i76, %shortcirc_next.i74.preheader - br i1 undef, label %no_exit.5.i76, label %loopexit.5.i77.loopexit + br i1 %arg, label %no_exit.5.i76, label %loopexit.5.i77.loopexit no_exit.5.i76: ; preds = %shortcirc_next.i74 - br i1 undef, label %shortcirc_next.i74, label %loopexit.5.i77.loopexit + br i1 %arg, label %shortcirc_next.i74, label %loopexit.5.i77.loopexit loopexit.5.i77.loopexit: ; preds = %no_exit.5.i76, %shortcirc_next.i74 br label %loopexit.5.i77 loopexit.5.i77: ; preds = %loopexit.5.i77.loopexit, %loopentry.5.i72 - br i1 undef, label %loopentry.2.i39, label %loopexit.1.i79.loopexit + br i1 %arg, label %loopentry.2.i39, label %loopexit.1.i79.loopexit endif.2.i: ; preds = %hamming.exit.i71 br label %loopentry.1.i loopexit.1.i79.loopexit: ; preds = %loopexit.5.i77 @@ -258,7 +258,7 @@ loopexit.1.i79.loopexit: ; preds = %loopexit.5.i77 loopexit.1.i79.loopexit2: ; preds = %loopentry.1.i br label %loopexit.1.i79 loopexit.1.i79: ; preds = %loopexit.1.i79.loopexit2, %loopexit.1.i79.loopexit - br i1 undef, label %then.3.i, label %loopentry.6.i80 + br i1 %arg, label %then.3.i, label %loopentry.6.i80 then.3.i: ; preds = %loopexit.1.i79 br i1 false, label %no_exit.6.i82.preheader, label %run.exit loopentry.6.i80: ; preds = %loopexit.1.i79 @@ -266,7 +266,7 @@ loopentry.6.i80: ; preds = %loopexit.1.i79 no_exit.6.i82.preheader: ; preds = %loopentry.6.i80, %then.3.i br label %no_exit.6.i82 no_exit.6.i82: ; preds = %no_exit.6.i82, %no_exit.6.i82.preheader - br i1 undef, label %no_exit.6.i82, label %run.exit.loopexit + br i1 %arg, label %no_exit.6.i82, label %run.exit.loopexit run.exit.loopexit: ; preds = %no_exit.6.i82 br label %run.exit run.exit: ; preds = %run.exit.loopexit, %loopentry.6.i80, %then.3.i diff --git a/llvm/test/Transforms/SimplifyCFG/branch-on-undef.ll b/llvm/test/Transforms/SimplifyCFG/branch-on-undef.ll index bc42ae60730a7..582a38f4af12f 100644 --- a/llvm/test/Transforms/SimplifyCFG/branch-on-undef.ll +++ b/llvm/test/Transforms/SimplifyCFG/branch-on-undef.ll @@ -3,10 +3,10 @@ declare void @foo(i32) -define void @br_undef_simple() { +define void @br_undef_simple(i1 %arg) { ; CHECK-LABEL: @br_undef_simple( ; CHECK-NEXT: call void @foo(i32 0) -; CHECK-NEXT: br i1 undef, label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: br i1 %arg, label [[IF:%.*]], label [[ELSE:%.*]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: if: @@ -17,7 +17,7 @@ define void @br_undef_simple() { ; CHECK-NEXT: br label [[COMMON_RET]] ; call void @foo(i32 0) - br i1 undef, label %if, label %else + br i1 %arg, label %if, label %else if: call void @foo(i32 1) diff --git a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll index bd63e6658c9a6..9fb5164d44d45 100644 --- a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll +++ 
b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll @@ -939,7 +939,7 @@ if.end.loopexit: } @f.b = external global i8, align 1 -define void @pr48450_3() { +define void @pr48450_3(i1 %arg) { ; CHECK-LABEL: @pr48450_3( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_COND1:%.*]] @@ -979,7 +979,7 @@ for.cond.cleanup: br label %cleanup for.body4: - br i1 undef, label %if.then6, label %if.end7 + br i1 %arg, label %if.then6, label %if.end7 if.then6: br label %cleanup diff --git a/llvm/test/Transforms/SimplifyCFG/pr34131.ll b/llvm/test/Transforms/SimplifyCFG/pr34131.ll index 89415cc88aeb1..ed37a969aa47f 100644 --- a/llvm/test/Transforms/SimplifyCFG/pr34131.ll +++ b/llvm/test/Transforms/SimplifyCFG/pr34131.ll @@ -4,7 +4,7 @@ ; Earlier version using auto-generated checks from utils/update_test_checks.py ; had bot problems though... -define void @patatino() { +define void @patatino(i1 %arg) { ; CHECK-LABEL: @patatino @@ -12,16 +12,16 @@ define void @patatino() { bb1: ; preds = %bb36, %0 br label %bb2 bb2: ; preds = %bb3, %bb1 - br i1 undef, label %bb4, label %bb3 + br i1 %arg, label %bb4, label %bb3 bb3: ; preds = %bb4, %bb2 - br i1 undef, label %bb2, label %bb5 + br i1 %arg, label %bb2, label %bb5 bb4: ; preds = %bb2 switch i32 undef, label %bb3 [ ] bb5: ; preds = %bb3 br label %bb6 bb6: ; preds = %bb5 - br i1 undef, label %bb7, label %bb9 + br i1 %arg, label %bb7, label %bb9 bb7: ; preds = %bb6 %tmp = or i64 undef, 1 %tmp8 = icmp ult i64 %tmp, 0 @@ -58,17 +58,17 @@ bb27: ; preds = %bb24 %tmp29 = icmp ult i64 %tmp28, 0 br i1 %tmp29, label %bb30, label %bb9 bb30: ; preds = %bb27 - br i1 undef, label %bb31, label %bb9 + br i1 %arg, label %bb31, label %bb9 bb31: ; preds = %bb30 - br i1 undef, label %bb32, label %bb9 + br i1 %arg, label %bb32, label %bb9 bb32: ; preds = %bb31 - br i1 undef, label %bb33, label %bb9 + br i1 %arg, label %bb33, label %bb9 bb33: ; preds = %bb32 - br i1 undef, label %bb34, label %bb9 + br i1 %arg, label %bb34, label %bb9 bb34: ; preds = %bb33 - br i1 undef, label %bb35, label %bb9 + br i1 %arg, label %bb35, label %bb9 bb35: ; preds = %bb34 - br i1 undef, label %bb36, label %bb9 + br i1 %arg, label %bb36, label %bb9 bb36: ; preds = %bb35 - br i1 undef, label %bb1, label %bb10 + br i1 %arg, label %bb1, label %bb10 } diff --git a/llvm/test/Transforms/Sink/dead-user.ll b/llvm/test/Transforms/Sink/dead-user.ll index 91e61b43ca391..e63aa027a2db8 100644 --- a/llvm/test/Transforms/Sink/dead-user.ll +++ b/llvm/test/Transforms/Sink/dead-user.ll @@ -2,27 +2,26 @@ ; Compiler should not be broken with a dead user. 
; RUN: opt -passes=sink -S < %s | FileCheck %s -define void @test(i16 %p1) { -; CHECK-LABEL: define void @test( -; CHECK-SAME: i16 [[P1:%.*]]) { +define void @test(i16 %p1, i1 %arg) { +; CHECK-LABEL: define void @test(i16 %p1, i1 %arg) { ; CHECK-NEXT: bb.0: -; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[P1]] to i32 -; CHECK-NEXT: br i1 undef, label [[BB_1:%.*]], label [[BB_3:%.*]] +; CHECK-NEXT: %conv = sext i16 %p1 to i32 +; CHECK-NEXT: br i1 %arg, label %bb.1, label %bb.3 ; CHECK: bb.1: -; CHECK-NEXT: br label [[BB_2:%.*]] +; CHECK-NEXT: br label %bb.2 ; CHECK: bb.2: -; CHECK-NEXT: [[AND_2:%.*]] = and i32 undef, [[CONV]] -; CHECK-NEXT: br label [[BB_2]] +; CHECK-NEXT: %and.2 = and i32 undef, %conv +; CHECK-NEXT: br label %bb.2 ; CHECK: bb.3: -; CHECK-NEXT: [[AND_3:%.*]] = and i32 undef, [[CONV]] -; CHECK-NEXT: br label [[BB_3]] +; CHECK-NEXT: %and.3 = and i32 undef, %conv +; CHECK-NEXT: br label %bb.3 ; CHECK: dead: -; CHECK-NEXT: [[AND_DEAD:%.*]] = and i32 undef, [[CONV]] -; CHECK-NEXT: br label [[DEAD:%.*]] +; CHECK-NEXT: %and.dead = and i32 undef, %conv +; CHECK-NEXT: br label %dead ; bb.0: %conv = sext i16 %p1 to i32 - br i1 undef, label %bb.1, label %bb.3 + br i1 %arg, label %bb.1, label %bb.3 bb.1: ; preds = %bb.0 br label %bb.2 diff --git a/llvm/test/Transforms/SpeculativeExecution/PR46267.ll b/llvm/test/Transforms/SpeculativeExecution/PR46267.ll index 8f82a16639744..5ee9fda470d1e 100644 --- a/llvm/test/Transforms/SpeculativeExecution/PR46267.ll +++ b/llvm/test/Transforms/SpeculativeExecution/PR46267.ll @@ -27,11 +27,11 @@ end: ; preds = %notnull, %entry ret ptr %i6 } -define void @f(i32 %i) { +define void @f(i32 %i, i1 %arg) { entry: ; CHECK-LABEL: @f( ; CHECK: %a2 = add i32 %i, 0 - br i1 undef, label %land.rhs, label %land.end + br i1 %arg, label %land.rhs, label %land.end land.rhs: ; preds = %entry ; CHECK: land.rhs: diff --git a/llvm/test/Transforms/StructurizeCFG/loop-continue-phi.ll b/llvm/test/Transforms/StructurizeCFG/loop-continue-phi.ll index 364eb161e298a..eec67e67b540d 100644 --- a/llvm/test/Transforms/StructurizeCFG/loop-continue-phi.ll +++ b/llvm/test/Transforms/StructurizeCFG/loop-continue-phi.ll @@ -1,24 +1,25 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -o - -structurizecfg < %s | FileCheck %s -define void @test1() { +define void @test1(i1 %arg) { ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK-NEXT: %arg.inv = xor i1 %arg, true +; CHECK-NEXT: br label %loop ; CHECK: Flow: -; CHECK-NEXT: br label [[FLOW1:%.*]] +; CHECK-NEXT: br label %Flow1 ; CHECK: loop: -; CHECK-NEXT: [[CTR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[FLOW1]] ] -; CHECK-NEXT: [[CTR_NEXT:%.*]] = add i32 [[CTR]], 1 -; CHECK-NEXT: br i1 undef, label [[LOOP_A:%.*]], label [[FLOW1]] +; CHECK-NEXT: %ctr = phi i32 [ 0, %entry ], [ %0, %Flow1 ] +; CHECK-NEXT: %ctr.next = add i32 %ctr, 1 +; CHECK-NEXT: br i1 %arg.inv, label %loop.a, label %Flow1 ; CHECK: loop.a: -; CHECK-NEXT: br i1 undef, label [[LOOP_B:%.*]], label [[FLOW:%.*]] +; CHECK-NEXT: br i1 %arg.inv, label %loop.b, label %Flow ; CHECK: loop.b: -; CHECK-NEXT: br label [[FLOW]] +; CHECK-NEXT: br label %Flow ; CHECK: Flow1: -; CHECK-NEXT: [[TMP0]] = phi i32 [ [[CTR_NEXT]], [[FLOW]] ], [ undef, [[LOOP]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, [[FLOW]] ], [ true, [[LOOP]] ] -; CHECK-NEXT: br i1 [[TMP1]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK-NEXT: %0 = phi i32 [ %ctr.next, %Flow ], [ undef, %loop ] +; CHECK-NEXT: %1 = phi i1 [ false, %Flow ], [ 
true, %loop ] +; CHECK-NEXT: br i1 %1, label %exit, label %loop ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -28,10 +29,10 @@ entry: loop: %ctr = phi i32 [ 0, %entry ], [ %ctr.next, %loop.a ], [ %ctr.next, %loop.b ] %ctr.next = add i32 %ctr, 1 - br i1 undef, label %exit, label %loop.a + br i1 %arg, label %exit, label %loop.a loop.a: - br i1 undef, label %loop, label %loop.b + br i1 %arg, label %loop, label %loop.b loop.b: br label %loop From fef54d0393fda144a23d764f96d42f5e1d17ab3e Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Tue, 3 Dec 2024 22:02:00 +0100 Subject: [PATCH 129/191] AMDGPU/GlobalISel: Add skeletons for new register bank select passes (#112862) New register bank select for AMDGPU will be split in two passes: - AMDGPURegBankSelect: select banks based on machine uniformity analysis - AMDGPURegBankLegalize: lower instructions that can't be inst-selected with register banks assigned by AMDGPURegBankSelect. AMDGPURegBankLegalize is similar to legalizer but with context of uniformity analysis. Does not change already assigned banks. Main goal of AMDGPURegBankLegalize is to provide high level table-like overview of how to lower generic instructions based on available target features and uniformity info (uniform vs divergent). See RegBankLegalizeRules. Summary of new features: At the moment register bank select assigns register bank to output register using simple algorithm: - one of the inputs is vgpr output is vgpr - all inputs are sgpr output is sgpr. When function does not contain divergent control flow propagating register banks like this works. In general, first point is still correct but second is not when function contains divergent control flow. Examples: - Phi with uniform inputs that go through divergent branch - Instruction with temporal divergent use. To fix this AMDGPURegBankSelect will use machine uniformity analysis to assign vgpr to each divergent and sgpr to each uniform instruction. But some instructions are only available on VALU (for example floating point instructions before gfx1150) and we need to assign vgpr to them. Since we are no longer propagating register banks we need to ensure that uniform instructions get their inputs in sgpr in some way. In AMDGPURegBankLegalize uniform instructions that are only available on VALU will be reassigned to vgpr on all operands and read-any-lane vgpr output to original sgpr output. 
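As a rough illustration (not code from this patch; the enum and helper names below are invented for exposition), the difference between the old propagation-style rule and the new uniformity-based rule can be sketched as:

```cpp
// Illustrative sketch only; Bank and the pickBank* helpers are invented for
// exposition and do not exist in the AMDGPU backend.
#include <initializer_list>

enum class Bank { SGPR, VGPR };

// Old propagation-style rule: any VGPR input makes the output VGPR,
// otherwise (all inputs SGPR) the output stays SGPR.
Bank pickBankByPropagation(std::initializer_list<Bank> InputBanks) {
  for (Bank B : InputBanks)
    if (B == Bank::VGPR)
      return Bank::VGPR;
  return Bank::SGPR;
}

// New rule: the bank follows machine uniformity analysis instead of the
// input banks. Uniform values get SGPR, divergent values get VGPR.
Bank pickBankByUniformity(bool IsDivergent) {
  return IsDivergent ? Bank::VGPR : Bank::SGPR;
}
```

For example, a phi whose incoming values are all in sgpr but which is selected by a divergent branch is itself divergent; the propagation rule would keep it in sgpr, while the uniformity-based rule assigns it vgpr.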
--- llvm/lib/Target/AMDGPU/AMDGPU.h | 10 +- .../Target/AMDGPU/AMDGPURegBankLegalize.cpp | 79 + .../lib/Target/AMDGPU/AMDGPURegBankSelect.cpp | 74 + .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 15 +- llvm/lib/Target/AMDGPU/CMakeLists.txt | 2 + .../regbankselect-mui-regbanklegalize.mir | 858 ++++++++++ .../regbankselect-mui-regbankselect.mir | 858 ++++++++++ .../regbankselect-mui-salu-float.ll | 50 + .../regbankselect-mui-salu-float.mir | 92 ++ .../AMDGPU/GlobalISel/regbankselect-mui.ll | 635 ++++++++ .../AMDGPU/GlobalISel/regbankselect-mui.mir | 1377 +++++++++++++++++ 11 files changed, 4048 insertions(+), 2 deletions(-) create mode 100644 llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp create mode 100644 llvm/lib/Target/AMDGPU/AMDGPURegBankSelect.cpp create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbanklegalize.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbankselect.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-salu-float.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-salu-float.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 1ba6f238342cd..b9769a1baf4d1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -29,6 +29,9 @@ void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &); FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone); FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone); void initializeAMDGPURegBankCombinerPass(PassRegistry &); +FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass(); +FunctionPass *createAMDGPURegBankSelectPass(); +FunctionPass *createAMDGPURegBankLegalizePass(); // SI Passes FunctionPass *createGCNDPPCombinePass(); @@ -36,7 +39,6 @@ FunctionPass *createSIAnnotateControlFlowLegacyPass(); FunctionPass *createSIFoldOperandsLegacyPass(); FunctionPass *createSIPeepholeSDWALegacyPass(); FunctionPass *createSILowerI1CopiesLegacyPass(); -FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass(); FunctionPass *createSIShrinkInstructionsLegacyPass(); FunctionPass *createSILoadStoreOptimizerLegacyPass(); FunctionPass *createSIWholeQuadModePass(); @@ -186,6 +188,12 @@ extern char &SILowerI1CopiesLegacyID; void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &); extern char &AMDGPUGlobalISelDivergenceLoweringID; +void initializeAMDGPURegBankSelectPass(PassRegistry &); +extern char &AMDGPURegBankSelectID; + +void initializeAMDGPURegBankLegalizePass(PassRegistry &); +extern char &AMDGPURegBankLegalizeID; + void initializeAMDGPUMarkLastScratchLoadPass(PassRegistry &); extern char &AMDGPUMarkLastScratchLoadID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp new file mode 100644 index 0000000000000..283173deaeedc --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp @@ -0,0 +1,79 @@ +//===-- AMDGPURegBankLegalize.cpp -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// Lower G_ instructions that can't be inst-selected with register bank +/// assignment from AMDGPURegBankSelect based on machine uniformity info. +/// Given types on all operands, some register bank assignments require lowering +/// while others do not. +/// Note: cases where all register bank assignments would require lowering are +/// lowered in legalizer. +/// For example vgpr S64 G_AND requires lowering to S32 while sgpr S64 does not. +/// Eliminate sgpr S1 by lowering to sgpr S32. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/InitializePasses.h" + +#define DEBUG_TYPE "amdgpu-regbanklegalize" + +using namespace llvm; + +namespace { + +class AMDGPURegBankLegalize : public MachineFunctionPass { +public: + static char ID; + +public: + AMDGPURegBankLegalize() : MachineFunctionPass(ID) { + initializeAMDGPURegBankLegalizePass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + StringRef getPassName() const override { + return "AMDGPU Register Bank Legalize"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + MachineFunctionPass::getAnalysisUsage(AU); + } + + // If there were no phis and we do waterfall expansion machine verifier would + // fail. + MachineFunctionProperties getClearedProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoPHIs); + } +}; + +} // End anonymous namespace. + +INITIALIZE_PASS_BEGIN(AMDGPURegBankLegalize, DEBUG_TYPE, + "AMDGPU Register Bank Legalize", false, false) +INITIALIZE_PASS_END(AMDGPURegBankLegalize, DEBUG_TYPE, + "AMDGPU Register Bank Legalize", false, false) + +char AMDGPURegBankLegalize::ID = 0; + +char &llvm::AMDGPURegBankLegalizeID = AMDGPURegBankLegalize::ID; + +FunctionPass *llvm::createAMDGPURegBankLegalizePass() { + return new AMDGPURegBankLegalize(); +} + +using namespace AMDGPU; + +bool AMDGPURegBankLegalize::runOnMachineFunction(MachineFunction &MF) { + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + return true; +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankSelect.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankSelect.cpp new file mode 100644 index 0000000000000..4c499cb4dfe20 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankSelect.cpp @@ -0,0 +1,74 @@ +//===-- AMDGPURegBankSelect.cpp -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// Assign register banks to all register operands of G_ instructions using +/// machine uniformity analysis. +/// Sgpr - uniform values and some lane masks +/// Vgpr - divergent, non S1, values +/// Vcc - divergent S1 values(lane masks) +/// However in some cases G_ instructions with this register bank assignment +/// can't be inst-selected. This is solved in AMDGPURegBankLegalize. 
+//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/InitializePasses.h" + +#define DEBUG_TYPE "amdgpu-regbankselect" + +using namespace llvm; + +namespace { + +class AMDGPURegBankSelect : public MachineFunctionPass { +public: + static char ID; + + AMDGPURegBankSelect() : MachineFunctionPass(ID) { + initializeAMDGPURegBankSelectPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + StringRef getPassName() const override { + return "AMDGPU Register Bank Select"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + MachineFunctionPass::getAnalysisUsage(AU); + } + + // This pass assigns register banks to all virtual registers, and we maintain + // this property in subsequent passes + MachineFunctionProperties getSetProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::RegBankSelected); + } +}; + +} // End anonymous namespace. + +INITIALIZE_PASS_BEGIN(AMDGPURegBankSelect, DEBUG_TYPE, + "AMDGPU Register Bank Select", false, false) +INITIALIZE_PASS_END(AMDGPURegBankSelect, DEBUG_TYPE, + "AMDGPU Register Bank Select", false, false) + +char AMDGPURegBankSelect::ID = 0; + +char &llvm::AMDGPURegBankSelectID = AMDGPURegBankSelect::ID; + +FunctionPass *llvm::createAMDGPURegBankSelectPass() { + return new AMDGPURegBankSelect(); +} + +bool AMDGPURegBankSelect::runOnMachineFunction(MachineFunction &MF) { + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + return true; +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index a5eb7cbf1bb10..6e2eb254ff60c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -448,6 +448,12 @@ static cl::opt cl::desc("Enable AMDGPUAttributorPass"), cl::init(true), cl::Hidden); +static cl::opt NewRegBankSelect( + "new-reg-bank-select", + cl::desc("Run amdgpu-regbankselect and amdgpu-regbanklegalize instead of " + "regbankselect"), + cl::init(false), cl::Hidden); + extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { // Register the target RegisterTargetMachine X(getTheR600Target()); @@ -464,6 +470,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeGCNDPPCombineLegacyPass(*PR); initializeSILowerI1CopiesLegacyPass(*PR); initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR); + initializeAMDGPURegBankSelectPass(*PR); + initializeAMDGPURegBankLegalizePass(*PR); initializeSILowerWWMCopiesPass(*PR); initializeAMDGPUMarkLastScratchLoadPass(*PR); initializeSILowerSGPRSpillsLegacyPass(*PR); @@ -1385,7 +1393,12 @@ void GCNPassConfig::addPreRegBankSelect() { } bool GCNPassConfig::addRegBankSelect() { - addPass(new RegBankSelect()); + if (NewRegBankSelect) { + addPass(createAMDGPURegBankSelectPass()); + addPass(createAMDGPURegBankLegalizePass()); + } else { + addPass(new RegBankSelect()); + } return false; } diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt index b0197c3c6c280..68d141e338a88 100644 --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -92,6 +92,8 @@ add_llvm_target(AMDGPUCodeGen AMDGPUPromoteAlloca.cpp AMDGPUPromoteKernelArguments.cpp AMDGPURegBankCombiner.cpp + AMDGPURegBankLegalize.cpp + AMDGPURegBankSelect.cpp 
AMDGPURegisterBankInfo.cpp AMDGPURemoveIncompatibleFunctions.cpp AMDGPUReserveWWMRegs.cpp diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbanklegalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbanklegalize.mir new file mode 100644 index 0000000000000..e840c3f1b86ab --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbanklegalize.mir @@ -0,0 +1,858 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=none %s -verify-machineinstrs -o - | FileCheck %s + +--- +name: uniform_in_vgpr +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: uniform_in_vgpr + ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[COPY]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[COPY1]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $vgpr0 + %3:_(s32) = COPY $vgpr1 + %4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) + %5:_(s32) = G_FPTOUI %0(s32) + %6:_(s32) = G_ADD %5, %1 + G_STORE %6(s32), %4(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: back_to_back_uniform_in_vgpr +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: back_to_back_uniform_in_vgpr + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[COPY2]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = G_FADD %0, %1 + %7:_(s32) = G_FPTOUI %6(s32) + %8:_(s32) = G_ADD %7, %2 + G_STORE %8(s32), %5(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: buffer_load_uniform +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: buffer_load_uniform + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[C1]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s32) = COPY $sgpr3 + %4:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32) + %5:_(s32) = COPY $sgpr4 + %6:_(s32) = COPY $vgpr0 + %7:_(s32) = COPY $vgpr1 + %8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) + %9:_(s32) = G_CONSTANT i32 0 + %10:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD %4(<4 x s32>), %9(s32), %5, %9, 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + %11:_(s32) = G_CONSTANT i32 1 + %12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32) = G_UNMERGE_VALUES %10(<4 x s32>) + %16:_(s32) = G_ADD %13, %11 + G_STORE %16(s32), %8(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: buffer_load_divergent +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 + + ; CHECK-LABEL: name: buffer_load_divergent + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[C1]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s32) = COPY $sgpr3 + %4:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32) + %5:_(s32) = COPY $vgpr0 + %6:_(s32) = COPY $vgpr1 + %7:_(s32) = COPY $vgpr2 + %8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) + %9:_(s32) = G_CONSTANT i32 0 + %10:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD %4(<4 x s32>), %9(s32), %5, %9, 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + %11:_(s32) = G_CONSTANT i32 1 + %12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32) = G_UNMERGE_VALUES %10(<4 x s32>) + %16:_(s32) = G_ADD %13, %11 + G_STORE %16(s32), %8(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: vgpr_and_i64 +legalized: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + ; CHECK-LABEL: name: vgpr_and_i64 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[MV1]] + ; CHECK-NEXT: G_STORE [[AND]](s64), [[MV2]](p1) :: (store (s64), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32) + %3:_(s32) = COPY $vgpr2 + %4:_(s32) = COPY $vgpr3 + %5:_(s64) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = COPY $vgpr4 + %7:_(s32) = COPY $vgpr5 + %8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) + %9:_(s64) = G_AND %2, %5 + G_STORE %9(s64), %8(p1) :: (store (s64), addrspace 1) + S_ENDPGM 0 +... + +--- +name: abs_sgpr_i16 +legalized: true +body: | + bb.0: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: abs_sgpr_i16 + ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[ABS:%[0-9]+]]:_(s16) = G_ABS [[TRUNC]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ABS]](s16) + ; CHECK-NEXT: G_STORE [[ANYEXT]](s32), [[MV]](p1) :: (store (s16), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s16) = G_TRUNC %0(s32) + %2:_(s32) = COPY $vgpr0 + %3:_(s32) = COPY $vgpr1 + %4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) + %5:_(s16) = G_ABS %1 + %6:_(s32) = G_ANYEXT %5(s16) + G_STORE %6(s32), %4(p1) :: (store (s16), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: uniform_i1_phi +legalized: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: uniform_i1_phi + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x30000000), %bb.2(0x50000000) + ; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY3]](s32), [[C1]] + ; CHECK-NEXT: G_BRCOND [[ICMP1]](s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY2]](s32), [[C2]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[PHI]](s1) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C3]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + successors: %bb.1(0x30000000), %bb.2(0x50000000) + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32) + %3:_(s32) = COPY $sgpr0 + %4:_(s32) = COPY $sgpr1 + %5:_(s32) = G_CONSTANT i32 6 + %6:_(s1) = G_ICMP intpred(uge), %3(s32), %5 + %7:_(s32) = G_CONSTANT i32 0 + %8:_(s1) = G_ICMP intpred(ne), %4(s32), %7 + G_BRCOND %8(s1), %bb.2 + G_BR %bb.1 + + bb.1: + successors: %bb.2(0x80000000) + + %9:_(s32) = G_CONSTANT i32 1 + %10:_(s1) = G_ICMP intpred(ult), %3(s32), %9 + + bb.2: + %11:_(s1) = G_PHI %6(s1), %bb.0, %10(s1), %bb.1 + %12:_(s32) = G_SEXT %11(s1) + %13:_(s32) = G_CONSTANT i32 2 + %14:_(s32) = G_ADD %12, %13 + G_STORE %14(s32), %2(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: vcc_to_scc +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: vcc_to_scc + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = G_FCONSTANT float 0.000000e+00 + %7:_(s1) = G_FCMP floatpred(oeq), %0(s32), %6 + %8:_(s32) = G_SELECT %7(s1), %1, %2 + G_STORE %8(s32), %5(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: scc_to_vcc +legalized: true +body: | + bb.0: + liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + + ; CHECK-LABEL: name: scc_to_vcc + ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(s32) = COPY $vgpr2 + %4:_(s32) = COPY $vgpr3 + %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = G_CONSTANT i32 0 + %7:_(s1) = G_ICMP intpred(eq), %0(s32), %6 + %8:_(s32) = G_SELECT %7(s1), %1, %2 + G_STORE %8(s32), %5(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: vgpr_to_vcc_trunc +legalized: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + + ; CHECK-LABEL: name: vgpr_to_vcc_trunc + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(s32) = COPY $vgpr4 + %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s1) = G_TRUNC %0(s32) + %7:_(s32) = G_SELECT %6(s1), %1, %2 + G_STORE %7(s32), %5(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: zext +legalized: true +body: | + bb.0: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: zext + ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: G_STORE [[ZEXT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32) + %4:_(s32) = G_CONSTANT i32 10 + %5:_(s1) = G_ICMP intpred(eq), %0(s32), %4 + %6:_(s32) = G_ZEXT %5(s1) + G_STORE %6(s32), %3(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: sext +legalized: true +body: | + bb.0: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: sext + ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; CHECK-NEXT: G_STORE [[SEXT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32) + %4:_(s32) = G_CONSTANT i32 10 + %5:_(s1) = G_ICMP intpred(eq), %0(s32), %4 + %6:_(s32) = G_SEXT %5(s1) + G_STORE %6(s32), %3(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: and_i1_vcc +legalized: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + + ; CHECK-LABEL: name: and_i1_vcc + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY1]](s32), [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[COPY]], [[COPY1]] + ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) + %5:_(s32) = G_CONSTANT i32 10 + %6:_(s1) = G_ICMP intpred(uge), %0(s32), %5 + %7:_(s32) = G_CONSTANT i32 20 + %8:_(s1) = G_ICMP intpred(uge), %1(s32), %7 + %9:_(s1) = G_AND %6, %8 + %10:_(s32) = G_SELECT %9(s1), %0, %1 + G_STORE %10(s32), %4(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: and_i1_scc +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: and_i1_scc + ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY1]](s32), [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[COPY]], [[COPY1]] + ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $vgpr0 + %3:_(s32) = COPY $vgpr1 + %4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) + %5:_(s32) = G_CONSTANT i32 10 + %6:_(s1) = G_ICMP intpred(uge), %0(s32), %5 + %7:_(s32) = G_CONSTANT i32 20 + %8:_(s1) = G_ICMP intpred(uge), %1(s32), %7 + %9:_(s1) = G_AND %6, %8 + %10:_(s32) = G_SELECT %9(s1), %0, %1 + G_STORE %10(s32), %4(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: divergent_phi_with_uniform_inputs +legalized: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: divergent_phi_with_uniform_inputs + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: G_BR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, [[C1]](s32), %bb.1 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) + ; CHECK-NEXT: G_STORE [[PHI]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $vgpr0, $vgpr1, $vgpr2 + + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32) + %4:_(s32) = G_CONSTANT i32 0 + %5:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), %0(s32), %4 + %6:sreg_32_xm0_xexec(s32) = SI_IF %5(s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.1 + + bb.1: + successors: %bb.2(0x80000000) + + %7:_(s32) = G_CONSTANT i32 1 + + bb.2: + %8:_(s32) = G_PHI %4(s32), %bb.0, %7(s32), %bb.1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %6(s32) + G_STORE %8(s32), %3(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: divergent_because_of_temporal_divergent_use +legalized: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: divergent_because_of_temporal_divergent_use + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C2]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[ADD]](s32) + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32) + ; CHECK-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.1 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[PHI2]], [[C3]] + ; CHECK-NEXT: G_STORE [[MUL]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + successors: %bb.1(0x80000000) + liveins: $vgpr0, $vgpr1, $vgpr2 + + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32) + %4:_(s32) = G_CONSTANT i32 -1 + %5:_(s32) = G_CONSTANT i32 0 + + bb.1: + successors: %bb.2(0x04000000), %bb.1(0x7c000000) + + %6:_(s32) = G_PHI %7(s32), %bb.1, %5(s32), %bb.0 + %8:_(s32) = G_PHI %4(s32), %bb.0, %9(s32), %bb.1 + %10:_(s32) = G_CONSTANT i32 1 + %9:_(s32) = G_ADD %8, %10 + %11:_(s32) = G_UITOFP %9(s32) + %12:_(s1) = G_FCMP floatpred(ogt), %11(s32), %0 + %7:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %12(s1), %6(s32) + SI_LOOP %7(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.2 + + bb.2: + %13:_(s32) = G_PHI %9(s32), %bb.1 + %14:_(s32) = G_PHI %7(s32), %bb.1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s32) + %15:_(s32) = G_CONSTANT i32 10 + %16:_(s32) = G_MUL %13, %15 + G_STORE %16(s32), %3(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: loop_with_2breaks +legalized: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: loop_with_2breaks + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %13(s1), %bb.3 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.3, [[C]](s32), %bb.0 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) + ; CHECK-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) + ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C4]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV2]], [[SHL1]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[C5]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C6]](s1) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[COPY9]](s1) + ; CHECK-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF 
[[ICMP1]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: G_BR %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %43(s1), %bb.5 + ; CHECK-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %44(s32), %bb.5, [[DEF]](s32), %bb.1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY11]](s1), [[PHI1]](s32) + ; CHECK-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: G_BR %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C7]](s32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL2]](s64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s32), addrspace 1) + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD2]], [[C8]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD2]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C8]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI2]](s32), [[C9]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) + ; CHECK-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc + ; CHECK-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[COPY9]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4 + ; CHECK-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.4, [[DEF]](s32), %bb.2 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[COPY13]](s1) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32) + ; CHECK-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc + ; CHECK-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.3 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32) + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + successors: %bb.1(0x80000000) + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32) + %3:_(s32) = COPY $vgpr2 + %4:_(s32) = COPY $vgpr3 + %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = COPY $vgpr4 + %7:_(s32) = COPY $vgpr5 + 
%8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) + %9:_(s32) = G_IMPLICIT_DEF + %10:_(s32) = G_CONSTANT i32 0 + %11:sreg_32(s1) = IMPLICIT_DEF + + bb.1: + successors: %bb.2(0x40000000), %bb.3(0x40000000) + + %12:sreg_32(s1) = PHI %11(s1), %bb.0, %13(s1), %bb.3 + %14:_(s32) = G_PHI %15(s32), %bb.3, %10(s32), %bb.0 + %16:_(s32) = G_PHI %10(s32), %bb.0, %17(s32), %bb.3 + %18:sreg_32(s1) = COPY %12(s1) + %19:_(s64) = G_SEXT %16(s32) + %20:_(s32) = G_CONSTANT i32 2 + %21:_(s64) = G_SHL %19, %20(s32) + %22:_(p1) = G_PTR_ADD %5, %21(s64) + %23:_(s32) = G_LOAD %22(p1) :: (load (s32), addrspace 1) + %24:_(s32) = G_CONSTANT i32 0 + %25:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %23(s32), %24 + %26:_(s1) = G_CONSTANT i1 true + %27:sreg_32(s1) = COPY %26(s1) + %28:sreg_32(s1) = S_ANDN2_B32 %18(s1), $exec_lo, implicit-def $scc + %29:sreg_32(s1) = S_AND_B32 $exec_lo, %27(s1), implicit-def $scc + %30:sreg_32(s1) = S_OR_B32 %28(s1), %29(s1), implicit-def $scc + %31:sreg_32(s1) = COPY %30(s1) + %32:sreg_32_xm0_xexec(s32) = SI_IF %25(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.2 + + bb.2: + successors: %bb.4(0x40000000), %bb.5(0x40000000) + + %33:_(s32) = G_CONSTANT i32 2 + %34:_(s64) = G_SHL %19, %33(s32) + %35:_(p1) = G_PTR_ADD %8, %34(s64) + %36:_(s32) = G_LOAD %35(p1) :: (load (s32), addrspace 1) + %37:_(s32) = G_CONSTANT i32 0 + %38:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %36(s32), %37 + %39:_(s1) = G_CONSTANT i1 true + %40:sreg_32(s1) = COPY %39(s1) + %41:sreg_32(s1) = COPY %40(s1) + %42:sreg_32_xm0_xexec(s32) = SI_IF %38(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.4 + + bb.3: + successors: %bb.6(0x04000000), %bb.1(0x7c000000) + + %13:sreg_32(s1) = PHI %30(s1), %bb.1, %43(s1), %bb.5 + %17:_(s32) = G_PHI %44(s32), %bb.5, %9(s32), %bb.1 + %45:sreg_32(s1) = COPY %13(s1) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %32(s32) + %15:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %45(s1), %14(s32) + SI_LOOP %15(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.6 + + bb.4: + successors: %bb.5(0x80000000) + + %46:_(s32) = G_CONSTANT i32 2 + %47:_(s64) = G_SHL %19, %46(s32) + %48:_(p1) = G_PTR_ADD %2, %47(s64) + %49:_(s32) = G_LOAD %48(p1) :: (load (s32), addrspace 1) + %50:_(s32) = G_CONSTANT i32 1 + %51:_(s32) = G_ADD %49, %50 + G_STORE %51(s32), %48(p1) :: (store (s32), addrspace 1) + %52:_(s32) = G_ADD %16, %50 + %53:_(s32) = G_CONSTANT i32 100 + %54:_(s1) = G_ICMP intpred(ult), %16(s32), %53 + %55:sreg_32(s1) = COPY %54(s1) + %56:sreg_32(s1) = S_ANDN2_B32 %41(s1), $exec_lo, implicit-def $scc + %57:sreg_32(s1) = S_AND_B32 $exec_lo, %55(s1), implicit-def $scc + %58:sreg_32(s1) = S_OR_B32 %56(s1), %57(s1), implicit-def $scc + + bb.5: + successors: %bb.3(0x80000000) + + %59:sreg_32(s1) = PHI %40(s1), %bb.2, %58(s1), %bb.4 + %44:_(s32) = G_PHI %52(s32), %bb.4, %9(s32), %bb.2 + %60:sreg_32(s1) = COPY %59(s1) + %61:sreg_32(s1) = COPY %60(s1) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %42(s32) + %62:sreg_32(s1) = S_ANDN2_B32 %31(s1), $exec_lo, implicit-def $scc + %63:sreg_32(s1) = S_AND_B32 $exec_lo, %61(s1), implicit-def $scc + %43:sreg_32(s1) = S_OR_B32 %62(s1), %63(s1), implicit-def $scc + G_BR %bb.3 + + bb.6: + %64:_(s32) = G_PHI %15(s32), %bb.3 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %64(s32) + S_ENDPGM 0 +... 
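
Note: the RUN line above uses -run-pass=none, so the autogenerated CHECK lines simply record the already-legalized MIR as a baseline; the companion regbankselect test added below starts from the same baseline, which is why the two new files carry identical content at this point. Assuming the legacy pass arguments follow the strings registered in the pass initialization above (amdgpu-regbankselect, and amdgpu-regbanklegalize for the companion pass named in the new-reg-bank-select description), regenerating this file against the new pipeline would presumably use a RUN line along the lines of:

# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-regbanklegalize %s -verify-machineinstrs -o - | FileCheck %s

This is only an illustrative sketch; the actual RUN lines are whatever the patch series lands with.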
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbankselect.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbankselect.mir new file mode 100644 index 0000000000000..e840c3f1b86ab --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbankselect.mir @@ -0,0 +1,858 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=none %s -verify-machineinstrs -o - | FileCheck %s + +--- +name: uniform_in_vgpr +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: uniform_in_vgpr + ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[COPY]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[COPY1]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $vgpr0 + %3:_(s32) = COPY $vgpr1 + %4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) + %5:_(s32) = G_FPTOUI %0(s32) + %6:_(s32) = G_ADD %5, %1 + G_STORE %6(s32), %4(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: back_to_back_uniform_in_vgpr +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: back_to_back_uniform_in_vgpr + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[COPY2]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = G_FADD %0, %1 + %7:_(s32) = G_FPTOUI %6(s32) + %8:_(s32) = G_ADD %7, %2 + G_STORE %8(s32), %5(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: buffer_load_uniform +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: buffer_load_uniform + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[C1]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s32) = COPY $sgpr3 + %4:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32) + %5:_(s32) = COPY $sgpr4 + %6:_(s32) = COPY $vgpr0 + %7:_(s32) = COPY $vgpr1 + %8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) + %9:_(s32) = G_CONSTANT i32 0 + %10:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD %4(<4 x s32>), %9(s32), %5, %9, 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + %11:_(s32) = G_CONSTANT i32 1 + %12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32) = G_UNMERGE_VALUES %10(<4 x s32>) + %16:_(s32) = G_ADD %13, %11 + G_STORE %16(s32), %8(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: buffer_load_divergent +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 + + ; CHECK-LABEL: name: buffer_load_divergent + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[C1]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s32) = COPY $sgpr3 + %4:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32) + %5:_(s32) = COPY $vgpr0 + %6:_(s32) = COPY $vgpr1 + %7:_(s32) = COPY $vgpr2 + %8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) + %9:_(s32) = G_CONSTANT i32 0 + %10:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD %4(<4 x s32>), %9(s32), %5, %9, 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + %11:_(s32) = G_CONSTANT i32 1 + %12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32) = G_UNMERGE_VALUES %10(<4 x s32>) + %16:_(s32) = G_ADD %13, %11 + G_STORE %16(s32), %8(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: vgpr_and_i64 +legalized: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + ; CHECK-LABEL: name: vgpr_and_i64 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[MV1]] + ; CHECK-NEXT: G_STORE [[AND]](s64), [[MV2]](p1) :: (store (s64), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32) + %3:_(s32) = COPY $vgpr2 + %4:_(s32) = COPY $vgpr3 + %5:_(s64) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = COPY $vgpr4 + %7:_(s32) = COPY $vgpr5 + %8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) + %9:_(s64) = G_AND %2, %5 + G_STORE %9(s64), %8(p1) :: (store (s64), addrspace 1) + S_ENDPGM 0 +... + +--- +name: abs_sgpr_i16 +legalized: true +body: | + bb.0: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: abs_sgpr_i16 + ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[ABS:%[0-9]+]]:_(s16) = G_ABS [[TRUNC]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ABS]](s16) + ; CHECK-NEXT: G_STORE [[ANYEXT]](s32), [[MV]](p1) :: (store (s16), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s16) = G_TRUNC %0(s32) + %2:_(s32) = COPY $vgpr0 + %3:_(s32) = COPY $vgpr1 + %4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) + %5:_(s16) = G_ABS %1 + %6:_(s32) = G_ANYEXT %5(s16) + G_STORE %6(s32), %4(p1) :: (store (s16), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: uniform_i1_phi +legalized: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: uniform_i1_phi + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x30000000), %bb.2(0x50000000) + ; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY3]](s32), [[C1]] + ; CHECK-NEXT: G_BRCOND [[ICMP1]](s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY2]](s32), [[C2]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[PHI]](s1) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C3]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + successors: %bb.1(0x30000000), %bb.2(0x50000000) + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32) + %3:_(s32) = COPY $sgpr0 + %4:_(s32) = COPY $sgpr1 + %5:_(s32) = G_CONSTANT i32 6 + %6:_(s1) = G_ICMP intpred(uge), %3(s32), %5 + %7:_(s32) = G_CONSTANT i32 0 + %8:_(s1) = G_ICMP intpred(ne), %4(s32), %7 + G_BRCOND %8(s1), %bb.2 + G_BR %bb.1 + + bb.1: + successors: %bb.2(0x80000000) + + %9:_(s32) = G_CONSTANT i32 1 + %10:_(s1) = G_ICMP intpred(ult), %3(s32), %9 + + bb.2: + %11:_(s1) = G_PHI %6(s1), %bb.0, %10(s1), %bb.1 + %12:_(s32) = G_SEXT %11(s1) + %13:_(s32) = G_CONSTANT i32 2 + %14:_(s32) = G_ADD %12, %13 + G_STORE %14(s32), %2(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: vcc_to_scc +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: vcc_to_scc + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = G_FCONSTANT float 0.000000e+00 + %7:_(s1) = G_FCMP floatpred(oeq), %0(s32), %6 + %8:_(s32) = G_SELECT %7(s1), %1, %2 + G_STORE %8(s32), %5(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: scc_to_vcc +legalized: true +body: | + bb.0: + liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + + ; CHECK-LABEL: name: scc_to_vcc + ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(s32) = COPY $vgpr2 + %4:_(s32) = COPY $vgpr3 + %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = G_CONSTANT i32 0 + %7:_(s1) = G_ICMP intpred(eq), %0(s32), %6 + %8:_(s32) = G_SELECT %7(s1), %1, %2 + G_STORE %8(s32), %5(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: vgpr_to_vcc_trunc +legalized: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + + ; CHECK-LABEL: name: vgpr_to_vcc_trunc + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(s32) = COPY $vgpr4 + %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s1) = G_TRUNC %0(s32) + %7:_(s32) = G_SELECT %6(s1), %1, %2 + G_STORE %7(s32), %5(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: zext +legalized: true +body: | + bb.0: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: zext + ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: G_STORE [[ZEXT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32) + %4:_(s32) = G_CONSTANT i32 10 + %5:_(s1) = G_ICMP intpred(eq), %0(s32), %4 + %6:_(s32) = G_ZEXT %5(s1) + G_STORE %6(s32), %3(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: sext +legalized: true +body: | + bb.0: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: sext + ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; CHECK-NEXT: G_STORE [[SEXT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32) + %4:_(s32) = G_CONSTANT i32 10 + %5:_(s1) = G_ICMP intpred(eq), %0(s32), %4 + %6:_(s32) = G_SEXT %5(s1) + G_STORE %6(s32), %3(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: and_i1_vcc +legalized: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + + ; CHECK-LABEL: name: and_i1_vcc + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY1]](s32), [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[COPY]], [[COPY1]] + ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) + %5:_(s32) = G_CONSTANT i32 10 + %6:_(s1) = G_ICMP intpred(uge), %0(s32), %5 + %7:_(s32) = G_CONSTANT i32 20 + %8:_(s1) = G_ICMP intpred(uge), %1(s32), %7 + %9:_(s1) = G_AND %6, %8 + %10:_(s32) = G_SELECT %9(s1), %0, %1 + G_STORE %10(s32), %4(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: and_i1_scc +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: and_i1_scc + ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY1]](s32), [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[COPY]], [[COPY1]] + ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $vgpr0 + %3:_(s32) = COPY $vgpr1 + %4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) + %5:_(s32) = G_CONSTANT i32 10 + %6:_(s1) = G_ICMP intpred(uge), %0(s32), %5 + %7:_(s32) = G_CONSTANT i32 20 + %8:_(s1) = G_ICMP intpred(uge), %1(s32), %7 + %9:_(s1) = G_AND %6, %8 + %10:_(s32) = G_SELECT %9(s1), %0, %1 + G_STORE %10(s32), %4(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: divergent_phi_with_uniform_inputs +legalized: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: divergent_phi_with_uniform_inputs + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: G_BR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, [[C1]](s32), %bb.1 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) + ; CHECK-NEXT: G_STORE [[PHI]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $vgpr0, $vgpr1, $vgpr2 + + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32) + %4:_(s32) = G_CONSTANT i32 0 + %5:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), %0(s32), %4 + %6:sreg_32_xm0_xexec(s32) = SI_IF %5(s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.1 + + bb.1: + successors: %bb.2(0x80000000) + + %7:_(s32) = G_CONSTANT i32 1 + + bb.2: + %8:_(s32) = G_PHI %4(s32), %bb.0, %7(s32), %bb.1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %6(s32) + G_STORE %8(s32), %3(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: divergent_because_of_temporal_divergent_use +legalized: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: divergent_because_of_temporal_divergent_use + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C2]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[ADD]](s32) + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32) + ; CHECK-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.1 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[PHI2]], [[C3]] + ; CHECK-NEXT: G_STORE [[MUL]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + successors: %bb.1(0x80000000) + liveins: $vgpr0, $vgpr1, $vgpr2 + + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32) + %4:_(s32) = G_CONSTANT i32 -1 + %5:_(s32) = G_CONSTANT i32 0 + + bb.1: + successors: %bb.2(0x04000000), %bb.1(0x7c000000) + + %6:_(s32) = G_PHI %7(s32), %bb.1, %5(s32), %bb.0 + %8:_(s32) = G_PHI %4(s32), %bb.0, %9(s32), %bb.1 + %10:_(s32) = G_CONSTANT i32 1 + %9:_(s32) = G_ADD %8, %10 + %11:_(s32) = G_UITOFP %9(s32) + %12:_(s1) = G_FCMP floatpred(ogt), %11(s32), %0 + %7:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %12(s1), %6(s32) + SI_LOOP %7(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.2 + + bb.2: + %13:_(s32) = G_PHI %9(s32), %bb.1 + %14:_(s32) = G_PHI %7(s32), %bb.1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s32) + %15:_(s32) = G_CONSTANT i32 10 + %16:_(s32) = G_MUL %13, %15 + G_STORE %16(s32), %3(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: loop_with_2breaks +legalized: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: loop_with_2breaks + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %13(s1), %bb.3 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.3, [[C]](s32), %bb.0 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) + ; CHECK-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) + ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C4]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV2]], [[SHL1]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[C5]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C6]](s1) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[COPY9]](s1) + ; CHECK-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF 
[[ICMP1]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: G_BR %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %43(s1), %bb.5 + ; CHECK-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %44(s32), %bb.5, [[DEF]](s32), %bb.1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY11]](s1), [[PHI1]](s32) + ; CHECK-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: G_BR %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C7]](s32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL2]](s64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s32), addrspace 1) + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD2]], [[C8]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD2]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C8]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI2]](s32), [[C9]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) + ; CHECK-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc + ; CHECK-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[COPY9]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4 + ; CHECK-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.4, [[DEF]](s32), %bb.2 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[COPY13]](s1) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32) + ; CHECK-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc + ; CHECK-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.3 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32) + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + successors: %bb.1(0x80000000) + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32) + %3:_(s32) = COPY $vgpr2 + %4:_(s32) = COPY $vgpr3 + %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = COPY $vgpr4 + %7:_(s32) = COPY $vgpr5 + 
%8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) + %9:_(s32) = G_IMPLICIT_DEF + %10:_(s32) = G_CONSTANT i32 0 + %11:sreg_32(s1) = IMPLICIT_DEF + + bb.1: + successors: %bb.2(0x40000000), %bb.3(0x40000000) + + %12:sreg_32(s1) = PHI %11(s1), %bb.0, %13(s1), %bb.3 + %14:_(s32) = G_PHI %15(s32), %bb.3, %10(s32), %bb.0 + %16:_(s32) = G_PHI %10(s32), %bb.0, %17(s32), %bb.3 + %18:sreg_32(s1) = COPY %12(s1) + %19:_(s64) = G_SEXT %16(s32) + %20:_(s32) = G_CONSTANT i32 2 + %21:_(s64) = G_SHL %19, %20(s32) + %22:_(p1) = G_PTR_ADD %5, %21(s64) + %23:_(s32) = G_LOAD %22(p1) :: (load (s32), addrspace 1) + %24:_(s32) = G_CONSTANT i32 0 + %25:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %23(s32), %24 + %26:_(s1) = G_CONSTANT i1 true + %27:sreg_32(s1) = COPY %26(s1) + %28:sreg_32(s1) = S_ANDN2_B32 %18(s1), $exec_lo, implicit-def $scc + %29:sreg_32(s1) = S_AND_B32 $exec_lo, %27(s1), implicit-def $scc + %30:sreg_32(s1) = S_OR_B32 %28(s1), %29(s1), implicit-def $scc + %31:sreg_32(s1) = COPY %30(s1) + %32:sreg_32_xm0_xexec(s32) = SI_IF %25(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.2 + + bb.2: + successors: %bb.4(0x40000000), %bb.5(0x40000000) + + %33:_(s32) = G_CONSTANT i32 2 + %34:_(s64) = G_SHL %19, %33(s32) + %35:_(p1) = G_PTR_ADD %8, %34(s64) + %36:_(s32) = G_LOAD %35(p1) :: (load (s32), addrspace 1) + %37:_(s32) = G_CONSTANT i32 0 + %38:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %36(s32), %37 + %39:_(s1) = G_CONSTANT i1 true + %40:sreg_32(s1) = COPY %39(s1) + %41:sreg_32(s1) = COPY %40(s1) + %42:sreg_32_xm0_xexec(s32) = SI_IF %38(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.4 + + bb.3: + successors: %bb.6(0x04000000), %bb.1(0x7c000000) + + %13:sreg_32(s1) = PHI %30(s1), %bb.1, %43(s1), %bb.5 + %17:_(s32) = G_PHI %44(s32), %bb.5, %9(s32), %bb.1 + %45:sreg_32(s1) = COPY %13(s1) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %32(s32) + %15:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %45(s1), %14(s32) + SI_LOOP %15(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.6 + + bb.4: + successors: %bb.5(0x80000000) + + %46:_(s32) = G_CONSTANT i32 2 + %47:_(s64) = G_SHL %19, %46(s32) + %48:_(p1) = G_PTR_ADD %2, %47(s64) + %49:_(s32) = G_LOAD %48(p1) :: (load (s32), addrspace 1) + %50:_(s32) = G_CONSTANT i32 1 + %51:_(s32) = G_ADD %49, %50 + G_STORE %51(s32), %48(p1) :: (store (s32), addrspace 1) + %52:_(s32) = G_ADD %16, %50 + %53:_(s32) = G_CONSTANT i32 100 + %54:_(s1) = G_ICMP intpred(ult), %16(s32), %53 + %55:sreg_32(s1) = COPY %54(s1) + %56:sreg_32(s1) = S_ANDN2_B32 %41(s1), $exec_lo, implicit-def $scc + %57:sreg_32(s1) = S_AND_B32 $exec_lo, %55(s1), implicit-def $scc + %58:sreg_32(s1) = S_OR_B32 %56(s1), %57(s1), implicit-def $scc + + bb.5: + successors: %bb.3(0x80000000) + + %59:sreg_32(s1) = PHI %40(s1), %bb.2, %58(s1), %bb.4 + %44:_(s32) = G_PHI %52(s32), %bb.4, %9(s32), %bb.2 + %60:sreg_32(s1) = COPY %59(s1) + %61:sreg_32(s1) = COPY %60(s1) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %42(s32) + %62:sreg_32(s1) = S_ANDN2_B32 %31(s1), $exec_lo, implicit-def $scc + %63:sreg_32(s1) = S_AND_B32 $exec_lo, %61(s1), implicit-def $scc + %43:sreg_32(s1) = S_OR_B32 %62(s1), %63(s1), implicit-def $scc + G_BR %bb.3 + + bb.6: + %64:_(s32) = G_PHI %15(s32), %bb.3 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %64(s32) + S_ENDPGM 0 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-salu-float.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-salu-float.ll new file mode 100644 index 0000000000000..0b4eb458b254f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-salu-float.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=OLD_RBS_GFX10 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=OLD_RBS_GFX12 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=NEW_RBS_GFX10 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=NEW_RBS_GFX12 %s + +define amdgpu_ps void @salu_float(float inreg %a, float inreg %b, i32 inreg %c, ptr addrspace(1) %ptr) { +; OLD_RBS_GFX10-LABEL: salu_float: +; OLD_RBS_GFX10: ; %bb.0: +; OLD_RBS_GFX10-NEXT: v_add_f32_e64 v2, s0, s1 +; OLD_RBS_GFX10-NEXT: v_cvt_u32_f32_e32 v2, v2 +; OLD_RBS_GFX10-NEXT: v_add_nc_u32_e32 v2, s2, v2 +; OLD_RBS_GFX10-NEXT: global_store_dword v[0:1], v2, off +; OLD_RBS_GFX10-NEXT: s_endpgm +; +; OLD_RBS_GFX12-LABEL: salu_float: +; OLD_RBS_GFX12: ; %bb.0: +; OLD_RBS_GFX12-NEXT: s_add_f32 s0, s0, s1 +; OLD_RBS_GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_3) +; OLD_RBS_GFX12-NEXT: s_cvt_u32_f32 s0, s0 +; OLD_RBS_GFX12-NEXT: s_add_co_i32 s0, s0, s2 +; OLD_RBS_GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; OLD_RBS_GFX12-NEXT: v_mov_b32_e32 v2, s0 +; OLD_RBS_GFX12-NEXT: global_store_b32 v[0:1], v2, off +; OLD_RBS_GFX12-NEXT: s_endpgm +; +; NEW_RBS_GFX10-LABEL: salu_float: +; NEW_RBS_GFX10: ; %bb.0: +; NEW_RBS_GFX10-NEXT: v_add_f32_e64 v2, s0, s1 +; NEW_RBS_GFX10-NEXT: v_cvt_u32_f32_e32 v2, v2 +; NEW_RBS_GFX10-NEXT: v_add_nc_u32_e32 v2, s2, v2 +; NEW_RBS_GFX10-NEXT: global_store_dword v[0:1], v2, off +; NEW_RBS_GFX10-NEXT: s_endpgm +; +; NEW_RBS_GFX12-LABEL: salu_float: +; NEW_RBS_GFX12: ; %bb.0: +; NEW_RBS_GFX12-NEXT: s_add_f32 s0, s0, s1 +; NEW_RBS_GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_3) +; NEW_RBS_GFX12-NEXT: s_cvt_u32_f32 s0, s0 +; NEW_RBS_GFX12-NEXT: s_add_co_i32 s0, s0, s2 +; NEW_RBS_GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; NEW_RBS_GFX12-NEXT: v_mov_b32_e32 v2, s0 +; NEW_RBS_GFX12-NEXT: global_store_b32 v[0:1], v2, off +; NEW_RBS_GFX12-NEXT: s_endpgm + %add = fadd float %a, %b + %add.i32 = fptoui float %add to i32 + %res = add i32 %add.i32, %c + store i32 %res, ptr addrspace(1) %ptr + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-salu-float.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-salu-float.mir new file mode 100644 index 0000000000000..98a8f4f04e49d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-salu-float.mir @@ -0,0 +1,92 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=regbankselect %s -o - | FileCheck %s -check-prefixes=OLD_RBS_GFX10 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=regbankselect %s -o - | FileCheck %s -check-prefixes=OLD_RBS_GFX12 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=regbankselect %s -o - | FileCheck %s -check-prefixes=NEW_RBS_GFX10 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=regbankselect %s -o - | 
FileCheck %s -check-prefixes=NEW_RBS_GFX12 + +--- +name: salu_float +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + + ; OLD_RBS_GFX10-LABEL: name: salu_float + ; OLD_RBS_GFX10: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + ; OLD_RBS_GFX10-NEXT: {{ $}} + ; OLD_RBS_GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; OLD_RBS_GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; OLD_RBS_GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; OLD_RBS_GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS_GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS_GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; OLD_RBS_GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; OLD_RBS_GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; OLD_RBS_GFX10-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[COPY5]], [[COPY6]] + ; OLD_RBS_GFX10-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[FADD]](s32) + ; OLD_RBS_GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) + ; OLD_RBS_GFX10-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[FPTOUI]], [[COPY7]] + ; OLD_RBS_GFX10-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS_GFX10-NEXT: S_ENDPGM 0 + ; + ; OLD_RBS_GFX12-LABEL: name: salu_float + ; OLD_RBS_GFX12: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + ; OLD_RBS_GFX12-NEXT: {{ $}} + ; OLD_RBS_GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; OLD_RBS_GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; OLD_RBS_GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; OLD_RBS_GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS_GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS_GFX12-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; OLD_RBS_GFX12-NEXT: [[FADD:%[0-9]+]]:sgpr(s32) = G_FADD [[COPY]], [[COPY1]] + ; OLD_RBS_GFX12-NEXT: [[FPTOUI:%[0-9]+]]:sgpr(s32) = G_FPTOUI [[FADD]](s32) + ; OLD_RBS_GFX12-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[FPTOUI]], [[COPY2]] + ; OLD_RBS_GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; OLD_RBS_GFX12-NEXT: G_STORE [[COPY5]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS_GFX12-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS_GFX10-LABEL: name: salu_float + ; NEW_RBS_GFX10: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + ; NEW_RBS_GFX10-NEXT: {{ $}} + ; NEW_RBS_GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; NEW_RBS_GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; NEW_RBS_GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; NEW_RBS_GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS_GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS_GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; NEW_RBS_GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; NEW_RBS_GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; NEW_RBS_GFX10-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[COPY5]], [[COPY6]] + ; NEW_RBS_GFX10-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[FADD]](s32) + ; NEW_RBS_GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) + ; NEW_RBS_GFX10-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[FPTOUI]], [[COPY7]] + ; NEW_RBS_GFX10-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS_GFX10-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS_GFX12-LABEL: name: salu_float + ; NEW_RBS_GFX12: liveins: $sgpr0, $sgpr1, $sgpr2, 
$vgpr0, $vgpr1 + ; NEW_RBS_GFX12-NEXT: {{ $}} + ; NEW_RBS_GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; NEW_RBS_GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; NEW_RBS_GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; NEW_RBS_GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS_GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS_GFX12-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; NEW_RBS_GFX12-NEXT: [[FADD:%[0-9]+]]:sgpr(s32) = G_FADD [[COPY]], [[COPY1]] + ; NEW_RBS_GFX12-NEXT: [[FPTOUI:%[0-9]+]]:sgpr(s32) = G_FPTOUI [[FADD]](s32) + ; NEW_RBS_GFX12-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[FPTOUI]], [[COPY2]] + ; NEW_RBS_GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; NEW_RBS_GFX12-NEXT: G_STORE [[COPY5]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS_GFX12-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = G_FADD %0, %1 + %7:_(s32) = G_FPTOUI %6(s32) + %8:_(s32) = G_ADD %7, %2 + G_STORE %8(s32), %5(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll new file mode 100644 index 0000000000000..287a8ab0e52f5 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll @@ -0,0 +1,635 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=OLD_RBS %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=NEW_RBS %s + +; if instruction is uniform and there is available instruction, select SALU instruction +define amdgpu_ps void @uniform_in_vgpr(float inreg %a, i32 inreg %b, ptr addrspace(1) %ptr) { +; OLD_RBS-LABEL: uniform_in_vgpr: +; OLD_RBS: ; %bb.0: +; OLD_RBS-NEXT: v_cvt_u32_f32_e32 v2, s0 +; OLD_RBS-NEXT: v_add_nc_u32_e32 v2, s1, v2 +; OLD_RBS-NEXT: global_store_dword v[0:1], v2, off +; OLD_RBS-NEXT: s_endpgm +; +; NEW_RBS-LABEL: uniform_in_vgpr: +; NEW_RBS: ; %bb.0: +; NEW_RBS-NEXT: v_cvt_u32_f32_e32 v2, s0 +; NEW_RBS-NEXT: v_add_nc_u32_e32 v2, s1, v2 +; NEW_RBS-NEXT: global_store_dword v[0:1], v2, off +; NEW_RBS-NEXT: s_endpgm + %a.i32 = fptoui float %a to i32 + %res = add i32 %a.i32, %b + store i32 %res, ptr addrspace(1) %ptr + ret void +} + +; copy sgpr to vgpr + readfirstlane vgpr to sgpr combine from rb-legalize +define amdgpu_ps void @back_to_back_uniform_in_vgpr(float inreg %a, float inreg %b, i32 inreg %c, ptr addrspace(1) %ptr) { +; OLD_RBS-LABEL: back_to_back_uniform_in_vgpr: +; OLD_RBS: ; %bb.0: +; OLD_RBS-NEXT: v_add_f32_e64 v2, s0, s1 +; OLD_RBS-NEXT: v_cvt_u32_f32_e32 v2, v2 +; OLD_RBS-NEXT: v_add_nc_u32_e32 v2, s2, v2 +; OLD_RBS-NEXT: global_store_dword v[0:1], v2, off +; OLD_RBS-NEXT: s_endpgm +; +; NEW_RBS-LABEL: back_to_back_uniform_in_vgpr: +; NEW_RBS: ; %bb.0: +; NEW_RBS-NEXT: v_add_f32_e64 v2, s0, s1 +; NEW_RBS-NEXT: v_cvt_u32_f32_e32 v2, v2 +; NEW_RBS-NEXT: v_add_nc_u32_e32 v2, s2, v2 +; NEW_RBS-NEXT: global_store_dword v[0:1], v2, off +; NEW_RBS-NEXT: s_endpgm + %add = fadd float %a, %b + %add.i32 = fptoui float %add to i32 + %res = add i32 %add.i32, %c + store i32 %res, ptr addrspace(1) %ptr + ret void +} + +; fast rules for vector instructions +define amdgpu_cs void 
@buffer_load_uniform(<4 x i32> inreg %rsrc, i32 inreg %voffset, ptr addrspace(1) %ptr) { +; OLD_RBS-LABEL: buffer_load_uniform: +; OLD_RBS: ; %bb.0: ; %.entry +; OLD_RBS-NEXT: v_mov_b32_e32 v2, s4 +; OLD_RBS-NEXT: buffer_load_dwordx4 v[2:5], v2, s[0:3], 0 offen +; OLD_RBS-NEXT: s_waitcnt vmcnt(0) +; OLD_RBS-NEXT: v_add_nc_u32_e32 v2, 1, v3 +; OLD_RBS-NEXT: global_store_dword v[0:1], v2, off +; OLD_RBS-NEXT: s_endpgm +; +; NEW_RBS-LABEL: buffer_load_uniform: +; NEW_RBS: ; %bb.0: ; %.entry +; NEW_RBS-NEXT: v_mov_b32_e32 v2, s4 +; NEW_RBS-NEXT: buffer_load_dwordx4 v[2:5], v2, s[0:3], 0 offen +; NEW_RBS-NEXT: s_waitcnt vmcnt(0) +; NEW_RBS-NEXT: v_add_nc_u32_e32 v2, 1, v3 +; NEW_RBS-NEXT: global_store_dword v[0:1], v2, off +; NEW_RBS-NEXT: s_endpgm +.entry: + %vec = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %voffset, i32 0, i32 0) + %el1 = extractelement <4 x i32> %vec, i64 1 + %res = add i32 %el1, 1 + store i32 %res, ptr addrspace(1) %ptr + ret void +} + +define amdgpu_cs void @buffer_load_divergent(<4 x i32> inreg %rsrc, i32 %voffset, ptr addrspace(1) %ptr) { +; OLD_RBS-LABEL: buffer_load_divergent: +; OLD_RBS: ; %bb.0: ; %.entry +; OLD_RBS-NEXT: buffer_load_dwordx4 v[3:6], v0, s[0:3], 0 offen +; OLD_RBS-NEXT: s_waitcnt vmcnt(0) +; OLD_RBS-NEXT: v_add_nc_u32_e32 v0, 1, v4 +; OLD_RBS-NEXT: global_store_dword v[1:2], v0, off +; OLD_RBS-NEXT: s_endpgm +; +; NEW_RBS-LABEL: buffer_load_divergent: +; NEW_RBS: ; %bb.0: ; %.entry +; NEW_RBS-NEXT: buffer_load_dwordx4 v[3:6], v0, s[0:3], 0 offen +; NEW_RBS-NEXT: s_waitcnt vmcnt(0) +; NEW_RBS-NEXT: v_add_nc_u32_e32 v0, 1, v4 +; NEW_RBS-NEXT: global_store_dword v[1:2], v0, off +; NEW_RBS-NEXT: s_endpgm +.entry: + %vec = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %voffset, i32 0, i32 0) + %el1 = extractelement <4 x i32> %vec, i64 1 + %res = add i32 %el1, 1 + store i32 %res, ptr addrspace(1) %ptr + ret void +} + +;lowering in rb-legalize (sgpr S64 is legal, vgpr has to be split to S32) +define amdgpu_ps void @vgpr_and_i64(i64 %a, i64 %b, ptr addrspace(1) %ptr) { +; OLD_RBS-LABEL: vgpr_and_i64: +; OLD_RBS: ; %bb.0: +; OLD_RBS-NEXT: v_and_b32_e32 v0, v0, v2 +; OLD_RBS-NEXT: v_and_b32_e32 v1, v1, v3 +; OLD_RBS-NEXT: global_store_dwordx2 v[4:5], v[0:1], off +; OLD_RBS-NEXT: s_endpgm +; +; NEW_RBS-LABEL: vgpr_and_i64: +; NEW_RBS: ; %bb.0: +; NEW_RBS-NEXT: v_and_b32_e32 v0, v0, v2 +; NEW_RBS-NEXT: v_and_b32_e32 v1, v1, v3 +; NEW_RBS-NEXT: global_store_dwordx2 v[4:5], v[0:1], off +; NEW_RBS-NEXT: s_endpgm + %res = and i64 %a, %b + store i64 %res, ptr addrspace(1) %ptr + ret void +} + +; It is up to user instruction to deal with potential truncated bits in reg. +; Here G_ABS needs to sign extend S16 in reg to S32 and then do S32 G_ABS. 
+define amdgpu_ps void @abs_sgpr_i16(i16 inreg %arg, ptr addrspace(1) %ptr) { +; OLD_RBS-LABEL: abs_sgpr_i16: +; OLD_RBS: ; %bb.0: +; OLD_RBS-NEXT: s_sext_i32_i16 s0, s0 +; OLD_RBS-NEXT: s_abs_i32 s0, s0 +; OLD_RBS-NEXT: v_mov_b32_e32 v2, s0 +; OLD_RBS-NEXT: global_store_short v[0:1], v2, off +; OLD_RBS-NEXT: s_endpgm +; +; NEW_RBS-LABEL: abs_sgpr_i16: +; NEW_RBS: ; %bb.0: +; NEW_RBS-NEXT: s_sext_i32_i16 s0, s0 +; NEW_RBS-NEXT: s_abs_i32 s0, s0 +; NEW_RBS-NEXT: v_mov_b32_e32 v2, s0 +; NEW_RBS-NEXT: global_store_short v[0:1], v2, off +; NEW_RBS-NEXT: s_endpgm + %res = call i16 @llvm.abs.i16(i16 %arg, i1 false) + store i16 %res, ptr addrspace(1) %ptr + ret void +} + +define amdgpu_ps void @uniform_i1_phi(ptr addrspace(1) %out, i32 inreg %tid, i32 inreg %cond) { +; OLD_RBS-LABEL: uniform_i1_phi: +; OLD_RBS: ; %bb.0: ; %A +; OLD_RBS-NEXT: s_cmp_ge_u32 s0, 6 +; OLD_RBS-NEXT: s_cselect_b32 s2, 1, 0 +; OLD_RBS-NEXT: s_cmp_lg_u32 s1, 0 +; OLD_RBS-NEXT: s_cbranch_scc1 .LBB6_2 +; OLD_RBS-NEXT: ; %bb.1: ; %B +; OLD_RBS-NEXT: s_cmp_lt_u32 s0, 1 +; OLD_RBS-NEXT: s_cselect_b32 s2, 1, 0 +; OLD_RBS-NEXT: .LBB6_2: ; %exit +; OLD_RBS-NEXT: s_bfe_i32 s0, s2, 0x10000 +; OLD_RBS-NEXT: s_add_i32 s0, s0, 2 +; OLD_RBS-NEXT: v_mov_b32_e32 v2, s0 +; OLD_RBS-NEXT: global_store_dword v[0:1], v2, off +; OLD_RBS-NEXT: s_endpgm +; +; NEW_RBS-LABEL: uniform_i1_phi: +; NEW_RBS: ; %bb.0: ; %A +; NEW_RBS-NEXT: s_cmp_ge_u32 s0, 6 +; NEW_RBS-NEXT: s_cselect_b32 s2, 1, 0 +; NEW_RBS-NEXT: s_cmp_lg_u32 s1, 0 +; NEW_RBS-NEXT: s_cbranch_scc1 .LBB6_2 +; NEW_RBS-NEXT: ; %bb.1: ; %B +; NEW_RBS-NEXT: s_cmp_lt_u32 s0, 1 +; NEW_RBS-NEXT: s_cselect_b32 s2, 1, 0 +; NEW_RBS-NEXT: .LBB6_2: ; %exit +; NEW_RBS-NEXT: s_bfe_i32 s0, s2, 0x10000 +; NEW_RBS-NEXT: s_add_i32 s0, s0, 2 +; NEW_RBS-NEXT: v_mov_b32_e32 v2, s0 +; NEW_RBS-NEXT: global_store_dword v[0:1], v2, off +; NEW_RBS-NEXT: s_endpgm +A: + %val_A = icmp uge i32 %tid, 6 + %cmp = icmp eq i32 %cond, 0 + br i1 %cmp, label %B, label %exit + +B: + %val_B = icmp ult i32 %tid, 1 + br label %exit + +exit: + %phi = phi i1 [ %val_A, %A ], [ %val_B, %B ] + %sel = select i1 %phi, i32 1, i32 2 + store i32 %sel, ptr addrspace(1) %out + ret void +} + +; this is kind of i1 readfirstlane +; uniform i1 result on instruction that is only available on VALU +define amdgpu_ps void @vcc_to_scc(float inreg %a, i32 inreg %b, i32 inreg %c, ptr addrspace(1) %ptr) { +; OLD_RBS-LABEL: vcc_to_scc: +; OLD_RBS: ; %bb.0: +; OLD_RBS-NEXT: v_mov_b32_e32 v2, s2 +; OLD_RBS-NEXT: v_cmp_eq_f32_e64 s0, s0, 0 +; OLD_RBS-NEXT: v_cndmask_b32_e64 v2, v2, s1, s0 +; OLD_RBS-NEXT: global_store_dword v[0:1], v2, off +; OLD_RBS-NEXT: s_endpgm +; +; NEW_RBS-LABEL: vcc_to_scc: +; NEW_RBS: ; %bb.0: +; NEW_RBS-NEXT: v_mov_b32_e32 v2, s2 +; NEW_RBS-NEXT: v_cmp_eq_f32_e64 s0, s0, 0 +; NEW_RBS-NEXT: v_cndmask_b32_e64 v2, v2, s1, s0 +; NEW_RBS-NEXT: global_store_dword v[0:1], v2, off +; NEW_RBS-NEXT: s_endpgm + %vcc_to_scc = fcmp oeq float %a, 0.0 + %select = select i1 %vcc_to_scc, i32 %b, i32 %c + store i32 %select, ptr addrspace(1) %ptr + ret void +} + +; combiner in rb-legalize recognizes sgpr S1 to vcc copy +define amdgpu_ps void @scc_to_vcc(i32 inreg %a, i32 %b, i32 %c, ptr addrspace(1) %ptr) { +; OLD_RBS-LABEL: scc_to_vcc: +; OLD_RBS: ; %bb.0: +; OLD_RBS-NEXT: s_cmp_eq_u32 s0, 0 +; OLD_RBS-NEXT: s_cselect_b32 s0, 1, 0 +; OLD_RBS-NEXT: s_and_b32 s0, 1, s0 +; OLD_RBS-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 +; OLD_RBS-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; OLD_RBS-NEXT: global_store_dword v[2:3], v0, off +; OLD_RBS-NEXT: s_endpgm +; 
+; NEW_RBS-LABEL: scc_to_vcc: +; NEW_RBS: ; %bb.0: +; NEW_RBS-NEXT: s_cmp_eq_u32 s0, 0 +; NEW_RBS-NEXT: s_cselect_b32 s0, 1, 0 +; NEW_RBS-NEXT: s_and_b32 s0, 1, s0 +; NEW_RBS-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 +; NEW_RBS-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; NEW_RBS-NEXT: global_store_dword v[2:3], v0, off +; NEW_RBS-NEXT: s_endpgm + %scc_to_vcc = icmp eq i32 %a, 0 + %select = select i1 %scc_to_vcc, i32 %b, i32 %c + store i32 %select, ptr addrspace(1) %ptr + ret void +} + +; this is only G_TRUNC that is not no-op in global-isel for AMDGPU +define amdgpu_ps void @vgpr_to_vcc_trunc(i32 %a, i32 %b, i32 %c, ptr addrspace(1) %ptr) { +; OLD_RBS-LABEL: vgpr_to_vcc_trunc: +; OLD_RBS: ; %bb.0: +; OLD_RBS-NEXT: v_and_b32_e32 v0, 1, v0 +; OLD_RBS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; OLD_RBS-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo +; OLD_RBS-NEXT: global_store_dword v[3:4], v0, off +; OLD_RBS-NEXT: s_endpgm +; +; NEW_RBS-LABEL: vgpr_to_vcc_trunc: +; NEW_RBS: ; %bb.0: +; NEW_RBS-NEXT: v_and_b32_e32 v0, 1, v0 +; NEW_RBS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; NEW_RBS-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo +; NEW_RBS-NEXT: global_store_dword v[3:4], v0, off +; NEW_RBS-NEXT: s_endpgm + %vcc = trunc i32 %a to i1 + %select = select i1 %vcc, i32 %b, i32 %c + store i32 %select, ptr addrspace(1) %ptr + ret void +} + +; i1 input to zext and sext is something that survived legalizer (not trunc) +; lower to select +define amdgpu_ps void @zext(i32 inreg %a, ptr addrspace(1) %ptr) { +; OLD_RBS-LABEL: zext: +; OLD_RBS: ; %bb.0: +; OLD_RBS-NEXT: s_cmp_eq_u32 s0, 10 +; OLD_RBS-NEXT: s_cselect_b32 s0, 1, 0 +; OLD_RBS-NEXT: v_mov_b32_e32 v2, s0 +; OLD_RBS-NEXT: global_store_dword v[0:1], v2, off +; OLD_RBS-NEXT: s_endpgm +; +; NEW_RBS-LABEL: zext: +; NEW_RBS: ; %bb.0: +; NEW_RBS-NEXT: s_cmp_eq_u32 s0, 10 +; NEW_RBS-NEXT: s_cselect_b32 s0, 1, 0 +; NEW_RBS-NEXT: v_mov_b32_e32 v2, s0 +; NEW_RBS-NEXT: global_store_dword v[0:1], v2, off +; NEW_RBS-NEXT: s_endpgm + %bool = icmp eq i32 %a, 10 + %zext = zext i1 %bool to i32 + store i32 %zext, ptr addrspace(1) %ptr + ret void +} + +define amdgpu_ps void @sext(i32 inreg %a, ptr addrspace(1) %ptr) { +; OLD_RBS-LABEL: sext: +; OLD_RBS: ; %bb.0: +; OLD_RBS-NEXT: s_cmp_eq_u32 s0, 10 +; OLD_RBS-NEXT: s_cselect_b32 s0, 1, 0 +; OLD_RBS-NEXT: s_bfe_i32 s0, s0, 0x10000 +; OLD_RBS-NEXT: v_mov_b32_e32 v2, s0 +; OLD_RBS-NEXT: global_store_dword v[0:1], v2, off +; OLD_RBS-NEXT: s_endpgm +; +; NEW_RBS-LABEL: sext: +; NEW_RBS: ; %bb.0: +; NEW_RBS-NEXT: s_cmp_eq_u32 s0, 10 +; NEW_RBS-NEXT: s_cselect_b32 s0, 1, 0 +; NEW_RBS-NEXT: s_bfe_i32 s0, s0, 0x10000 +; NEW_RBS-NEXT: v_mov_b32_e32 v2, s0 +; NEW_RBS-NEXT: global_store_dword v[0:1], v2, off +; NEW_RBS-NEXT: s_endpgm + %bool = icmp eq i32 %a, 10 + %sext = sext i1 %bool to i32 + store i32 %sext, ptr addrspace(1) %ptr + ret void +} + +; divergent i1 bitwise, i1 vcc. +; inst selected into s_and_b32 on wave32 or s_and_b64 on wave64. 
+define amdgpu_ps void @and_i1_vcc(i32 %a, i32 %b, ptr addrspace(1) %ptr) { +; OLD_RBS-LABEL: and_i1_vcc: +; OLD_RBS: ; %bb.0: +; OLD_RBS-NEXT: v_cmp_le_u32_e32 vcc_lo, 10, v0 +; OLD_RBS-NEXT: v_cmp_le_u32_e64 s0, 20, v1 +; OLD_RBS-NEXT: s_and_b32 vcc_lo, vcc_lo, s0 +; OLD_RBS-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; OLD_RBS-NEXT: global_store_dword v[2:3], v0, off +; OLD_RBS-NEXT: s_endpgm +; +; NEW_RBS-LABEL: and_i1_vcc: +; NEW_RBS: ; %bb.0: +; NEW_RBS-NEXT: v_cmp_le_u32_e32 vcc_lo, 10, v0 +; NEW_RBS-NEXT: v_cmp_le_u32_e64 s0, 20, v1 +; NEW_RBS-NEXT: s_and_b32 vcc_lo, vcc_lo, s0 +; NEW_RBS-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; NEW_RBS-NEXT: global_store_dword v[2:3], v0, off +; NEW_RBS-NEXT: s_endpgm + %cmp_a = icmp uge i32 %a, 10 + %cmp_b = icmp uge i32 %b, 20 + %cc = and i1 %cmp_a, %cmp_b + %res = select i1 %cc, i32 %a, i32 %b + store i32 %res, ptr addrspace(1) %ptr + ret void +} + +; uniform i1 bitwise, i32 sgpr. inst selected into s_and_b32. +define amdgpu_ps void @and_i1_scc(i32 inreg %a, i32 inreg %b, ptr addrspace(1) %ptr) { +; OLD_RBS-LABEL: and_i1_scc: +; OLD_RBS: ; %bb.0: +; OLD_RBS-NEXT: s_cmp_ge_u32 s0, 10 +; OLD_RBS-NEXT: s_cselect_b32 s2, 1, 0 +; OLD_RBS-NEXT: s_cmp_ge_u32 s1, 20 +; OLD_RBS-NEXT: s_cselect_b32 s3, 1, 0 +; OLD_RBS-NEXT: s_and_b32 s2, s2, s3 +; OLD_RBS-NEXT: s_and_b32 s2, s2, 1 +; OLD_RBS-NEXT: s_cmp_lg_u32 s2, 0 +; OLD_RBS-NEXT: s_cselect_b32 s0, s0, s1 +; OLD_RBS-NEXT: v_mov_b32_e32 v2, s0 +; OLD_RBS-NEXT: global_store_dword v[0:1], v2, off +; OLD_RBS-NEXT: s_endpgm +; +; NEW_RBS-LABEL: and_i1_scc: +; NEW_RBS: ; %bb.0: +; NEW_RBS-NEXT: s_cmp_ge_u32 s0, 10 +; NEW_RBS-NEXT: s_cselect_b32 s2, 1, 0 +; NEW_RBS-NEXT: s_cmp_ge_u32 s1, 20 +; NEW_RBS-NEXT: s_cselect_b32 s3, 1, 0 +; NEW_RBS-NEXT: s_and_b32 s2, s2, s3 +; NEW_RBS-NEXT: s_and_b32 s2, s2, 1 +; NEW_RBS-NEXT: s_cmp_lg_u32 s2, 0 +; NEW_RBS-NEXT: s_cselect_b32 s0, s0, s1 +; NEW_RBS-NEXT: v_mov_b32_e32 v2, s0 +; NEW_RBS-NEXT: global_store_dword v[0:1], v2, off +; NEW_RBS-NEXT: s_endpgm + %cmp_a = icmp uge i32 %a, 10 + %cmp_b = icmp uge i32 %b, 20 + %cc = and i1 %cmp_a, %cmp_b + %res = select i1 %cc, i32 %a, i32 %b + store i32 %res, ptr addrspace(1) %ptr + ret void +} + +; old RBS selects sgpr phi because it had sgpr inputs. 
+define amdgpu_ps void @divergent_phi_with_uniform_inputs(i32 %a, ptr addrspace(1) %out) { +; OLD_RBS-LABEL: divergent_phi_with_uniform_inputs: +; OLD_RBS: ; %bb.0: ; %A +; OLD_RBS-NEXT: s_mov_b32 s0, 0 +; OLD_RBS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; OLD_RBS-NEXT: s_and_saveexec_b32 s1, vcc_lo +; OLD_RBS-NEXT: ; %bb.1: ; %B +; OLD_RBS-NEXT: s_mov_b32 s0, 1 +; OLD_RBS-NEXT: ; %bb.2: ; %exit +; OLD_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; OLD_RBS-NEXT: v_mov_b32_e32 v0, s0 +; OLD_RBS-NEXT: global_store_dword v[1:2], v0, off +; OLD_RBS-NEXT: s_endpgm +; +; NEW_RBS-LABEL: divergent_phi_with_uniform_inputs: +; NEW_RBS: ; %bb.0: ; %A +; NEW_RBS-NEXT: s_mov_b32 s0, 0 +; NEW_RBS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; NEW_RBS-NEXT: s_and_saveexec_b32 s1, vcc_lo +; NEW_RBS-NEXT: ; %bb.1: ; %B +; NEW_RBS-NEXT: s_mov_b32 s0, 1 +; NEW_RBS-NEXT: ; %bb.2: ; %exit +; NEW_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; NEW_RBS-NEXT: v_mov_b32_e32 v0, s0 +; NEW_RBS-NEXT: global_store_dword v[1:2], v0, off +; NEW_RBS-NEXT: s_endpgm +A: + %cmp = icmp eq i32 %a, 0 + br i1 %cmp, label %B, label %exit + +B: + br label %exit + +exit: + %phi = phi i32 [ 0, %A ], [ 1, %B ] + store i32 %phi, ptr addrspace(1) %out + ret void +} + +; old RBS assigned vgpr to uniform phi (because one input had undetermined bank) +; and it propagated to mul, which was not wrong. +; new RBS assigns vgpr to destination of mul even though both inputs are sgpr. +; TODO: implement temporal divergence lowering +define amdgpu_ps void @divergent_because_of_temporal_divergent_use(float %val, ptr addrspace(1) %addr) { +; OLD_RBS-LABEL: divergent_because_of_temporal_divergent_use: +; OLD_RBS: ; %bb.0: ; %entry +; OLD_RBS-NEXT: s_mov_b32 s0, -1 +; OLD_RBS-NEXT: v_mov_b32_e32 v3, s0 +; OLD_RBS-NEXT: s_mov_b32 s0, 0 +; OLD_RBS-NEXT: .LBB15_1: ; %loop +; OLD_RBS-NEXT: ; =>This Inner Loop Header: Depth=1 +; OLD_RBS-NEXT: v_add_nc_u32_e32 v3, 1, v3 +; OLD_RBS-NEXT: v_cvt_f32_u32_e32 v4, v3 +; OLD_RBS-NEXT: v_cmp_gt_f32_e32 vcc_lo, v4, v0 +; OLD_RBS-NEXT: s_or_b32 s0, vcc_lo, s0 +; OLD_RBS-NEXT: s_andn2_b32 exec_lo, exec_lo, s0 +; OLD_RBS-NEXT: s_cbranch_execnz .LBB15_1 +; OLD_RBS-NEXT: ; %bb.2: ; %exit +; OLD_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; OLD_RBS-NEXT: v_mul_lo_u32 v0, v3, 10 +; OLD_RBS-NEXT: global_store_dword v[1:2], v0, off +; OLD_RBS-NEXT: s_endpgm +; +; NEW_RBS-LABEL: divergent_because_of_temporal_divergent_use: +; NEW_RBS: ; %bb.0: ; %entry +; NEW_RBS-NEXT: s_mov_b32 s0, -1 +; NEW_RBS-NEXT: v_mov_b32_e32 v3, s0 +; NEW_RBS-NEXT: s_mov_b32 s0, 0 +; NEW_RBS-NEXT: .LBB15_1: ; %loop +; NEW_RBS-NEXT: ; =>This Inner Loop Header: Depth=1 +; NEW_RBS-NEXT: v_add_nc_u32_e32 v3, 1, v3 +; NEW_RBS-NEXT: v_cvt_f32_u32_e32 v4, v3 +; NEW_RBS-NEXT: v_cmp_gt_f32_e32 vcc_lo, v4, v0 +; NEW_RBS-NEXT: s_or_b32 s0, vcc_lo, s0 +; NEW_RBS-NEXT: s_andn2_b32 exec_lo, exec_lo, s0 +; NEW_RBS-NEXT: s_cbranch_execnz .LBB15_1 +; NEW_RBS-NEXT: ; %bb.2: ; %exit +; NEW_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; NEW_RBS-NEXT: v_mul_lo_u32 v0, v3, 10 +; NEW_RBS-NEXT: global_store_dword v[1:2], v0, off +; NEW_RBS-NEXT: s_endpgm +entry: + br label %loop + +loop: + %counter = phi i32 [ 0, %entry ], [ %counter.plus.1, %loop ] + %f.counter = uitofp i32 %counter to float + %cond = fcmp ogt float %f.counter, %val + %counter.plus.1 = add i32 %counter, 1 + br i1 %cond, label %exit, label %loop + +exit: + %ceilx10 = mul i32 %counter, 10 + store i32 %ceilx10, ptr addrspace(1) %addr + ret void +} + +; Variables that hande counter can be allocated to sgprs. 
+; Machine uniformity analysis claims some of those registers are divergent while +; LLVM-IR uniformity analysis claims corresponding values are uniform. +; TODO: fix this in Machine uniformity analysis. +define amdgpu_cs void @loop_with_2breaks(ptr addrspace(1) %x, ptr addrspace(1) %a, ptr addrspace(1) %b) { +; OLD_RBS-LABEL: loop_with_2breaks: +; OLD_RBS: ; %bb.0: ; %entry +; OLD_RBS-NEXT: s_mov_b32 s0, 0 +; OLD_RBS-NEXT: ; implicit-def: $sgpr1 +; OLD_RBS-NEXT: v_mov_b32_e32 v6, s0 +; OLD_RBS-NEXT: s_branch .LBB16_3 +; OLD_RBS-NEXT: .LBB16_1: ; %Flow3 +; OLD_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1 +; OLD_RBS-NEXT: s_waitcnt_depctr 0xffe3 +; OLD_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s3 +; OLD_RBS-NEXT: s_andn2_b32 s1, s1, exec_lo +; OLD_RBS-NEXT: s_and_b32 s3, exec_lo, s4 +; OLD_RBS-NEXT: s_or_b32 s1, s1, s3 +; OLD_RBS-NEXT: .LBB16_2: ; %Flow +; OLD_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1 +; OLD_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s2 +; OLD_RBS-NEXT: s_and_b32 s2, exec_lo, s1 +; OLD_RBS-NEXT: s_or_b32 s0, s2, s0 +; OLD_RBS-NEXT: s_andn2_b32 exec_lo, exec_lo, s0 +; OLD_RBS-NEXT: s_cbranch_execz .LBB16_6 +; OLD_RBS-NEXT: .LBB16_3: ; %A +; OLD_RBS-NEXT: ; =>This Inner Loop Header: Depth=1 +; OLD_RBS-NEXT: v_ashrrev_i32_e32 v7, 31, v6 +; OLD_RBS-NEXT: s_andn2_b32 s1, s1, exec_lo +; OLD_RBS-NEXT: s_and_b32 s2, exec_lo, -1 +; OLD_RBS-NEXT: s_or_b32 s1, s1, s2 +; OLD_RBS-NEXT: v_lshlrev_b64 v[7:8], 2, v[6:7] +; OLD_RBS-NEXT: v_add_co_u32 v9, vcc_lo, v2, v7 +; OLD_RBS-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v3, v8, vcc_lo +; OLD_RBS-NEXT: global_load_dword v9, v[9:10], off +; OLD_RBS-NEXT: s_waitcnt vmcnt(0) +; OLD_RBS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9 +; OLD_RBS-NEXT: s_and_saveexec_b32 s2, vcc_lo +; OLD_RBS-NEXT: s_cbranch_execz .LBB16_2 +; OLD_RBS-NEXT: ; %bb.4: ; %B +; OLD_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1 +; OLD_RBS-NEXT: v_add_co_u32 v9, vcc_lo, v4, v7 +; OLD_RBS-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v5, v8, vcc_lo +; OLD_RBS-NEXT: s_mov_b32 s4, -1 +; OLD_RBS-NEXT: global_load_dword v9, v[9:10], off +; OLD_RBS-NEXT: s_waitcnt vmcnt(0) +; OLD_RBS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9 +; OLD_RBS-NEXT: s_and_saveexec_b32 s3, vcc_lo +; OLD_RBS-NEXT: s_cbranch_execz .LBB16_1 +; OLD_RBS-NEXT: ; %bb.5: ; %loop.body +; OLD_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1 +; OLD_RBS-NEXT: v_add_co_u32 v7, vcc_lo, v0, v7 +; OLD_RBS-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, v1, v8, vcc_lo +; OLD_RBS-NEXT: v_add_nc_u32_e32 v10, 1, v6 +; OLD_RBS-NEXT: v_cmp_gt_u32_e32 vcc_lo, 0x64, v6 +; OLD_RBS-NEXT: s_andn2_b32 s4, -1, exec_lo +; OLD_RBS-NEXT: global_load_dword v9, v[7:8], off +; OLD_RBS-NEXT: v_mov_b32_e32 v6, v10 +; OLD_RBS-NEXT: s_and_b32 s5, exec_lo, vcc_lo +; OLD_RBS-NEXT: s_or_b32 s4, s4, s5 +; OLD_RBS-NEXT: s_waitcnt vmcnt(0) +; OLD_RBS-NEXT: v_add_nc_u32_e32 v9, 1, v9 +; OLD_RBS-NEXT: global_store_dword v[7:8], v9, off +; OLD_RBS-NEXT: s_branch .LBB16_1 +; OLD_RBS-NEXT: .LBB16_6: ; %exit +; OLD_RBS-NEXT: s_endpgm +; +; NEW_RBS-LABEL: loop_with_2breaks: +; NEW_RBS: ; %bb.0: ; %entry +; NEW_RBS-NEXT: s_mov_b32 s0, 0 +; NEW_RBS-NEXT: ; implicit-def: $sgpr1 +; NEW_RBS-NEXT: v_mov_b32_e32 v6, s0 +; NEW_RBS-NEXT: s_branch .LBB16_3 +; NEW_RBS-NEXT: .LBB16_1: ; %Flow3 +; NEW_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1 +; NEW_RBS-NEXT: s_waitcnt_depctr 0xffe3 +; NEW_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s3 +; NEW_RBS-NEXT: s_andn2_b32 s1, s1, exec_lo +; NEW_RBS-NEXT: s_and_b32 s3, exec_lo, s4 +; NEW_RBS-NEXT: s_or_b32 s1, s1, s3 +; NEW_RBS-NEXT: .LBB16_2: ; %Flow +; NEW_RBS-NEXT: ; in Loop: 
Header=BB16_3 Depth=1 +; NEW_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s2 +; NEW_RBS-NEXT: s_and_b32 s2, exec_lo, s1 +; NEW_RBS-NEXT: s_or_b32 s0, s2, s0 +; NEW_RBS-NEXT: s_andn2_b32 exec_lo, exec_lo, s0 +; NEW_RBS-NEXT: s_cbranch_execz .LBB16_6 +; NEW_RBS-NEXT: .LBB16_3: ; %A +; NEW_RBS-NEXT: ; =>This Inner Loop Header: Depth=1 +; NEW_RBS-NEXT: v_ashrrev_i32_e32 v7, 31, v6 +; NEW_RBS-NEXT: s_andn2_b32 s1, s1, exec_lo +; NEW_RBS-NEXT: s_and_b32 s2, exec_lo, -1 +; NEW_RBS-NEXT: s_or_b32 s1, s1, s2 +; NEW_RBS-NEXT: v_lshlrev_b64 v[7:8], 2, v[6:7] +; NEW_RBS-NEXT: v_add_co_u32 v9, vcc_lo, v2, v7 +; NEW_RBS-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v3, v8, vcc_lo +; NEW_RBS-NEXT: global_load_dword v9, v[9:10], off +; NEW_RBS-NEXT: s_waitcnt vmcnt(0) +; NEW_RBS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9 +; NEW_RBS-NEXT: s_and_saveexec_b32 s2, vcc_lo +; NEW_RBS-NEXT: s_cbranch_execz .LBB16_2 +; NEW_RBS-NEXT: ; %bb.4: ; %B +; NEW_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1 +; NEW_RBS-NEXT: v_add_co_u32 v9, vcc_lo, v4, v7 +; NEW_RBS-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v5, v8, vcc_lo +; NEW_RBS-NEXT: s_mov_b32 s4, -1 +; NEW_RBS-NEXT: global_load_dword v9, v[9:10], off +; NEW_RBS-NEXT: s_waitcnt vmcnt(0) +; NEW_RBS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9 +; NEW_RBS-NEXT: s_and_saveexec_b32 s3, vcc_lo +; NEW_RBS-NEXT: s_cbranch_execz .LBB16_1 +; NEW_RBS-NEXT: ; %bb.5: ; %loop.body +; NEW_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1 +; NEW_RBS-NEXT: v_add_co_u32 v7, vcc_lo, v0, v7 +; NEW_RBS-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, v1, v8, vcc_lo +; NEW_RBS-NEXT: v_add_nc_u32_e32 v10, 1, v6 +; NEW_RBS-NEXT: v_cmp_gt_u32_e32 vcc_lo, 0x64, v6 +; NEW_RBS-NEXT: s_andn2_b32 s4, -1, exec_lo +; NEW_RBS-NEXT: global_load_dword v9, v[7:8], off +; NEW_RBS-NEXT: v_mov_b32_e32 v6, v10 +; NEW_RBS-NEXT: s_and_b32 s5, exec_lo, vcc_lo +; NEW_RBS-NEXT: s_or_b32 s4, s4, s5 +; NEW_RBS-NEXT: s_waitcnt vmcnt(0) +; NEW_RBS-NEXT: v_add_nc_u32_e32 v9, 1, v9 +; NEW_RBS-NEXT: global_store_dword v[7:8], v9, off +; NEW_RBS-NEXT: s_branch .LBB16_1 +; NEW_RBS-NEXT: .LBB16_6: ; %exit +; NEW_RBS-NEXT: s_endpgm +entry: + br label %A + +A: + %counter = phi i32 [ %counter.plus.1, %loop.body ], [ 0, %entry ] + %a.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %a, i32 %counter + %a.val = load i32, ptr addrspace(1) %a.plus.counter + %a.cond = icmp eq i32 %a.val, 0 + br i1 %a.cond, label %exit, label %B + +B: + %b.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %b, i32 %counter + %b.val = load i32, ptr addrspace(1) %b.plus.counter + %b.cond = icmp eq i32 %b.val, 0 + br i1 %b.cond, label %exit, label %loop.body + +loop.body: + %x.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %counter + %x.val = load i32, ptr addrspace(1) %x.plus.counter + %x.val.plus.1 = add i32 %x.val, 1 + store i32 %x.val.plus.1, ptr addrspace(1) %x.plus.counter + %counter.plus.1 = add i32 %counter, 1 + %x.cond = icmp ult i32 %counter, 100 + br i1 %x.cond, label %exit, label %A + +exit: + ret void +} + +declare i16 @llvm.abs.i16(i16, i1) +declare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.mir new file mode 100644 index 0000000000000..ef3a0a3a67594 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.mir @@ -0,0 +1,1377 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 
-run-pass=regbankselect %s -verify-machineinstrs -o - | FileCheck %s -check-prefixes=OLD_RBS +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=regbankselect %s -verify-machineinstrs -o - | FileCheck %s -check-prefixes=NEW_RBS + +--- +name: uniform_in_vgpr +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + + ; OLD_RBS-LABEL: name: uniform_in_vgpr + ; OLD_RBS: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; OLD_RBS-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[COPY4]](s32) + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; OLD_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[FPTOUI]], [[COPY5]] + ; OLD_RBS-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: uniform_in_vgpr + ; NEW_RBS: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; NEW_RBS-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[COPY4]](s32) + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[FPTOUI]], [[COPY5]] + ; NEW_RBS-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $vgpr0 + %3:_(s32) = COPY $vgpr1 + %4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) + %5:_(s32) = G_FPTOUI %0(s32) + %6:_(s32) = G_ADD %5, %1 + G_STORE %6(s32), %4(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: back_to_back_uniform_in_vgpr +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + + ; OLD_RBS-LABEL: name: back_to_back_uniform_in_vgpr + ; OLD_RBS: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; OLD_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; OLD_RBS-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[COPY5]], [[COPY6]] + ; OLD_RBS-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[FADD]](s32) + ; OLD_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) + ; OLD_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[FPTOUI]], [[COPY7]] + ; OLD_RBS-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: back_to_back_uniform_in_vgpr + ; NEW_RBS: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; NEW_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; NEW_RBS-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[COPY5]], [[COPY6]] + ; NEW_RBS-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[FADD]](s32) + ; NEW_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) + ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[FPTOUI]], [[COPY7]] + ; NEW_RBS-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = G_FADD %0, %1 + %7:_(s32) = G_FPTOUI %6(s32) + %8:_(s32) = G_ADD %7, %2 + G_STORE %8(s32), %5(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: buffer_load_uniform +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + + ; OLD_RBS-LABEL: name: buffer_load_uniform + ; OLD_RBS: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; OLD_RBS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; OLD_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; OLD_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) + ; OLD_RBS-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY8]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; OLD_RBS-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) + ; OLD_RBS-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; OLD_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[UV1]], [[COPY9]] + ; OLD_RBS-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: buffer_load_uniform + ; NEW_RBS: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; NEW_RBS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; NEW_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) + ; NEW_RBS-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY8]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) + ; NEW_RBS-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[UV1]], [[COPY9]] + ; NEW_RBS-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + 
%1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s32) = COPY $sgpr3 + %4:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32) + %5:_(s32) = COPY $sgpr4 + %6:_(s32) = COPY $vgpr0 + %7:_(s32) = COPY $vgpr1 + %8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) + %9:_(s32) = G_CONSTANT i32 0 + %10:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD %4(<4 x s32>), %9(s32), %5, %9, 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + %11:_(s32) = G_CONSTANT i32 1 + %12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32) = G_UNMERGE_VALUES %10(<4 x s32>) + %16:_(s32) = G_ADD %13, %11 + G_STORE %16(s32), %8(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: buffer_load_divergent +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 + + ; OLD_RBS-LABEL: name: buffer_load_divergent + ; OLD_RBS: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; OLD_RBS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; OLD_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; OLD_RBS-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; OLD_RBS-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) + ; OLD_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; OLD_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[UV1]], [[COPY8]] + ; OLD_RBS-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: buffer_load_divergent + ; NEW_RBS: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; NEW_RBS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; NEW_RBS-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 
1, addrspace 8) + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) + ; NEW_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[UV1]], [[COPY8]] + ; NEW_RBS-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s32) = COPY $sgpr3 + %4:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32) + %5:_(s32) = COPY $vgpr0 + %6:_(s32) = COPY $vgpr1 + %7:_(s32) = COPY $vgpr2 + %8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) + %9:_(s32) = G_CONSTANT i32 0 + %10:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD %4(<4 x s32>), %9(s32), %5, %9, 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + %11:_(s32) = G_CONSTANT i32 1 + %12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32) = G_UNMERGE_VALUES %10(<4 x s32>) + %16:_(s32) = G_ADD %13, %11 + G_STORE %16(s32), %8(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: vgpr_and_i64 +legalized: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + ; OLD_RBS-LABEL: name: vgpr_and_i64 + ; OLD_RBS: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; OLD_RBS-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 + ; OLD_RBS-NEXT: [[MV2:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; OLD_RBS-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; OLD_RBS-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) + ; OLD_RBS-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; OLD_RBS-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; OLD_RBS-NEXT: [[MV3:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + ; OLD_RBS-NEXT: G_STORE [[MV3]](s64), [[MV2]](p1) :: (store (s64), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: vgpr_and_i64 + ; NEW_RBS: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; NEW_RBS-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 + ; NEW_RBS-NEXT: [[MV2:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; NEW_RBS-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; NEW_RBS-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), 
[[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) + ; NEW_RBS-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; NEW_RBS-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; NEW_RBS-NEXT: [[MV3:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + ; NEW_RBS-NEXT: G_STORE [[MV3]](s64), [[MV2]](p1) :: (store (s64), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32) + %3:_(s32) = COPY $vgpr2 + %4:_(s32) = COPY $vgpr3 + %5:_(s64) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = COPY $vgpr4 + %7:_(s32) = COPY $vgpr5 + %8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) + %9:_(s64) = G_AND %2, %5 + G_STORE %9(s64), %8(p1) :: (store (s64), addrspace 1) + S_ENDPGM 0 +... + +--- +name: abs_sgpr_i16 +legalized: true +body: | + bb.0: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; OLD_RBS-LABEL: name: abs_sgpr_i16 + ; OLD_RBS: liveins: $sgpr0, $vgpr0, $vgpr1 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; OLD_RBS-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16) + ; OLD_RBS-NEXT: [[ABS:%[0-9]+]]:sgpr(s32) = G_ABS [[SEXT]] + ; OLD_RBS-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[ABS]](s32) + ; OLD_RBS-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16) + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[ANYEXT]](s32) + ; OLD_RBS-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (store (s16), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: abs_sgpr_i16 + ; NEW_RBS: liveins: $sgpr0, $vgpr0, $vgpr1 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; NEW_RBS-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16) + ; NEW_RBS-NEXT: [[ABS:%[0-9]+]]:sgpr(s32) = G_ABS [[SEXT]] + ; NEW_RBS-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[ABS]](s32) + ; NEW_RBS-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16) + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[ANYEXT]](s32) + ; NEW_RBS-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (store (s16), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s16) = G_TRUNC %0(s32) + %2:_(s32) = COPY $vgpr0 + %3:_(s32) = COPY $vgpr1 + %4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) + %5:_(s16) = G_ABS %1 + %6:_(s32) = G_ANYEXT %5(s16) + G_STORE %6(s32), %4(p1) :: (store (s16), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: uniform_i1_phi +legalized: true +tracksRegLiveness: true +body: | + ; OLD_RBS-LABEL: name: uniform_i1_phi + ; OLD_RBS: bb.0: + ; OLD_RBS-NEXT: successors: %bb.1(0x30000000), %bb.2(0x50000000) + ; OLD_RBS-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]] + ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; OLD_RBS-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY3]](s32), [[C1]] + ; OLD_RBS-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) + ; OLD_RBS-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) + ; OLD_RBS-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) + ; OLD_RBS-NEXT: G_BRCOND [[ZEXT]](s32), %bb.2 + ; OLD_RBS-NEXT: G_BR %bb.1 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: bb.1: + ; OLD_RBS-NEXT: successors: %bb.2(0x80000000) + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; OLD_RBS-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[COPY2]](s32), [[C2]] + ; OLD_RBS-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) + ; OLD_RBS-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: bb.2: + ; OLD_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 + ; OLD_RBS-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PHI]](s32) + ; OLD_RBS-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC3]](s1) + ; OLD_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 + ; OLD_RBS-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SEXT]], [[C3]] + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; OLD_RBS-NEXT: G_STORE [[COPY4]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: uniform_i1_phi + ; NEW_RBS: bb.0: + ; NEW_RBS-NEXT: successors: %bb.1(0x30000000), %bb.2(0x50000000) + ; NEW_RBS-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]] + ; NEW_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY3]](s32), [[C1]] + ; NEW_RBS-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) + ; NEW_RBS-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) + ; NEW_RBS-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) + ; NEW_RBS-NEXT: G_BRCOND [[ZEXT]](s32), %bb.2 + ; NEW_RBS-NEXT: G_BR %bb.1 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: bb.1: + ; 
NEW_RBS-NEXT: successors: %bb.2(0x80000000) + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[COPY2]](s32), [[C2]] + ; NEW_RBS-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) + ; NEW_RBS-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: bb.2: + ; NEW_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 + ; NEW_RBS-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PHI]](s32) + ; NEW_RBS-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC3]](s1) + ; NEW_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 + ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SEXT]], [[C3]] + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; NEW_RBS-NEXT: G_STORE [[COPY4]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + bb.0: + successors: %bb.1(0x30000000), %bb.2(0x50000000) + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32) + %3:_(s32) = COPY $sgpr0 + %4:_(s32) = COPY $sgpr1 + %5:_(s32) = G_CONSTANT i32 6 + %6:_(s1) = G_ICMP intpred(uge), %3(s32), %5 + %7:_(s32) = G_CONSTANT i32 0 + %8:_(s1) = G_ICMP intpred(ne), %4(s32), %7 + G_BRCOND %8(s1), %bb.2 + G_BR %bb.1 + + bb.1: + successors: %bb.2(0x80000000) + + %9:_(s32) = G_CONSTANT i32 1 + %10:_(s1) = G_ICMP intpred(ult), %3(s32), %9 + + bb.2: + %11:_(s1) = G_PHI %6(s1), %bb.0, %10(s1), %bb.1 + %12:_(s32) = G_SEXT %11(s1) + %13:_(s32) = G_CONSTANT i32 2 + %14:_(s32) = G_ADD %12, %13 + G_STORE %14(s32), %2(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: vcc_to_scc +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + + ; OLD_RBS-LABEL: name: vcc_to_scc + ; OLD_RBS: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; OLD_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; OLD_RBS-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY5]](s32), [[COPY6]] + ; OLD_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; OLD_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) + ; OLD_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY7]], [[COPY8]] + ; OLD_RBS-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: vcc_to_scc + ; NEW_RBS: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; NEW_RBS-NEXT: 
[[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; NEW_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; NEW_RBS-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY5]](s32), [[COPY6]] + ; NEW_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; NEW_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) + ; NEW_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY7]], [[COPY8]] + ; NEW_RBS-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = G_FCONSTANT float 0.000000e+00 + %7:_(s1) = G_FCMP floatpred(oeq), %0(s32), %6 + %8:_(s32) = G_SELECT %7(s1), %1, %2 + G_STORE %8(s32), %5(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: scc_to_vcc +legalized: true +body: | + bb.0: + liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + + ; OLD_RBS-LABEL: name: scc_to_vcc + ; OLD_RBS: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; OLD_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY5]](s1), [[COPY1]], [[COPY2]] + ; OLD_RBS-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: scc_to_vcc + ; NEW_RBS: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; NEW_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; NEW_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY5]](s1), [[COPY1]], [[COPY2]] + ; NEW_RBS-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(s32) = COPY $vgpr2 + %4:_(s32) = COPY $vgpr3 + %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = G_CONSTANT i32 0 + %7:_(s1) = G_ICMP intpred(eq), %0(s32), %6 + %8:_(s32) = G_SELECT %7(s1), %1, %2 + G_STORE %8(s32), %5(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: vgpr_to_vcc_trunc +legalized: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + + ; OLD_RBS-LABEL: name: vgpr_to_vcc_trunc + ; OLD_RBS: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; OLD_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY5]](s1), [[COPY1]], [[COPY2]] + ; OLD_RBS-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: vgpr_to_vcc_trunc + ; NEW_RBS: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; NEW_RBS-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; NEW_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY5]](s1), [[COPY1]], [[COPY2]] + ; NEW_RBS-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(s32) = COPY $vgpr4 + %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s1) = G_TRUNC %0(s32) + %7:_(s32) = G_SELECT %6(s1), %1, %2 + G_STORE %7(s32), %5(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: zext +legalized: true +body: | + bb.0: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; OLD_RBS-LABEL: name: zext + ; OLD_RBS: liveins: $sgpr0, $vgpr0, $vgpr1 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + ; OLD_RBS-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[ZEXT]](s32) + ; OLD_RBS-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: zext + ; NEW_RBS: liveins: $sgpr0, $vgpr0, $vgpr1 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; NEW_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + ; NEW_RBS-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[ZEXT]](s32) + ; NEW_RBS-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32) + %4:_(s32) = G_CONSTANT i32 10 + %5:_(s1) = G_ICMP intpred(eq), %0(s32), %4 + %6:_(s32) = G_ZEXT %5(s1) + G_STORE %6(s32), %3(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: sext +legalized: true +body: | + bb.0: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; OLD_RBS-LABEL: name: sext + ; OLD_RBS: liveins: $sgpr0, $vgpr0, $vgpr1 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + ; OLD_RBS-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s1) + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SEXT]](s32) + ; OLD_RBS-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: sext + ; NEW_RBS: liveins: $sgpr0, $vgpr0, $vgpr1 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; NEW_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + ; NEW_RBS-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s1) + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SEXT]](s32) + ; NEW_RBS-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32) + %4:_(s32) = G_CONSTANT i32 10 + %5:_(s1) = G_ICMP intpred(eq), %0(s32), %4 + %6:_(s32) = G_SEXT %5(s1) + G_STORE %6(s32), %3(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: and_i1_vcc +legalized: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + + ; OLD_RBS-LABEL: name: and_i1_vcc + ; OLD_RBS: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[COPY4]] + ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 20 + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; OLD_RBS-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(uge), [[COPY1]](s32), [[COPY5]] + ; OLD_RBS-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; OLD_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[AND]](s1), [[COPY]], [[COPY1]] + ; OLD_RBS-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: and_i1_vcc + ; NEW_RBS: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[COPY4]] + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 20 + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; NEW_RBS-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(uge), [[COPY1]](s32), [[COPY5]] + ; NEW_RBS-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; NEW_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[AND]](s1), [[COPY]], [[COPY1]] + ; NEW_RBS-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) + %5:_(s32) = G_CONSTANT i32 10 + %6:_(s1) = G_ICMP intpred(uge), %0(s32), %5 + %7:_(s32) = G_CONSTANT i32 20 + %8:_(s1) = G_ICMP intpred(uge), %1(s32), %7 + %9:_(s1) = G_AND %6, %8 + %10:_(s32) = G_SELECT %9(s1), %0, %1 + G_STORE %10(s32), %4(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: and_i1_scc +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + + ; OLD_RBS-LABEL: name: and_i1_scc + ; OLD_RBS: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(uge), [[COPY]](s32), [[C]] + ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 20 + ; OLD_RBS-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(uge), [[COPY1]](s32), [[C1]] + ; OLD_RBS-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) + ; OLD_RBS-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) + ; OLD_RBS-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s1) + ; OLD_RBS-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ANYEXT]], [[ANYEXT1]] + ; OLD_RBS-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[AND]](s32) + ; OLD_RBS-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC2]](s1) + ; OLD_RBS-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT]](s32), [[COPY]], [[COPY1]] + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32) + ; OLD_RBS-NEXT: G_STORE [[COPY4]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: and_i1_scc + ; NEW_RBS: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(uge), [[COPY]](s32), [[C]] + ; NEW_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 20 + ; NEW_RBS-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(uge), [[COPY1]](s32), [[C1]] + ; NEW_RBS-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) + ; NEW_RBS-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) + ; NEW_RBS-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s1) + ; NEW_RBS-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ANYEXT]], [[ANYEXT1]] + ; NEW_RBS-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[AND]](s32) + ; NEW_RBS-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC2]](s1) + ; NEW_RBS-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT]](s32), [[COPY]], [[COPY1]] + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32) + ; NEW_RBS-NEXT: G_STORE [[COPY4]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $vgpr0 + %3:_(s32) = COPY $vgpr1 + %4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) + %5:_(s32) = G_CONSTANT i32 10 + %6:_(s1) = G_ICMP intpred(uge), %0(s32), %5 + %7:_(s32) = G_CONSTANT i32 20 + %8:_(s1) = G_ICMP intpred(uge), %1(s32), %7 + %9:_(s1) = G_AND %6, %8 + %10:_(s32) = G_SELECT %9(s1), %0, %1 + G_STORE %10(s32), %4(p1) :: 
(store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: divergent_phi_with_uniform_inputs +legalized: true +tracksRegLiveness: true +body: | + ; OLD_RBS-LABEL: name: divergent_phi_with_uniform_inputs + ; OLD_RBS: bb.0: + ; OLD_RBS-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; OLD_RBS-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; OLD_RBS-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + ; OLD_RBS-NEXT: G_BR %bb.1 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: bb.1: + ; OLD_RBS-NEXT: successors: %bb.2(0x80000000) + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: bb.2: + ; OLD_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[C]](s32), %bb.0, [[C1]](s32), %bb.1 + ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[PHI]](s32) + ; OLD_RBS-NEXT: G_STORE [[COPY4]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: divergent_phi_with_uniform_inputs + ; NEW_RBS: bb.0: + ; NEW_RBS-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; NEW_RBS-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; NEW_RBS-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + ; NEW_RBS-NEXT: G_BR %bb.1 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: bb.1: + ; NEW_RBS-NEXT: successors: %bb.2(0x80000000) + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: bb.2: + ; NEW_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[C]](s32), %bb.0, [[C1]](s32), %bb.1 + ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[PHI]](s32) + ; NEW_RBS-NEXT: G_STORE [[COPY4]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + bb.0: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $vgpr0, $vgpr1, $vgpr2 + + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32) + %4:_(s32) = G_CONSTANT i32 0 + %5:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), %0(s32), %4 + %6:sreg_32_xm0_xexec(s32) = SI_IF %5(s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.1 + + bb.1: + 
successors: %bb.2(0x80000000) + + %7:_(s32) = G_CONSTANT i32 1 + + bb.2: + %8:_(s32) = G_PHI %4(s32), %bb.0, %7(s32), %bb.1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %6(s32) + G_STORE %8(s32), %3(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: divergent_because_of_temporal_divergent_use +legalized: true +tracksRegLiveness: true +body: | + ; OLD_RBS-LABEL: name: divergent_because_of_temporal_divergent_use + ; OLD_RBS: bb.0: + ; OLD_RBS-NEXT: successors: %bb.1(0x80000000) + ; OLD_RBS-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1 + ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: bb.1: + ; OLD_RBS-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0 + ; OLD_RBS-NEXT: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1 + ; OLD_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32) + ; OLD_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PHI1]], [[COPY3]] + ; OLD_RBS-NEXT: [[UITOFP:%[0-9]+]]:vgpr(s32) = G_UITOFP [[ADD]](s32) + ; OLD_RBS-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] + ; OLD_RBS-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32) + ; OLD_RBS-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; OLD_RBS-NEXT: G_BR %bb.2 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: bb.2: + ; OLD_RBS-NEXT: [[PHI2:%[0-9]+]]:vgpr(s32) = G_PHI [[ADD]](s32), %bb.1 + ; OLD_RBS-NEXT: [[PHI3:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.1 + ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s32) + ; OLD_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C3]](s32) + ; OLD_RBS-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[PHI2]], [[COPY4]] + ; OLD_RBS-NEXT: G_STORE [[MUL]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: divergent_because_of_temporal_divergent_use + ; NEW_RBS: bb.0: + ; NEW_RBS-NEXT: successors: %bb.1(0x80000000) + ; NEW_RBS-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1 + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: bb.1: + ; NEW_RBS-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0 + ; NEW_RBS-NEXT: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1 + ; NEW_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY 
[[C2]](s32) + ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PHI1]], [[COPY3]] + ; NEW_RBS-NEXT: [[UITOFP:%[0-9]+]]:vgpr(s32) = G_UITOFP [[ADD]](s32) + ; NEW_RBS-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] + ; NEW_RBS-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32) + ; NEW_RBS-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; NEW_RBS-NEXT: G_BR %bb.2 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: bb.2: + ; NEW_RBS-NEXT: [[PHI2:%[0-9]+]]:vgpr(s32) = G_PHI [[ADD]](s32), %bb.1 + ; NEW_RBS-NEXT: [[PHI3:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.1 + ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s32) + ; NEW_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C3]](s32) + ; NEW_RBS-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[PHI2]], [[COPY4]] + ; NEW_RBS-NEXT: G_STORE [[MUL]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + bb.0: + successors: %bb.1(0x80000000) + liveins: $vgpr0, $vgpr1, $vgpr2 + + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32) + %4:_(s32) = G_CONSTANT i32 -1 + %5:_(s32) = G_CONSTANT i32 0 + + bb.1: + successors: %bb.2(0x04000000), %bb.1(0x7c000000) + + %6:_(s32) = G_PHI %7(s32), %bb.1, %5(s32), %bb.0 + %8:_(s32) = G_PHI %4(s32), %bb.0, %9(s32), %bb.1 + %10:_(s32) = G_CONSTANT i32 1 + %9:_(s32) = G_ADD %8, %10 + %11:_(s32) = G_UITOFP %9(s32) + %12:_(s1) = G_FCMP floatpred(ogt), %11(s32), %0 + %7:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %12(s1), %6(s32) + SI_LOOP %7(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.2 + + bb.2: + %13:_(s32) = G_PHI %9(s32), %bb.1 + %14:_(s32) = G_PHI %7(s32), %bb.1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s32) + %15:_(s32) = G_CONSTANT i32 10 + %16:_(s32) = G_MUL %13, %15 + G_STORE %16(s32), %3(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: loop_with_2breaks +legalized: true +tracksRegLiveness: true +body: | + ; OLD_RBS-LABEL: name: loop_with_2breaks + ; OLD_RBS: bb.0: + ; OLD_RBS-NEXT: successors: %bb.1(0x80000000) + ; OLD_RBS-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; OLD_RBS-NEXT: [[MV1:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 + ; OLD_RBS-NEXT: [[MV2:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; OLD_RBS-NEXT: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; OLD_RBS-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: bb.1: + ; OLD_RBS-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %13(s1), %bb.3 + ; OLD_RBS-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI %15(s32), %bb.3, [[C]](s32), %bb.0 + ; OLD_RBS-NEXT: [[PHI2:%[0-9]+]]:vgpr(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3 + ; OLD_RBS-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) + ; OLD_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[PHI2]](s32) + ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31 + ; OLD_RBS-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY7]], [[C1]](s32) + ; OLD_RBS-NEXT: [[MV3:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY7]](s32), [[ASHR]](s32) + ; OLD_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 + ; OLD_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32) + ; OLD_RBS-NEXT: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[MV3]], [[COPY8]](s32) + ; OLD_RBS-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64) + ; OLD_RBS-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) + ; OLD_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; OLD_RBS-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[C3]](s32) + ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[COPY9]] + ; OLD_RBS-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C4]](s32) + ; OLD_RBS-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[TRUNC]](s1) + ; OLD_RBS-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc + ; OLD_RBS-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc + ; OLD_RBS-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc + ; OLD_RBS-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) + ; OLD_RBS-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + ; OLD_RBS-NEXT: G_BR %bb.2 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: bb.2: + ; OLD_RBS-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000) + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 + ; OLD_RBS-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[C5]](s32) + ; 
OLD_RBS-NEXT: [[SHL1:%[0-9]+]]:vgpr(s64) = G_SHL [[MV3]], [[COPY12]](s32) + ; OLD_RBS-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV2]], [[SHL1]](s64) + ; OLD_RBS-NEXT: [[LOAD1:%[0-9]+]]:vgpr(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1) + ; OLD_RBS-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; OLD_RBS-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[C6]](s32) + ; OLD_RBS-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[COPY13]] + ; OLD_RBS-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; OLD_RBS-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C7]](s32) + ; OLD_RBS-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[TRUNC1]](s1) + ; OLD_RBS-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[COPY14]](s1) + ; OLD_RBS-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + ; OLD_RBS-NEXT: G_BR %bb.4 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: bb.3: + ; OLD_RBS-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000) + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %43(s1), %bb.5 + ; OLD_RBS-NEXT: [[PHI4:%[0-9]+]]:vgpr(s32) = G_PHI %44(s32), %bb.5, [[DEF]](s32), %bb.1 + ; OLD_RBS-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) + ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) + ; OLD_RBS-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY16]](s1), [[PHI1]](s32) + ; OLD_RBS-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; OLD_RBS-NEXT: G_BR %bb.6 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: bb.4: + ; OLD_RBS-NEXT: successors: %bb.5(0x80000000) + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 + ; OLD_RBS-NEXT: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[C8]](s32) + ; OLD_RBS-NEXT: [[SHL2:%[0-9]+]]:vgpr(s64) = G_SHL [[MV3]], [[COPY17]](s32) + ; OLD_RBS-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV]], [[SHL2]](s64) + ; OLD_RBS-NEXT: [[LOAD2:%[0-9]+]]:vgpr(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s32), addrspace 1) + ; OLD_RBS-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; OLD_RBS-NEXT: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[C9]](s32) + ; OLD_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[LOAD2]], [[COPY18]] + ; OLD_RBS-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD2]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[C9]](s32) + ; OLD_RBS-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[PHI2]], [[COPY19]] + ; OLD_RBS-NEXT: [[C10:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 100 + ; OLD_RBS-NEXT: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[C10]](s32) + ; OLD_RBS-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ult), [[PHI2]](s32), [[COPY20]] + ; OLD_RBS-NEXT: [[COPY21:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) + ; OLD_RBS-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY15]](s1), $exec_lo, implicit-def $scc + ; OLD_RBS-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY21]](s1), implicit-def $scc + ; OLD_RBS-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: bb.5: + ; OLD_RBS-NEXT: successors: %bb.3(0x80000000) + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[COPY14]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4 + ; OLD_RBS-NEXT: [[PHI6:%[0-9]+]]:vgpr(s32) = G_PHI 
[[ADD1]](s32), %bb.4, [[DEF]](s32), %bb.2 + ; OLD_RBS-NEXT: [[COPY22:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) + ; OLD_RBS-NEXT: [[COPY23:%[0-9]+]]:sreg_32(s1) = COPY [[COPY22]](s1) + ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32) + ; OLD_RBS-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc + ; OLD_RBS-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY23]](s1), implicit-def $scc + ; OLD_RBS-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc + ; OLD_RBS-NEXT: G_BR %bb.3 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: bb.6: + ; OLD_RBS-NEXT: [[PHI7:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.3 + ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: loop_with_2breaks + ; NEW_RBS: bb.0: + ; NEW_RBS-NEXT: successors: %bb.1(0x80000000) + ; NEW_RBS-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; NEW_RBS-NEXT: [[MV1:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 + ; NEW_RBS-NEXT: [[MV2:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; NEW_RBS-NEXT: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: bb.1: + ; NEW_RBS-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %13(s1), %bb.3 + ; NEW_RBS-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI %15(s32), %bb.3, [[C]](s32), %bb.0 + ; NEW_RBS-NEXT: [[PHI2:%[0-9]+]]:vgpr(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3 + ; NEW_RBS-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) + ; NEW_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[PHI2]](s32) + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31 + ; NEW_RBS-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY7]], [[C1]](s32) + ; NEW_RBS-NEXT: [[MV3:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY7]](s32), [[ASHR]](s32) + ; NEW_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 + ; NEW_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32) + ; NEW_RBS-NEXT: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[MV3]], [[COPY8]](s32) + ; NEW_RBS-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64) + ; NEW_RBS-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) + ; NEW_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[C3]](s32) + ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[COPY9]] + ; NEW_RBS-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C4]](s32) + ; NEW_RBS-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[TRUNC]](s1) + ; NEW_RBS-NEXT: 
[[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc + ; NEW_RBS-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc + ; NEW_RBS-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc + ; NEW_RBS-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) + ; NEW_RBS-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + ; NEW_RBS-NEXT: G_BR %bb.2 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: bb.2: + ; NEW_RBS-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000) + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 + ; NEW_RBS-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[C5]](s32) + ; NEW_RBS-NEXT: [[SHL1:%[0-9]+]]:vgpr(s64) = G_SHL [[MV3]], [[COPY12]](s32) + ; NEW_RBS-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV2]], [[SHL1]](s64) + ; NEW_RBS-NEXT: [[LOAD1:%[0-9]+]]:vgpr(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1) + ; NEW_RBS-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[C6]](s32) + ; NEW_RBS-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[COPY13]] + ; NEW_RBS-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C7]](s32) + ; NEW_RBS-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[TRUNC1]](s1) + ; NEW_RBS-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[COPY14]](s1) + ; NEW_RBS-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + ; NEW_RBS-NEXT: G_BR %bb.4 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: bb.3: + ; NEW_RBS-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000) + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %43(s1), %bb.5 + ; NEW_RBS-NEXT: [[PHI4:%[0-9]+]]:vgpr(s32) = G_PHI %44(s32), %bb.5, [[DEF]](s32), %bb.1 + ; NEW_RBS-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) + ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) + ; NEW_RBS-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY16]](s1), [[PHI1]](s32) + ; NEW_RBS-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; NEW_RBS-NEXT: G_BR %bb.6 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: bb.4: + ; NEW_RBS-NEXT: successors: %bb.5(0x80000000) + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 + ; NEW_RBS-NEXT: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[C8]](s32) + ; NEW_RBS-NEXT: [[SHL2:%[0-9]+]]:vgpr(s64) = G_SHL [[MV3]], [[COPY17]](s32) + ; NEW_RBS-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV]], [[SHL2]](s64) + ; NEW_RBS-NEXT: [[LOAD2:%[0-9]+]]:vgpr(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s32), addrspace 1) + ; NEW_RBS-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[C9]](s32) + ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[LOAD2]], [[COPY18]] + ; NEW_RBS-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD2]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[C9]](s32) + ; NEW_RBS-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[PHI2]], [[COPY19]] + ; NEW_RBS-NEXT: [[C10:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 100 + ; 
NEW_RBS-NEXT: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[C10]](s32) + ; NEW_RBS-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ult), [[PHI2]](s32), [[COPY20]] + ; NEW_RBS-NEXT: [[COPY21:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) + ; NEW_RBS-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY15]](s1), $exec_lo, implicit-def $scc + ; NEW_RBS-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY21]](s1), implicit-def $scc + ; NEW_RBS-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: bb.5: + ; NEW_RBS-NEXT: successors: %bb.3(0x80000000) + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[COPY14]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4 + ; NEW_RBS-NEXT: [[PHI6:%[0-9]+]]:vgpr(s32) = G_PHI [[ADD1]](s32), %bb.4, [[DEF]](s32), %bb.2 + ; NEW_RBS-NEXT: [[COPY22:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) + ; NEW_RBS-NEXT: [[COPY23:%[0-9]+]]:sreg_32(s1) = COPY [[COPY22]](s1) + ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32) + ; NEW_RBS-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc + ; NEW_RBS-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY23]](s1), implicit-def $scc + ; NEW_RBS-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc + ; NEW_RBS-NEXT: G_BR %bb.3 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: bb.6: + ; NEW_RBS-NEXT: [[PHI7:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.3 + ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32) + ; NEW_RBS-NEXT: S_ENDPGM 0 + bb.0: + successors: %bb.1(0x80000000) + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32) + %3:_(s32) = COPY $vgpr2 + %4:_(s32) = COPY $vgpr3 + %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = COPY $vgpr4 + %7:_(s32) = COPY $vgpr5 + %8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) + %9:_(s32) = G_IMPLICIT_DEF + %10:_(s32) = G_CONSTANT i32 0 + %11:sreg_32(s1) = IMPLICIT_DEF + + bb.1: + successors: %bb.2(0x40000000), %bb.3(0x40000000) + + %12:sreg_32(s1) = PHI %11(s1), %bb.0, %13(s1), %bb.3 + %14:_(s32) = G_PHI %15(s32), %bb.3, %10(s32), %bb.0 + %16:_(s32) = G_PHI %10(s32), %bb.0, %17(s32), %bb.3 + %18:sreg_32(s1) = COPY %12(s1) + %19:_(s64) = G_SEXT %16(s32) + %20:_(s32) = G_CONSTANT i32 2 + %21:_(s64) = G_SHL %19, %20(s32) + %22:_(p1) = G_PTR_ADD %5, %21(s64) + %23:_(s32) = G_LOAD %22(p1) :: (load (s32), addrspace 1) + %24:_(s32) = G_CONSTANT i32 0 + %25:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %23(s32), %24 + %26:_(s1) = G_CONSTANT i1 true + %27:sreg_32(s1) = COPY %26(s1) + %28:sreg_32(s1) = S_ANDN2_B32 %18(s1), $exec_lo, implicit-def $scc + %29:sreg_32(s1) = S_AND_B32 $exec_lo, %27(s1), implicit-def $scc + %30:sreg_32(s1) = S_OR_B32 %28(s1), %29(s1), implicit-def $scc + %31:sreg_32(s1) = COPY %30(s1) + %32:sreg_32_xm0_xexec(s32) = SI_IF %25(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.2 + + bb.2: + successors: %bb.4(0x40000000), %bb.5(0x40000000) + + %33:_(s32) = G_CONSTANT i32 2 + %34:_(s64) = G_SHL %19, %33(s32) + %35:_(p1) = G_PTR_ADD %8, %34(s64) + %36:_(s32) = G_LOAD %35(p1) :: (load (s32), addrspace 1) + %37:_(s32) = G_CONSTANT i32 0 + %38:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %36(s32), %37 + %39:_(s1) = G_CONSTANT i1 
true + %40:sreg_32(s1) = COPY %39(s1) + %41:sreg_32(s1) = COPY %40(s1) + %42:sreg_32_xm0_xexec(s32) = SI_IF %38(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.4 + + bb.3: + successors: %bb.6(0x04000000), %bb.1(0x7c000000) + + %13:sreg_32(s1) = PHI %30(s1), %bb.1, %43(s1), %bb.5 + %17:_(s32) = G_PHI %44(s32), %bb.5, %9(s32), %bb.1 + %45:sreg_32(s1) = COPY %13(s1) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %32(s32) + %15:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %45(s1), %14(s32) + SI_LOOP %15(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.6 + + bb.4: + successors: %bb.5(0x80000000) + + %46:_(s32) = G_CONSTANT i32 2 + %47:_(s64) = G_SHL %19, %46(s32) + %48:_(p1) = G_PTR_ADD %2, %47(s64) + %49:_(s32) = G_LOAD %48(p1) :: (load (s32), addrspace 1) + %50:_(s32) = G_CONSTANT i32 1 + %51:_(s32) = G_ADD %49, %50 + G_STORE %51(s32), %48(p1) :: (store (s32), addrspace 1) + %52:_(s32) = G_ADD %16, %50 + %53:_(s32) = G_CONSTANT i32 100 + %54:_(s1) = G_ICMP intpred(ult), %16(s32), %53 + %55:sreg_32(s1) = COPY %54(s1) + %56:sreg_32(s1) = S_ANDN2_B32 %41(s1), $exec_lo, implicit-def $scc + %57:sreg_32(s1) = S_AND_B32 $exec_lo, %55(s1), implicit-def $scc + %58:sreg_32(s1) = S_OR_B32 %56(s1), %57(s1), implicit-def $scc + + bb.5: + successors: %bb.3(0x80000000) + + %59:sreg_32(s1) = PHI %40(s1), %bb.2, %58(s1), %bb.4 + %44:_(s32) = G_PHI %52(s32), %bb.4, %9(s32), %bb.2 + %60:sreg_32(s1) = COPY %59(s1) + %61:sreg_32(s1) = COPY %60(s1) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %42(s32) + %62:sreg_32(s1) = S_ANDN2_B32 %31(s1), $exec_lo, implicit-def $scc + %63:sreg_32(s1) = S_AND_B32 $exec_lo, %61(s1), implicit-def $scc + %43:sreg_32(s1) = S_OR_B32 %62(s1), %63(s1), implicit-def $scc + G_BR %bb.3 + + bb.6: + %64:_(s32) = G_PHI %15(s32), %bb.3 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %64(s32) + S_ENDPGM 0 +... From 45ff28746f5f6350a95d8d9a3e3b3a62b932bce9 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 3 Dec 2024 21:06:35 +0000 Subject: [PATCH 130/191] [ConstraintSystem] Fix signed overflow in negate. Use AddOverflow for potentially overflowing addition to fixed signed integer overflow. Compile-time impact is in the noise https://llvm-compile-time-tracker.com/compare.php?from=bfb26202e05ee2932b4368b5fca607df01e8247f&to=195b0707148b567c674235e59712458e7ce1bb0e&stat=instructions:u --- llvm/include/llvm/Analysis/ConstraintSystem.h | 4 +++- .../ConstraintElimination/constraint-overflow.ll | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/Analysis/ConstraintSystem.h b/llvm/include/llvm/Analysis/ConstraintSystem.h index 449852343964c..01eeadb17db9f 100644 --- a/llvm/include/llvm/Analysis/ConstraintSystem.h +++ b/llvm/include/llvm/Analysis/ConstraintSystem.h @@ -113,7 +113,9 @@ class ConstraintSystem { static SmallVector negate(SmallVector R) { // The negated constraint R is obtained by multiplying by -1 and adding 1 to // the constant. 
- R[0] += 1; + if (AddOverflow(R[0], int64_t(1), R[0])) + return {}; + return negateOrEqual(R); } diff --git a/llvm/test/Transforms/ConstraintElimination/constraint-overflow.ll b/llvm/test/Transforms/ConstraintElimination/constraint-overflow.ll index 88f87f4afab28..57b7b11be0cf1 100644 --- a/llvm/test/Transforms/ConstraintElimination/constraint-overflow.ll +++ b/llvm/test/Transforms/ConstraintElimination/constraint-overflow.ll @@ -38,3 +38,17 @@ exit: } declare void @llvm.assume(i1) + +define i1 @negate_overflow_add_1(i64 %x) { +; CHECK-LABEL: define i1 @negate_overflow_add_1( +; CHECK-SAME: i64 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SUB:%.*]] = add nsw i64 [[X]], -9223372036854775807 +; CHECK-NEXT: [[C:%.*]] = icmp slt i64 0, [[SUB]] +; CHECK-NEXT: ret i1 [[C]] +; +entry: + %sub = add nsw i64 %x, -9223372036854775807 + %c = icmp slt i64 0, %sub + ret i1 %c +} From 69e9ceb04343ee661b53a49d61158a0e81250d32 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 3 Dec 2024 21:12:37 +0000 Subject: [PATCH 131/191] [gn build] Port 80987ef4b609 --- llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn | 1 - llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn | 1 - llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn | 1 + 3 files changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn index bcb46d919b6c1..ab72ac4ae9f4b 100644 --- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn @@ -50,7 +50,6 @@ static_library("CodeGen") { "DFAPacketizer.cpp", "DeadMachineInstructionElim.cpp", "DetectDeadLanes.cpp", - "DroppedVariableStats.cpp", "DwarfEHPrepare.cpp", "EHContGuardCatchret.cpp", "EarlyIfConversion.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn index dc01cc9a40a9c..a3f89a5648cb5 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn @@ -26,7 +26,6 @@ unittest("CodeGenTests") { "CCStateTest.cpp", "DIEHashTest.cpp", "DIETest.cpp", - "DroppedVariableStatsIRTest.cpp", "DwarfStringPoolEntryRefTest.cpp", "InstrRefLDVTest.cpp", "LexicalScopesTest.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn index b19d54d7ed92f..ccee5d79afdcc 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn @@ -28,6 +28,7 @@ unittest("IRTests") { "DemandedBitsTest.cpp", "DominatorTreeBatchUpdatesTest.cpp", "DominatorTreeTest.cpp", + "DroppedVariableStatsTest.cpp", "FunctionTest.cpp", "IRBuilderTest.cpp", "InstructionsTest.cpp", From 7417ba67305747469e9636af68476faf12eefbcc Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 3 Dec 2024 21:12:38 +0000 Subject: [PATCH 132/191] [gn build] Port fef54d0393fd --- llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn index cc24d9f4449cd..745179213ae32 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn @@ -179,6 +179,8 @@ static_library("LLVMAMDGPUCodeGen") { "AMDGPUPromoteAlloca.cpp", "AMDGPUPromoteKernelArguments.cpp", "AMDGPURegBankCombiner.cpp", + 
"AMDGPURegBankLegalize.cpp", + "AMDGPURegBankSelect.cpp", "AMDGPURegisterBankInfo.cpp", "AMDGPURemoveIncompatibleFunctions.cpp", "AMDGPUReserveWWMRegs.cpp", From b5b15c1973935da943e8cee26dc961c6dbe339b9 Mon Sep 17 00:00:00 2001 From: Congcong Cai Date: Wed, 4 Dec 2024 05:21:07 +0800 Subject: [PATCH 133/191] [ast matcher][NFC] make dump_ast_matchers.py run in any path (#117942) --- clang/docs/tools/dump_ast_matchers.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/clang/docs/tools/dump_ast_matchers.py b/clang/docs/tools/dump_ast_matchers.py index 705ff0d4d4098..b6f00657ec914 100755 --- a/clang/docs/tools/dump_ast_matchers.py +++ b/clang/docs/tools/dump_ast_matchers.py @@ -5,6 +5,7 @@ import collections import re +import os try: from urllib.request import urlopen @@ -18,7 +19,11 @@ CLASS_INDEX_PAGE = None print("Unable to get %s: %s" % (CLASS_INDEX_PAGE_URL, e)) -MATCHERS_FILE = "../../include/clang/ASTMatchers/ASTMatchers.h" +CURRENT_DIR = os.path.dirname(__file__) +MATCHERS_FILE = os.path.join( + CURRENT_DIR, "../../include/clang/ASTMatchers/ASTMatchers.h" +) +HTML_FILE = os.path.join(CURRENT_DIR, "../LibASTMatchersReference.html") # Each matcher is documented in one row of the form: # result | name | argA @@ -590,7 +595,7 @@ def sort_table(matcher_type, matcher_map): narrowing_matcher_table = sort_table("NARROWING", narrowing_matchers) traversal_matcher_table = sort_table("TRAVERSAL", traversal_matchers) -reference = open("../LibASTMatchersReference.html").read() +reference = open(HTML_FILE).read() reference = re.sub( r"", node_matcher_table, From d5956fb8f999e60af8bede8b17b02ca3a7b7cf4f Mon Sep 17 00:00:00 2001 From: Maksim Panchenko Date: Tue, 3 Dec 2024 13:44:51 -0800 Subject: [PATCH 134/191] [BOLT][AArch64] Add support for short LLD thunks/veneers (#118422) When a callee function is closer than 256MB from its call site, LLD linker can strategically create a short thunk for the function with a single branch instruction (that covers +/-128MB). Detect and convert such thunks into direct calls in BOLT. 
--- bolt/lib/Passes/VeneerElimination.cpp | 11 ++--- bolt/test/AArch64/veneer-lld-abs.s | 59 +++++++++++++++++++-------- 2 files changed, 47 insertions(+), 23 deletions(-) diff --git a/bolt/lib/Passes/VeneerElimination.cpp b/bolt/lib/Passes/VeneerElimination.cpp index b386b2756a2b8..99d0ffeca8cc2 100644 --- a/bolt/lib/Passes/VeneerElimination.cpp +++ b/bolt/lib/Passes/VeneerElimination.cpp @@ -46,16 +46,17 @@ Error VeneerElimination::runOnFunctions(BinaryContext &BC) { if (BF.isIgnored()) continue; + MCInst &FirstInstruction = *(BF.begin()->begin()); const MCSymbol *VeneerTargetSymbol = 0; uint64_t TargetAddress; - if (BC.MIB->matchAbsLongVeneer(BF, TargetAddress)) { + if (BC.MIB->isTailCall(FirstInstruction)) { + VeneerTargetSymbol = BC.MIB->getTargetSymbol(FirstInstruction); + } else if (BC.MIB->matchAbsLongVeneer(BF, TargetAddress)) { if (BinaryFunction *TargetBF = BC.getBinaryFunctionAtAddress(TargetAddress)) VeneerTargetSymbol = TargetBF->getSymbol(); - } else { - MCInst &FirstInstruction = *(BF.begin()->begin()); - if (BC.MIB->hasAnnotation(FirstInstruction, "AArch64Veneer")) - VeneerTargetSymbol = BC.MIB->getTargetSymbol(FirstInstruction, 1); + } else if (BC.MIB->hasAnnotation(FirstInstruction, "AArch64Veneer")) { + VeneerTargetSymbol = BC.MIB->getTargetSymbol(FirstInstruction, 1); } if (!VeneerTargetSymbol) diff --git a/bolt/test/AArch64/veneer-lld-abs.s b/bolt/test/AArch64/veneer-lld-abs.s index d10ff46e2cb01..7e6fe2d127060 100644 --- a/bolt/test/AArch64/veneer-lld-abs.s +++ b/bolt/test/AArch64/veneer-lld-abs.s @@ -1,5 +1,5 @@ -## Check that llvm-bolt correctly recognizes long absolute thunks generated -## by LLD. +## Check that llvm-bolt correctly recognizes veneers/thunks for absolute code +## generated by LLD. # RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o # RUN: %clang %cflags -fno-PIC -no-pie %t.o -o %t.exe -nostdlib \ @@ -12,40 +12,63 @@ .text .balign 4 -.global foo -.type foo, %function -foo: - adrp x1, foo +.global far_function +.type far_function, %function +far_function: ret -.size foo, .-foo +.size far_function, .-far_function + +.global near_function +.type near_function, %function +near_function: + ret +.size near_function, .-near_function + +## Force relocations against .text. +.reloc 0, R_AARCH64_NONE .section ".mytext", "ax" .balign 4 -.global __AArch64AbsLongThunk_foo -.type __AArch64AbsLongThunk_foo, %function -__AArch64AbsLongThunk_foo: +## This version of a thunk is always generated by LLD for function calls +## spanning more than 256MB. +.global __AArch64AbsLongThunk_far_function +.type __AArch64AbsLongThunk_far_function, %function +__AArch64AbsLongThunk_far_function: ldr x16, .L1 br x16 -# CHECK-INPUT-LABEL: <__AArch64AbsLongThunk_foo>: +# CHECK-INPUT-LABEL: <__AArch64AbsLongThunk_far_function>: # CHECK-INPUT-NEXT: ldr # CHECK-INPUT-NEXT: br .L1: - .quad foo -.size __AArch64AbsLongThunk_foo, .-__AArch64AbsLongThunk_foo + .quad far_function +.size __AArch64AbsLongThunk_far_function, .-__AArch64AbsLongThunk_far_function + +## If a callee is closer than 256MB away, LLD may generate a thunk with a direct +## jump to the callee. Note, that the name might still include "AbsLong". 
+.global __AArch64AbsLongThunk_near_function +.type __AArch64AbsLongThunk_near_function, %function +__AArch64AbsLongThunk_near_function: + b near_function +# CHECK-INPUT-LABEL: <__AArch64AbsLongThunk_near_function>: +# CHECK-INPUT-NEXT: b {{.*}} +.size __AArch64AbsLongThunk_near_function, .-__AArch64AbsLongThunk_near_function -## Check that the thunk was removed from .text and _start() calls foo() +## Check that thunks were removed from .text, and _start calls functions ## directly. -# CHECK-OUTPUT-NOT: __AArch64AbsLongThunk_foo +# CHECK-OUTPUT-NOT: __AArch64AbsLongThunk_{{.*}} .global _start .type _start, %function _start: # CHECK-INPUT-LABEL: <_start>: # CHECK-OUTPUT-LABEL: <_start>: - bl __AArch64AbsLongThunk_foo -# CHECK-INPUT-NEXT: bl {{.*}} <__AArch64AbsLongThunk_foo> -# CHECK-OUTPUT-NEXT: bl {{.*}} + bl __AArch64AbsLongThunk_far_function + bl __AArch64AbsLongThunk_near_function +# CHECK-INPUT-NEXT: bl {{.*}} <__AArch64AbsLongThunk_far_function> +# CHECK-INPUT-NEXT: bl {{.*}} <__AArch64AbsLongThunk_near_function> +# CHECK-OUTPUT-NEXT: bl {{.*}} +# CHECK-OUTPUT-NEXT: bl {{.*}} ret .size _start, .-_start From a9bf16d961e9bb0923b401bc26697c6ca707a1f5 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 3 Dec 2024 13:53:20 -0800 Subject: [PATCH 135/191] [StaticAnalyzer] Migrate away from PointerUnion::{is,get} (NFC) (#118421) Note that PointerUnion::{is,get} have been soft deprecated in PointerUnion.h: // FIXME: Replace the uses of is(), get() and dyn_cast() with // isa, cast and the llvm::dyn_cast I'm not touching PointerUnion::dyn_cast for now because it's a bit complicated; we could blindly migrate it to dyn_cast_if_present, but we should probably use dyn_cast when the operand is known to be non-null. --- .../Checkers/MallocSizeofChecker.cpp | 6 +++--- .../lib/StaticAnalyzer/Core/BasicValueFactory.cpp | 8 ++++---- clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp | 10 +++++----- clang/lib/StaticAnalyzer/Core/MemRegion.cpp | 6 +++--- clang/lib/StaticAnalyzer/Core/SVals.cpp | 14 +++++++------- 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocSizeofChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocSizeofChecker.cpp index 9e81a6bd19fc5..df23735e4668e 100644 --- a/clang/lib/StaticAnalyzer/Checkers/MallocSizeofChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/MallocSizeofChecker.cpp @@ -211,9 +211,9 @@ class MallocSizeofChecker : public Checker { continue; const TypeSourceInfo *TSI = nullptr; - if (CallRec.CastedExprParent.is()) { - TSI = CallRec.CastedExprParent.get() - ->getTypeSourceInfo(); + if (const auto *VD = + dyn_cast(CallRec.CastedExprParent)) { + TSI = VD->getTypeSourceInfo(); } else { TSI = CallRec.ExplicitCastType; } diff --git a/clang/lib/StaticAnalyzer/Core/BasicValueFactory.cpp b/clang/lib/StaticAnalyzer/Core/BasicValueFactory.cpp index b0563b6c070f1..827c04143e658 100644 --- a/clang/lib/StaticAnalyzer/Core/BasicValueFactory.cpp +++ b/clang/lib/StaticAnalyzer/Core/BasicValueFactory.cpp @@ -196,13 +196,13 @@ const PointerToMemberData *BasicValueFactory::accumCXXBase( const NamedDecl *ND = nullptr; llvm::ImmutableList BaseSpecList; - if (PTMDT.isNull() || PTMDT.is()) { - if (PTMDT.is()) - ND = PTMDT.get(); + if (PTMDT.isNull() || isa(PTMDT)) { + if (const auto *NDP = dyn_cast_if_present(PTMDT)) + ND = NDP; BaseSpecList = CXXBaseListFactory.getEmptyList(); } else { - const PointerToMemberData *PTMD = PTMDT.get(); + const auto *PTMD = cast(PTMDT); ND = PTMD->getDeclaratorDecl(); BaseSpecList = PTMD->getCXXBaseList(); 
diff --git a/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp b/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp index 1e0cc2eea9ed8..c4af02f21f494 100644 --- a/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp @@ -211,9 +211,9 @@ void ExplodedNode::NodeGroup::replaceNode(ExplodedNode *node) { assert(!getFlag()); GroupStorage &Storage = reinterpret_cast(P); - assert(Storage.is()); + assert(isa(Storage)); Storage = node; - assert(Storage.is()); + assert(isa(Storage)); } void ExplodedNode::NodeGroup::addNode(ExplodedNode *N, ExplodedGraph &G) { @@ -222,7 +222,7 @@ void ExplodedNode::NodeGroup::addNode(ExplodedNode *N, ExplodedGraph &G) { GroupStorage &Storage = reinterpret_cast(P); if (Storage.isNull()) { Storage = N; - assert(Storage.is()); + assert(isa(Storage)); return; } @@ -230,7 +230,7 @@ void ExplodedNode::NodeGroup::addNode(ExplodedNode *N, ExplodedGraph &G) { if (!V) { // Switch from single-node to multi-node representation. - ExplodedNode *Old = Storage.get(); + auto *Old = cast(Storage); BumpVectorContext &Ctx = G.getNodeAllocator(); V = new (G.getAllocator()) ExplodedNodeVector(Ctx, 4); @@ -238,7 +238,7 @@ void ExplodedNode::NodeGroup::addNode(ExplodedNode *N, ExplodedGraph &G) { Storage = V; assert(!getFlag()); - assert(Storage.is()); + assert(isa(Storage)); } V->push_back(N, G.getNodeAllocator()); diff --git a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp index ad4e43630dd44..bbf2303b9f6ef 100644 --- a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp +++ b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp @@ -1068,10 +1068,10 @@ const VarRegion *MemRegionManager::getVarRegion(const VarDecl *D, llvm::PointerUnion V = getStackOrCaptureRegionForDeclContext(LC, DC, D); - if (V.is()) - return V.get(); + if (const auto *VR = dyn_cast_if_present(V)) + return VR; - const auto *STC = V.get(); + const auto *STC = cast(V); if (!STC) { // FIXME: Assign a more sensible memory space to static locals diff --git a/clang/lib/StaticAnalyzer/Core/SVals.cpp b/clang/lib/StaticAnalyzer/Core/SVals.cpp index 84e7e033404c0..d009552965eca 100644 --- a/clang/lib/StaticAnalyzer/Core/SVals.cpp +++ b/clang/lib/StaticAnalyzer/Core/SVals.cpp @@ -205,10 +205,10 @@ const NamedDecl *nonloc::PointerToMember::getDecl() const { return nullptr; const NamedDecl *ND = nullptr; - if (PTMD.is()) - ND = PTMD.get(); + if (const auto *NDP = dyn_cast(PTMD)) + ND = NDP; else - ND = PTMD.get()->getDeclaratorDecl(); + ND = cast(PTMD)->getDeclaratorDecl(); return ND; } @@ -227,16 +227,16 @@ nonloc::CompoundVal::iterator nonloc::CompoundVal::end() const { nonloc::PointerToMember::iterator nonloc::PointerToMember::begin() const { const PTMDataType PTMD = getPTMData(); - if (PTMD.is()) + if (isa(PTMD)) return {}; - return PTMD.get()->begin(); + return cast(PTMD)->begin(); } nonloc::PointerToMember::iterator nonloc::PointerToMember::end() const { const PTMDataType PTMD = getPTMData(); - if (PTMD.is()) + if (isa(PTMD)) return {}; - return PTMD.get()->end(); + return cast(PTMD)->end(); } //===----------------------------------------------------------------------===// From c8060423fe153fcb8febbebc4a043123b7a29a7b Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 3 Dec 2024 16:57:17 -0500 Subject: [PATCH 136/191] [libc++] Drop dependency on __functional/operations.h from (#117302) This should reduce the preprocessed size of the atomic header and other headers in the synchronization library. 
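The replacement pattern is simply a generic lambda with the same semantics as the dropped
function object. A small standalone illustration of the technique follows; apply_rmw and the
lambda names are examples for this sketch, not libc++ internals.

    #include <cassert>

    // A generic lambda can stand in for std::plus<>{} / std::minus<>{},
    // so the caller no longer needs the header that defines those function objects.
    template <class T, class BinaryOp>
    T apply_rmw(T current, T operand, BinaryOp op) {
      return op(current, operand);
    }

    int main() {
      auto plus  = [](auto a, auto b) { return a + b; }; // stand-in for std::plus<>{}
      auto minus = [](auto a, auto b) { return a - b; }; // stand-in for std::minus<>{}
      assert(apply_rmw(5, 3, plus) == 8);
      assert(apply_rmw(5, 3, minus) == 2);
      return 0;
    }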
--- libcxx/include/__atomic/atomic.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/libcxx/include/__atomic/atomic.h b/libcxx/include/__atomic/atomic.h index ae0475693f22b..d83719c8733d7 100644 --- a/libcxx/include/__atomic/atomic.h +++ b/libcxx/include/__atomic/atomic.h @@ -16,7 +16,6 @@ #include <__atomic/memory_order.h> #include <__config> #include <__cstddef/ptrdiff_t.h> -#include <__functional/operations.h> #include <__memory/addressof.h> #include <__type_traits/enable_if.h> #include <__type_traits/is_floating_point.h> @@ -376,7 +375,8 @@ struct atomic<_Tp> : __atomic_base<_Tp> { auto __builtin_op = [](auto __a, auto __builtin_operand, auto __order) { return std::__cxx_atomic_fetch_add(__a, __builtin_operand, __order); }; - return __rmw_op(std::forward<_This>(__self), __operand, __m, std::plus<>{}, __builtin_op); + auto __plus = [](auto __a, auto __b) { return __a + __b; }; + return __rmw_op(std::forward<_This>(__self), __operand, __m, __plus, __builtin_op); } template @@ -384,7 +384,8 @@ struct atomic<_Tp> : __atomic_base<_Tp> { auto __builtin_op = [](auto __a, auto __builtin_operand, auto __order) { return std::__cxx_atomic_fetch_sub(__a, __builtin_operand, __order); }; - return __rmw_op(std::forward<_This>(__self), __operand, __m, std::minus<>{}, __builtin_op); + auto __minus = [](auto __a, auto __b) { return __a - __b; }; + return __rmw_op(std::forward<_This>(__self), __operand, __m, __minus, __builtin_op); } public: From 5522d2462ed261a9c60fda2d56c65978a70a1793 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Tue, 3 Dec 2024 14:04:49 -0800 Subject: [PATCH 137/191] [flang][cuda] Allow AbstractResult to run in gpu.module (#118529) in CUDA Fortran, device function are converted to `gpu.func` inside the `gpu.module` operation. Update the AbstractResult pass to be able to run on `func.func` and `gpu.func` operations inside the `gpu.module`. 
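A rough C++ sketch of the resulting dispatch shape: the pass still handles a plain func.func
directly, and for a gpu.module it walks the func.func and gpu.func operations nested inside.
handleFunc and handleGpuFunc are hypothetical stand-ins for the real per-function rewrite;
TypeSwitch and getOps are the same utilities the patch itself relies on.

    #include "llvm/ADT/TypeSwitch.h"
    #include "mlir/Dialect/Func/IR/FuncOps.h"
    #include "mlir/Dialect/GPU/IR/GPUDialect.h"

    // Hypothetical per-function hooks standing in for the real rewrite logic.
    void handleFunc(mlir::func::FuncOp func);
    void handleGpuFunc(mlir::gpu::GPUFuncOp func);

    // Dispatch over the top-level operations the pass can now see.
    void runOnTopLevelOp(mlir::Operation *op) {
      llvm::TypeSwitch<mlir::Operation *>(op)
          .Case<mlir::func::FuncOp>(
              [&](mlir::func::FuncOp func) { handleFunc(func); })
          .Case<mlir::gpu::GPUModuleOp>([&](mlir::gpu::GPUModuleOp gpuMod) {
            // A gpu.module can hold both host-style funcs and gpu.func entries.
            for (auto f : gpuMod.getOps<mlir::func::FuncOp>())
              handleFunc(f);
            for (auto g : gpuMod.getOps<mlir::gpu::GPUFuncOp>())
              handleGpuFunc(g);
          });
    }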
--- .../flang/Optimizer/Passes/Pipelines.h | 1 + flang/lib/Optimizer/Passes/Pipelines.cpp | 3 +- .../Optimizer/Transforms/AbstractResult.cpp | 28 +++++++++++++++++-- flang/test/Driver/bbc-mlir-pass-pipeline.f90 | 9 ++++-- .../test/Driver/mlir-debug-pass-pipeline.f90 | 17 ++++++++--- flang/test/Driver/mlir-pass-pipeline.f90 | 22 +++++++++++---- flang/test/Fir/basic-program.fir | 22 +++++++++++---- 7 files changed, 82 insertions(+), 20 deletions(-) diff --git a/flang/include/flang/Optimizer/Passes/Pipelines.h b/flang/include/flang/Optimizer/Passes/Pipelines.h index 55fafc2e6b36f..339182605f818 100644 --- a/flang/include/flang/Optimizer/Passes/Pipelines.h +++ b/flang/include/flang/Optimizer/Passes/Pipelines.h @@ -20,6 +20,7 @@ #include "flang/Tools/CrossToolHelpers.h" #include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h" #include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h" +#include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/LLVMIR/LLVMAttrs.h" #include "mlir/Pass/PassManager.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp index 0b7b3bafde008..0743fb60aa847 100644 --- a/flang/lib/Optimizer/Passes/Pipelines.cpp +++ b/flang/lib/Optimizer/Passes/Pipelines.cpp @@ -16,7 +16,8 @@ namespace fir { void addNestedPassToAllTopLevelOperations(mlir::PassManager &pm, PassConstructor ctor) { addNestedPassToOps(pm, ctor); + mlir::omp::PrivateClauseOp, fir::GlobalOp, + mlir::gpu::GPUModuleOp>(pm, ctor); } void addNestedPassToAllTopLevelOperationsConditionally( diff --git a/flang/lib/Optimizer/Transforms/AbstractResult.cpp b/flang/lib/Optimizer/Transforms/AbstractResult.cpp index e64280508755a..2eca349110f3a 100644 --- a/flang/lib/Optimizer/Transforms/AbstractResult.cpp +++ b/flang/lib/Optimizer/Transforms/AbstractResult.cpp @@ -14,6 +14,7 @@ #include "flang/Optimizer/Dialect/Support/FIRContext.h" #include "flang/Optimizer/Transforms/Passes.h" #include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/IR/Diagnostics.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" @@ -331,9 +332,10 @@ class AbstractResultOpt using fir::impl::AbstractResultOptBase< AbstractResultOpt>::AbstractResultOptBase; - void runOnSpecificOperation(mlir::func::FuncOp func, bool shouldBoxResult, - mlir::RewritePatternSet &patterns, - mlir::ConversionTarget &target) { + template + void runOnFunctionLikeOperation(OpTy func, bool shouldBoxResult, + mlir::RewritePatternSet &patterns, + mlir::ConversionTarget &target) { auto loc = func.getLoc(); auto *context = &getContext(); // Convert function type itself if it has an abstract result. 
@@ -384,6 +386,18 @@ class AbstractResultOpt } } + void runOnSpecificOperation(mlir::func::FuncOp func, bool shouldBoxResult, + mlir::RewritePatternSet &patterns, + mlir::ConversionTarget &target) { + runOnFunctionLikeOperation(func, shouldBoxResult, patterns, target); + } + + void runOnSpecificOperation(mlir::gpu::GPUFuncOp func, bool shouldBoxResult, + mlir::RewritePatternSet &patterns, + mlir::ConversionTarget &target) { + runOnFunctionLikeOperation(func, shouldBoxResult, patterns, target); + } + inline static bool containsFunctionTypeWithAbstractResult(mlir::Type type) { return mlir::TypeSwitch(type) .Case([](fir::BoxProcType boxProc) { @@ -448,6 +462,14 @@ class AbstractResultOpt mlir::TypeSwitch(op) .Case([&](auto op) { runOnSpecificOperation(op, shouldBoxResult, patterns, target); + }) + .Case([&](auto op) { + auto gpuMod = mlir::dyn_cast(*op); + for (auto funcOp : gpuMod.template getOps()) + runOnSpecificOperation(funcOp, shouldBoxResult, patterns, target); + for (auto gpuFuncOp : gpuMod.template getOps()) + runOnSpecificOperation(gpuFuncOp, shouldBoxResult, patterns, + target); }); // Convert the calls and, if needed, the ReturnOp in the function body. diff --git a/flang/test/Driver/bbc-mlir-pass-pipeline.f90 b/flang/test/Driver/bbc-mlir-pass-pipeline.f90 index 5520d750e2ce1..1f09e7ad4c2f5 100644 --- a/flang/test/Driver/bbc-mlir-pass-pipeline.f90 +++ b/flang/test/Driver/bbc-mlir-pass-pipeline.f90 @@ -17,12 +17,14 @@ ! CHECK-NEXT: (S) 0 num-cse'd - Number of operations CSE'd ! CHECK-NEXT: (S) 0 num-dce'd - Number of operations DCE'd -! CHECK-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private'] +! CHECK-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private'] ! CHECK-NEXT: 'fir.global' Pipeline ! CHECK-NEXT: CharacterConversion ! CHECK-NEXT: 'func.func' Pipeline ! CHECK-NEXT: ArrayValueCopy ! CHECK-NEXT: CharacterConversion +! CHECK-NEXT: 'gpu.module' Pipeline +! CHECK-NEXT: CharacterConversion ! CHECK-NEXT: 'omp.declare_reduction' Pipeline ! CHECK-NEXT: CharacterConversion ! CHECK-NEXT: 'omp.private' Pipeline @@ -48,13 +50,16 @@ ! CHECK-NEXT: PolymorphicOpConversion ! CHECK-NEXT: AssumedRankOpConversion -! CHECK-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private'] +! CHECK-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private'] ! CHECK-NEXT: 'fir.global' Pipeline ! CHECK-NEXT: StackReclaim ! CHECK-NEXT: CFGConversion ! CHECK-NEXT: 'func.func' Pipeline ! CHECK-NEXT: StackReclaim ! CHECK-NEXT: CFGConversion +! CHECK-NEXT: 'gpu.module' Pipeline +! CHECK-NEXT: StackReclaim +! CHECK-NEXT: CFGConversion ! CHECK-NEXT: 'omp.declare_reduction' Pipeline ! CHECK-NEXT: StackReclaim ! CHECK-NEXT: CFGConversion diff --git a/flang/test/Driver/mlir-debug-pass-pipeline.f90 b/flang/test/Driver/mlir-debug-pass-pipeline.f90 index ab5ddedf5fc18..4326953421e4b 100644 --- a/flang/test/Driver/mlir-debug-pass-pipeline.f90 +++ b/flang/test/Driver/mlir-debug-pass-pipeline.f90 @@ -28,11 +28,13 @@ ! ALL: Pass statistics report ! ALL: Fortran::lower::VerifierPass -! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private'] +! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private'] ! ALL-NEXT: 'fir.global' Pipeline ! ALL-NEXT: InlineElementals ! ALL-NEXT: 'func.func' Pipeline ! ALL-NEXT: InlineElementals +! 
ALL-NEXT: 'gpu.module' Pipeline +! ALL-NEXT: InlineElementals ! ALL-NEXT: 'omp.declare_reduction' Pipeline ! ALL-NEXT: InlineElementals ! ALL-NEXT: 'omp.private' Pipeline @@ -49,12 +51,14 @@ ! ALL-NEXT: (S) 0 num-cse'd - Number of operations CSE'd ! ALL-NEXT: (S) 0 num-dce'd - Number of operations DCE'd -! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private'] +! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private'] ! ALL-NEXT: 'fir.global' Pipeline ! ALL-NEXT: CharacterConversion ! ALL-NEXT: 'func.func' Pipeline ! ALL-NEXT: ArrayValueCopy ! ALL-NEXT: CharacterConversion +! ALL-NEXT: 'gpu.module' Pipeline +! ALL-NEXT: CharacterConversion ! ALL-NEXT: 'omp.declare_reduction' Pipeline ! ALL-NEXT: CharacterConversion ! ALL-NEXT: 'omp.private' Pipeline @@ -78,13 +82,16 @@ ! ALL-NEXT: PolymorphicOpConversion ! ALL-NEXT: AssumedRankOpConversion -! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private'] +! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private'] ! ALL-NEXT: 'fir.global' Pipeline ! ALL-NEXT: StackReclaim ! ALL-NEXT: CFGConversion ! ALL-NEXT: 'func.func' Pipeline ! ALL-NEXT: StackReclaim ! ALL-NEXT: CFGConversion +! ALL-NEXT: 'gpu.module' Pipeline +! ALL-NEXT: StackReclaim +! ALL-NEXT: CFGConversion ! ALL-NEXT: 'omp.declare_reduction' Pipeline ! ALL-NEXT: StackReclaim ! ALL-NEXT: CFGConversion @@ -99,11 +106,13 @@ ! ALL-NEXT: (S) 0 num-dce'd - Number of operations DCE'd ! ALL-NEXT: BoxedProcedurePass -! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private'] +! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private'] ! ALL-NEXT: 'fir.global' Pipeline ! ALL-NEXT: AbstractResultOpt ! ALL-NEXT: 'func.func' Pipeline ! ALL-NEXT: AbstractResultOpt +! ALL-NEXT: 'gpu.module' Pipeline +! ALL-NEXT: AbstractResultOpt ! ALL-NEXT: 'omp.declare_reduction' Pipeline ! ALL-NEXT: AbstractResultOpt ! ALL-NEXT: 'omp.private' Pipeline diff --git a/flang/test/Driver/mlir-pass-pipeline.f90 b/flang/test/Driver/mlir-pass-pipeline.f90 index 33c8183b27aef..6ffdbb0234e85 100644 --- a/flang/test/Driver/mlir-pass-pipeline.f90 +++ b/flang/test/Driver/mlir-pass-pipeline.f90 @@ -16,13 +16,16 @@ ! ALL: Fortran::lower::VerifierPass ! O2-NEXT: Canonicalizer -! ALL: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private'] +! ALL: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private'] ! ALL-NEXT:'fir.global' Pipeline ! O2-NEXT: SimplifyHLFIRIntrinsics ! ALL: InlineElementals ! ALL-NEXT:'func.func' Pipeline ! O2-NEXT: SimplifyHLFIRIntrinsics ! ALL: InlineElementals +! ALL-NEXT:'gpu.module' Pipeline +! O2-NEXT: SimplifyHLFIRIntrinsics +! ALL: InlineElementals ! ALL-NEXT:'omp.declare_reduction' Pipeline ! O2-NEXT: SimplifyHLFIRIntrinsics ! ALL: InlineElementals @@ -33,11 +36,13 @@ ! O2-NEXT: CSE ! O2-NEXT: (S) {{.*}} num-cse'd ! O2-NEXT: (S) {{.*}} num-dce'd -! O2-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private'] +! O2-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private'] ! O2-NEXT: 'fir.global' Pipeline ! O2-NEXT: OptimizedBufferization ! O2-NEXT: 'func.func' Pipeline ! O2-NEXT: OptimizedBufferization +! O2-NEXT: 'gpu.module' Pipeline +! 
O2-NEXT: OptimizedBufferization ! O2-NEXT: 'omp.declare_reduction' Pipeline ! O2-NEXT: OptimizedBufferization ! O2-NEXT: 'omp.private' Pipeline @@ -54,12 +59,14 @@ ! ALL-NEXT: (S) 0 num-cse'd - Number of operations CSE'd ! ALL-NEXT: (S) 0 num-dce'd - Number of operations DCE'd -! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private'] +! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private'] ! ALL-NEXT: 'fir.global' Pipeline ! ALL-NEXT: CharacterConversion ! ALL-NEXT: 'func.func' Pipeline ! ALL-NEXT: ArrayValueCopy ! ALL-NEXT: CharacterConversion +! ALL-NEXT: 'gpu.module' Pipeline +! ALL-NEXT: CharacterConversion ! ALL-NEXT: 'omp.declare_reduction' Pipeline ! ALL-NEXT: CharacterConversion ! ALL-NEXT: 'omp.private' Pipeline @@ -86,13 +93,16 @@ ! ALL-NEXT: AssumedRankOpConversion ! O2-NEXT: AddAliasTags -! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private'] +! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private'] ! ALL-NEXT: 'fir.global' Pipeline ! ALL-NEXT: StackReclaim ! ALL-NEXT: CFGConversion ! ALL-NEXT: 'func.func' Pipeline ! ALL-NEXT: StackReclaim ! ALL-NEXT: CFGConversion +! ALL-NEXT: 'gpu.module' Pipeline +! ALL-NEXT: StackReclaim +! ALL-NEXT: CFGConversion ! ALL-NEXT: 'omp.declare_reduction' Pipeline ! ALL-NEXT: StackReclaim ! ALL-NEXT: CFGConversion @@ -108,11 +118,13 @@ ! ALL-NEXT: (S) 0 num-dce'd - Number of operations DCE'd ! ALL-NEXT: BoxedProcedurePass -! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private'] +! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private'] ! ALL-NEXT: 'fir.global' Pipeline ! ALL-NEXT: AbstractResultOpt ! ALL-NEXT: 'func.func' Pipeline ! ALL-NEXT: AbstractResultOpt +! ALL-NEXT: 'gpu.module' Pipeline +! ALL-NEXT: AbstractResultOpt ! ALL-NEXT: 'omp.declare_reduction' Pipeline ! ALL-NEXT: AbstractResultOpt ! 
ALL-NEXT: 'omp.private' Pipeline diff --git a/flang/test/Fir/basic-program.fir b/flang/test/Fir/basic-program.fir index ad5201af8311f..50b91ce340b3a 100644 --- a/flang/test/Fir/basic-program.fir +++ b/flang/test/Fir/basic-program.fir @@ -17,13 +17,16 @@ func.func @_QQmain() { // PASSES: Pass statistics report // PASSES: Canonicalizer -// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private'] +// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private'] // PASSES-NEXT: 'fir.global' Pipeline // PASSES-NEXT: SimplifyHLFIRIntrinsics // PASSES-NEXT: InlineElementals // PASSES-NEXT: 'func.func' Pipeline // PASSES-NEXT: SimplifyHLFIRIntrinsics // PASSES-NEXT: InlineElementals +// PASSES-NEXT: 'gpu.module' Pipeline +// PASSES-NEXT: SimplifyHLFIRIntrinsics +// PASSES-NEXT: InlineElementals // PASSES-NEXT: 'omp.declare_reduction' Pipeline // PASSES-NEXT: SimplifyHLFIRIntrinsics // PASSES-NEXT: InlineElementals @@ -34,11 +37,13 @@ func.func @_QQmain() { // PASSES-NEXT: CSE // PASSES-NEXT: (S) 0 num-cse'd - Number of operations CSE'd // PASSES-NEXT: (S) 0 num-dce'd - Number of operations DCE'd -// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private'] +// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private'] // PASSES-NEXT: 'fir.global' Pipeline // PASSES-NEXT: OptimizedBufferization // PASSES-NEXT: 'func.func' Pipeline // PASSES-NEXT: OptimizedBufferization +// PASSES-NEXT: 'gpu.module' Pipeline +// PASSES-NEXT: OptimizedBufferization // PASSES-NEXT: 'omp.declare_reduction' Pipeline // PASSES-NEXT: OptimizedBufferization // PASSES-NEXT: 'omp.private' Pipeline @@ -52,12 +57,14 @@ func.func @_QQmain() { // PASSES-NEXT: (S) 0 num-cse'd - Number of operations CSE'd // PASSES-NEXT: (S) 0 num-dce'd - Number of operations DCE'd -// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private'] +// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private'] // PASSES-NEXT: 'fir.global' Pipeline // PASSES-NEXT: CharacterConversion // PASSES-NEXT: 'func.func' Pipeline // PASSES-NEXT: ArrayValueCopy // PASSES-NEXT: CharacterConversion +// PASSES-NEXT: 'gpu.module' Pipeline +// PASSES-NEXT: CharacterConversion // PASSES-NEXT: 'omp.declare_reduction' Pipeline // PASSES-NEXT: CharacterConversion // PASSES-NEXT: 'omp.private' Pipeline @@ -84,13 +91,16 @@ func.func @_QQmain() { // PASSES-NEXT: AssumedRankOpConversion // PASSES-NEXT: AddAliasTags -// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private'] +// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private'] // PASSES-NEXT: 'fir.global' Pipeline // PASSES-NEXT: StackReclaim // PASSES-NEXT: CFGConversion // PASSES-NEXT: 'func.func' Pipeline // PASSES-NEXT: StackReclaim // PASSES-NEXT: CFGConversion +// PASSES-NEXT: 'gpu.module' Pipeline +// PASSES-NEXT: StackReclaim +// PASSES-NEXT: CFGConversion // PASSES-NEXT: 'omp.declare_reduction' Pipeline // PASSES-NEXT: StackReclaim // PASSES-NEXT: CFGConversion @@ -106,11 +116,13 @@ func.func @_QQmain() { // PASSES-NEXT: (S) 0 num-dce'd - Number of operations DCE'd // PASSES-NEXT: BoxedProcedurePass -// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private'] 
+// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private'] // PASSES-NEXT: 'fir.global' Pipeline // PASSES-NEXT: AbstractResultOpt // PASSES-NEXT: 'func.func' Pipeline // PASSES-NEXT: AbstractResultOpt +// PASSES-NEXT: 'gpu.module' Pipeline +// PASSES-NEXT: AbstractResultOpt // PASSES-NEXT: 'omp.declare_reduction' Pipeline // PASSES-NEXT: AbstractResultOpt // PASSES-NEXT: 'omp.private' Pipeline From 0c8928d456ac3ef23ed25bfc9e5d491dd7b62a11 Mon Sep 17 00:00:00 2001 From: Shubham Sandeep Rastogi Date: Tue, 3 Dec 2024 14:13:06 -0800 Subject: [PATCH 138/191] Reland "[NFC] Move DroppedVariableStats to its own file and redesign it to be extensible. (#117042)" (#118546) Removed the virtual destructor in the derived class DroppedVariableStatsIR --- ...s-to-collect-dropped-var-stats-for-M.patch | 1301 +++++++++++++++++ ...DroppedVariableStats-to-its-own-file.patch | 1045 +++++++++++++ .../llvm/CodeGen/DroppedVariableStats.h | 224 +++ .../llvm/Passes/StandardInstrumentations.h | 80 +- llvm/lib/CodeGen/CMakeLists.txt | 1 + llvm/lib/CodeGen/DroppedVariableStats.cpp | 194 +++ llvm/lib/Passes/StandardInstrumentations.cpp | 178 +-- llvm/unittests/CodeGen/CMakeLists.txt | 1 + .../DroppedVariableStatsIRTest.cpp} | 74 +- llvm/unittests/IR/CMakeLists.txt | 1 - 10 files changed, 2800 insertions(+), 299 deletions(-) create mode 100644 0001-Reland-Add-a-pass-to-collect-dropped-var-stats-for-M.patch create mode 100644 0001-Reland-NFC-Move-DroppedVariableStats-to-its-own-file.patch create mode 100644 llvm/include/llvm/CodeGen/DroppedVariableStats.h create mode 100644 llvm/lib/CodeGen/DroppedVariableStats.cpp rename llvm/unittests/{IR/DroppedVariableStatsTest.cpp => CodeGen/DroppedVariableStatsIRTest.cpp} (91%) diff --git a/0001-Reland-Add-a-pass-to-collect-dropped-var-stats-for-M.patch b/0001-Reland-Add-a-pass-to-collect-dropped-var-stats-for-M.patch new file mode 100644 index 0000000000000..95c0a0b54f7e2 --- /dev/null +++ b/0001-Reland-Add-a-pass-to-collect-dropped-var-stats-for-M.patch @@ -0,0 +1,1301 @@ +From 8f00eaaa595c1b908d43b1de288e3c03f1f998bf Mon Sep 17 00:00:00 2001 +From: Shubham Sandeep Rastogi +Date: Mon, 18 Nov 2024 16:06:59 -0800 +Subject: [PATCH] Reland "Add a pass to collect dropped var stats for MIR" + +Moved the MIR Test to the unittests/CodeGen folder +--- + .../llvm/CodeGen/DroppedVariableStats.h | 48 +- + .../llvm/CodeGen/MachineFunctionPass.h | 2 + + llvm/lib/CodeGen/DroppedVariableStats.cpp | 63 +- + llvm/lib/CodeGen/MachineFunctionPass.cpp | 15 +- + llvm/unittests/CodeGen/CMakeLists.txt | 1 + + .../CodeGen/DroppedVariableStatsMIRTest.cpp | 1067 +++++++++++++++++ + 6 files changed, 1193 insertions(+), 3 deletions(-) + create mode 100644 llvm/unittests/CodeGen/DroppedVariableStatsMIRTest.cpp + +diff --git a/llvm/include/llvm/CodeGen/DroppedVariableStats.h b/llvm/include/llvm/CodeGen/DroppedVariableStats.h +index 371d775b02e8..f6050c68c91a 100644 +--- a/llvm/include/llvm/CodeGen/DroppedVariableStats.h ++++ b/llvm/include/llvm/CodeGen/DroppedVariableStats.h +@@ -7,7 +7,7 @@ + ///===---------------------------------------------------------------------===// + /// \file + /// Dropped Variable Statistics for Debug Information. Reports any number +-/// of #dbg_value that get dropped due to an optimization pass. ++/// of #dbg_values or DBG_VALUEs that get dropped due to an optimization pass. 
+ /// + ///===---------------------------------------------------------------------===// + +@@ -221,6 +221,52 @@ private: + } + }; + ++/// A class to collect and print dropped debug information due to MIR ++/// optimization passes. After every MIR pass is run, it will print how many ++/// #DBG_VALUEs were dropped due to that pass. ++class DroppedVariableStatsMIR : public DroppedVariableStats { ++public: ++ DroppedVariableStatsMIR() : llvm::DroppedVariableStats(false) {} ++ ++ void runBeforePass(StringRef PassID, MachineFunction *MF) { ++ if (PassID == "Debug Variable Analysis") ++ return; ++ setup(); ++ return runOnMachineFunction(MF, true); ++ } ++ ++ void runAfterPass(StringRef PassID, MachineFunction *MF) { ++ if (PassID == "Debug Variable Analysis") ++ return; ++ runOnMachineFunction(MF, false); ++ calculateDroppedVarStatsOnMachineFunction(MF, PassID, MF->getName().str()); ++ cleanup(); ++ } ++ ++private: ++ const MachineFunction *MFunc; ++ /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or ++ /// after a pass has run to facilitate dropped variable calculation for an ++ /// llvm::MachineFunction. ++ void runOnMachineFunction(const MachineFunction *MF, bool Before); ++ /// Iterate over all Instructions in a MachineFunction and report any dropped ++ /// debug information. ++ void calculateDroppedVarStatsOnMachineFunction(const MachineFunction *MF, ++ StringRef PassID, ++ StringRef FuncOrModName); ++ /// Override base class method to run on an llvm::MachineFunction ++ /// specifically. ++ virtual void ++ visitEveryInstruction(unsigned &DroppedCount, ++ DenseMap &InlinedAtsMap, ++ VarID Var) override; ++ /// Override base class method to run on DBG_VALUEs specifically. ++ virtual void visitEveryDebugRecord( ++ DenseSet &VarIDSet, ++ DenseMap> &InlinedAtsMap, ++ StringRef FuncName, bool Before) override; ++}; ++ + } // namespace llvm + + #endif +diff --git a/llvm/include/llvm/CodeGen/MachineFunctionPass.h b/llvm/include/llvm/CodeGen/MachineFunctionPass.h +index caaf22c2139e..d82b593497ff 100644 +--- a/llvm/include/llvm/CodeGen/MachineFunctionPass.h ++++ b/llvm/include/llvm/CodeGen/MachineFunctionPass.h +@@ -18,6 +18,7 @@ + #ifndef LLVM_CODEGEN_MACHINEFUNCTIONPASS_H + #define LLVM_CODEGEN_MACHINEFUNCTIONPASS_H + ++#include "llvm/CodeGen/DroppedVariableStats.h" + #include "llvm/CodeGen/MachineFunction.h" + #include "llvm/Pass.h" + +@@ -67,6 +68,7 @@ private: + MachineFunctionProperties RequiredProperties; + MachineFunctionProperties SetProperties; + MachineFunctionProperties ClearedProperties; ++ DroppedVariableStatsMIR DroppedVarStatsMF; + + /// createPrinterPass - Get a machine function printer pass. + Pass *createPrinterPass(raw_ostream &O, +diff --git a/llvm/lib/CodeGen/DroppedVariableStats.cpp b/llvm/lib/CodeGen/DroppedVariableStats.cpp +index 122fcad1293f..71f91292160f 100644 +--- a/llvm/lib/CodeGen/DroppedVariableStats.cpp ++++ b/llvm/lib/CodeGen/DroppedVariableStats.cpp +@@ -7,7 +7,7 @@ + ///===---------------------------------------------------------------------===// + /// \file + /// Dropped Variable Statistics for Debug Information. Reports any number +-/// of #dbg_value that get dropped due to an optimization pass. ++/// of #dbg_values or DBG_VALUEs that get dropped due to an optimization pass. 
+ /// + ///===---------------------------------------------------------------------===// + +@@ -192,3 +192,64 @@ void DroppedVariableStatsIR::visitEveryDebugRecord( + } + } + } ++ ++void DroppedVariableStatsMIR::runOnMachineFunction(const MachineFunction *MF, ++ bool Before) { ++ auto &DebugVariables = DebugVariablesStack.back()[&MF->getFunction()]; ++ auto FuncName = MF->getName(); ++ MFunc = MF; ++ run(DebugVariables, FuncName, Before); ++} ++ ++void DroppedVariableStatsMIR::calculateDroppedVarStatsOnMachineFunction( ++ const MachineFunction *MF, StringRef PassID, StringRef FuncOrModName) { ++ MFunc = MF; ++ StringRef FuncName = MF->getName(); ++ const Function *Func = &MF->getFunction(); ++ DebugVariables &DbgVariables = DebugVariablesStack.back()[Func]; ++ calculateDroppedStatsAndPrint(DbgVariables, FuncName, PassID, FuncOrModName, ++ "MachineFunction", Func); ++} ++ ++void DroppedVariableStatsMIR::visitEveryInstruction( ++ unsigned &DroppedCount, DenseMap &InlinedAtsMap, ++ VarID Var) { ++ unsigned PrevDroppedCount = DroppedCount; ++ const DIScope *DbgValScope = std::get<0>(Var); ++ for (const auto &MBB : *MFunc) { ++ for (const auto &MI : MBB) { ++ if (!MI.isDebugInstr()) { ++ auto *DbgLoc = MI.getDebugLoc().get(); ++ if (!DbgLoc) ++ continue; ++ ++ auto *Scope = DbgLoc->getScope(); ++ if (updateDroppedCount(DbgLoc, Scope, DbgValScope, InlinedAtsMap, Var, ++ DroppedCount)) ++ break; ++ } ++ } ++ if (PrevDroppedCount != DroppedCount) { ++ PrevDroppedCount = DroppedCount; ++ break; ++ } ++ } ++} ++ ++void DroppedVariableStatsMIR::visitEveryDebugRecord( ++ DenseSet &VarIDSet, ++ DenseMap> &InlinedAtsMap, ++ StringRef FuncName, bool Before) { ++ for (const auto &MBB : *MFunc) { ++ for (const auto &MI : MBB) { ++ if (MI.isDebugValueLike()) { ++ auto *DbgVar = MI.getDebugVariable(); ++ if (!DbgVar) ++ continue; ++ auto DbgLoc = MI.getDebugLoc(); ++ populateVarIDSetAndInlinedMap(DbgVar, DbgLoc, VarIDSet, InlinedAtsMap, ++ FuncName, Before); ++ } ++ } ++ } ++} +diff --git a/llvm/lib/CodeGen/MachineFunctionPass.cpp b/llvm/lib/CodeGen/MachineFunctionPass.cpp +index 62ac3e32d24d..e803811643f8 100644 +--- a/llvm/lib/CodeGen/MachineFunctionPass.cpp ++++ b/llvm/lib/CodeGen/MachineFunctionPass.cpp +@@ -32,6 +32,11 @@ + using namespace llvm; + using namespace ore; + ++static cl::opt DroppedVarStatsMIR( ++ "dropped-variable-stats-mir", cl::Hidden, ++ cl::desc("Dump dropped debug variables stats for MIR passes"), ++ cl::init(false)); ++ + Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O, + const std::string &Banner) const { + return createMachineFunctionPrinterPass(O, Banner); +@@ -91,7 +96,15 @@ bool MachineFunctionPass::runOnFunction(Function &F) { + + MFProps.reset(ClearedProperties); + +- bool RV = runOnMachineFunction(MF); ++ bool RV; ++ if (DroppedVarStatsMIR) { ++ auto PassName = getPassName(); ++ DroppedVarStatsMF.runBeforePass(PassName, &MF); ++ RV = runOnMachineFunction(MF); ++ DroppedVarStatsMF.runAfterPass(PassName, &MF); ++ } else { ++ RV = runOnMachineFunction(MF); ++ } + + if (ShouldEmitSizeRemarks) { + // We wanted size remarks. 
Check if there was a change to the number of +diff --git a/llvm/unittests/CodeGen/CMakeLists.txt b/llvm/unittests/CodeGen/CMakeLists.txt +index 807fd1a9b7b5..50ef1bb5b7af 100644 +--- a/llvm/unittests/CodeGen/CMakeLists.txt ++++ b/llvm/unittests/CodeGen/CMakeLists.txt +@@ -28,6 +28,7 @@ add_llvm_unittest(CodeGenTests + DIEHashTest.cpp + DIETest.cpp + DroppedVariableStatsIRTest.cpp ++ DroppedVariableStatsMIRTest.cpp + DwarfStringPoolEntryRefTest.cpp + InstrRefLDVTest.cpp + LowLevelTypeTest.cpp +diff --git a/llvm/unittests/CodeGen/DroppedVariableStatsMIRTest.cpp b/llvm/unittests/CodeGen/DroppedVariableStatsMIRTest.cpp +new file mode 100644 +index 000000000000..b26a89c7adcb +--- /dev/null ++++ b/llvm/unittests/CodeGen/DroppedVariableStatsMIRTest.cpp +@@ -0,0 +1,1067 @@ ++//===- unittests/IR/DroppedVariableStatsTest.cpp - TimePassesHandler tests ++//----------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "llvm/AsmParser/Parser.h" ++#include "llvm/CodeGen/MIRParser/MIRParser.h" ++#include "llvm/CodeGen/MachineModuleInfo.h" ++#include "llvm/IR/Function.h" ++#include "llvm/IR/LegacyPassManager.h" ++#include "llvm/IR/Module.h" ++#include "llvm/MC/TargetRegistry.h" ++#include "llvm/Pass.h" ++#include "llvm/Passes/StandardInstrumentations.h" ++#include "llvm/Support/TargetSelect.h" ++#include "llvm/Target/TargetMachine.h" ++#include "gtest/gtest.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++using namespace llvm; ++ ++namespace { ++ ++std::unique_ptr ++createTargetMachine(std::string TT, StringRef CPU, StringRef FS) { ++ std::string Error; ++ const Target *T = TargetRegistry::lookupTarget(TT, Error); ++ if (!T) ++ return nullptr; ++ TargetOptions Options; ++ return std::unique_ptr( ++ static_cast(T->createTargetMachine( ++ TT, CPU, FS, Options, std::nullopt, std::nullopt))); ++} ++ ++std::unique_ptr parseMIR(const TargetMachine &TM, StringRef MIRCode, ++ MachineModuleInfo &MMI, LLVMContext *Context) { ++ SMDiagnostic Diagnostic; ++ std::unique_ptr M; ++ std::unique_ptr MBuffer = MemoryBuffer::getMemBuffer(MIRCode); ++ auto MIR = createMIRParser(std::move(MBuffer), *Context); ++ if (!MIR) ++ return nullptr; ++ ++ std::unique_ptr Mod = MIR->parseIRModule(); ++ if (!Mod) ++ return nullptr; ++ ++ Mod->setDataLayout(TM.createDataLayout()); ++ ++ if (MIR->parseMachineFunctions(*Mod, MMI)) { ++ M.reset(); ++ return nullptr; ++ } ++ return Mod; ++} ++// This test ensures that if a DBG_VALUE and an instruction that exists in the ++// same scope as that DBG_VALUE are both deleted as a result of an optimization ++// pass, debug information is considered not dropped. 
++TEST(DroppedVariableStatsMIR, BothDeleted) { ++ InitializeAllTargetInfos(); ++ InitializeAllTargets(); ++ InitializeAllTargetMCs(); ++ PassInstrumentationCallbacks PIC; ++ PassInstrumentation PI(&PIC); ++ ++ LLVMContext C; ++ ++ const char *MIR = ++ R"( ++--- | ++ ; ModuleID = '/tmp/test.ll' ++ source_filename = "/tmp/test.ll" ++ target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" ++ ++ define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 { ++ entry: ++ #dbg_value(i32 %x, !10, !DIExpression(), !11) ++ %add = add nsw i32 %x, 1, !dbg !12 ++ ret i32 0 ++ } ++ ++ !llvm.dbg.cu = !{!0} ++ !llvm.module.flags = !{!2} ++ !llvm.ident = !{!3} ++ ++ !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") ++ !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/") ++ !2 = !{i32 2, !"Debug Info Version", i32 3} ++ !3 = !{!"clang"} ++ !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) ++ !5 = !DIFile(filename: "/tmp/code.cpp", directory: "") ++ !6 = !DISubroutineType(types: !7) ++ !7 = !{!8, !8} ++ !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) ++ !9 = !{!10} ++ !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8) ++ !11 = !DILocation(line: 0, scope: !4) ++ !12 = !DILocation(line: 2, column: 11, scope: !4) ++ ++... ++--- ++name: _Z3fooi ++alignment: 4 ++exposesReturnsTwice: false ++legalized: false ++regBankSelected: false ++selected: false ++failedISel: false ++tracksRegLiveness: true ++hasWinCFI: false ++noPhis: false ++isSSA: true ++noVRegs: false ++hasFakeUses: false ++callsEHReturn: false ++callsUnwindInit: false ++hasEHCatchret: false ++hasEHScopes: false ++hasEHFunclets: false ++isOutlined: false ++debugInstrRef: false ++failsVerification: false ++tracksDebugUserValues: false ++registers: ++ - { id: 0, class: _, preferred-register: '', flags: [ ] } ++ - { id: 1, class: _, preferred-register: '', flags: [ ] } ++ - { id: 2, class: _, preferred-register: '', flags: [ ] } ++ - { id: 3, class: _, preferred-register: '', flags: [ ] } ++liveins: ++ - { reg: '$w0', virtual-reg: '' } ++frameInfo: ++ isFrameAddressTaken: false ++ isReturnAddressTaken: false ++ hasStackMap: false ++ hasPatchPoint: false ++ stackSize: 0 ++ offsetAdjustment: 0 ++ maxAlignment: 1 ++ adjustsStack: false ++ hasCalls: false ++ stackProtector: '' ++ functionContext: '' ++ maxCallFrameSize: 4294967295 ++ cvBytesOfCalleeSavedRegisters: 0 ++ hasOpaqueSPAdjustment: false ++ hasVAStart: false ++ hasMustTailInVarArgFunc: false ++ hasTailCall: false ++ isCalleeSavedInfoValid: false ++ localFrameSize: 0 ++ savePoint: '' ++ restorePoint: '' ++fixedStack: [] ++stack: [] ++entry_values: [] ++callSites: [] ++debugValueSubstitutions: [] ++constants: [] ++machineFunctionInfo: {} ++body: | ++ bb.1.entry: ++ liveins: $w0 ++ ++ %0:_(s32) = COPY $w0 ++ %1:_(s32) = G_CONSTANT i32 1 ++ %3:_(s32) = G_CONSTANT i32 0 ++ DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 ++ %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 ++ $w0 = COPY %3(s32) ++ RET_ReallyLR implicit $w0 ++ )"; ++ auto TM = createTargetMachine(Triple::normalize("aarch64--"), "", ""); ++ MachineModuleInfo MMI(TM.get()); ++ std::unique_ptr M = 
parseMIR(*TM, MIR, MMI, &C); ++ ASSERT_TRUE(M); ++ ++ DroppedVariableStatsMIR Stats; ++ auto *MF = MMI.getMachineFunction(*M->getFunction("_Z3fooi")); ++ Stats.runBeforePass("Test", MF); ++ ++ // This loop simulates an IR pass that drops debug information. ++ for (auto &MBB : *MF) { ++ for (auto &MI : MBB) { ++ if (MI.isDebugValueLike()) { ++ MI.eraseFromParent(); ++ break; ++ } ++ } ++ for (auto &MI : MBB) { ++ auto *DbgLoc = MI.getDebugLoc().get(); ++ if (DbgLoc) { ++ MI.eraseFromParent(); ++ break; ++ } ++ } ++ break; ++ } ++ ++ Stats.runAfterPass("Test", MF); ++ ASSERT_EQ(Stats.getPassDroppedVariables(), false); ++} ++ ++// This test ensures that if a DBG_VALUE is dropped after an optimization pass, ++// but an instruction that shares the same scope as the DBG_VALUE still exists, ++// debug information is conisdered dropped. ++TEST(DroppedVariableStatsMIR, DbgValLost) { ++ InitializeAllTargetInfos(); ++ InitializeAllTargets(); ++ InitializeAllTargetMCs(); ++ PassInstrumentationCallbacks PIC; ++ PassInstrumentation PI(&PIC); ++ ++ LLVMContext C; ++ ++ const char *MIR = ++ R"( ++--- | ++ ; ModuleID = '/tmp/test.ll' ++ source_filename = "/tmp/test.ll" ++ target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" ++ ++ define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 { ++ entry: ++ #dbg_value(i32 %x, !10, !DIExpression(), !11) ++ %add = add nsw i32 %x, 1, !dbg !12 ++ ret i32 0 ++ } ++ ++ !llvm.dbg.cu = !{!0} ++ !llvm.module.flags = !{!2} ++ !llvm.ident = !{!3} ++ ++ !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") ++ !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/") ++ !2 = !{i32 2, !"Debug Info Version", i32 3} ++ !3 = !{!"clang"} ++ !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) ++ !5 = !DIFile(filename: "/tmp/code.cpp", directory: "") ++ !6 = !DISubroutineType(types: !7) ++ !7 = !{!8, !8} ++ !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) ++ !9 = !{!10} ++ !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8) ++ !11 = !DILocation(line: 0, scope: !4) ++ !12 = !DILocation(line: 2, column: 11, scope: !4) ++ ++... 
++--- ++name: _Z3fooi ++alignment: 4 ++exposesReturnsTwice: false ++legalized: false ++regBankSelected: false ++selected: false ++failedISel: false ++tracksRegLiveness: true ++hasWinCFI: false ++noPhis: false ++isSSA: true ++noVRegs: false ++hasFakeUses: false ++callsEHReturn: false ++callsUnwindInit: false ++hasEHCatchret: false ++hasEHScopes: false ++hasEHFunclets: false ++isOutlined: false ++debugInstrRef: false ++failsVerification: false ++tracksDebugUserValues: false ++registers: ++ - { id: 0, class: _, preferred-register: '', flags: [ ] } ++ - { id: 1, class: _, preferred-register: '', flags: [ ] } ++ - { id: 2, class: _, preferred-register: '', flags: [ ] } ++ - { id: 3, class: _, preferred-register: '', flags: [ ] } ++liveins: ++ - { reg: '$w0', virtual-reg: '' } ++frameInfo: ++ isFrameAddressTaken: false ++ isReturnAddressTaken: false ++ hasStackMap: false ++ hasPatchPoint: false ++ stackSize: 0 ++ offsetAdjustment: 0 ++ maxAlignment: 1 ++ adjustsStack: false ++ hasCalls: false ++ stackProtector: '' ++ functionContext: '' ++ maxCallFrameSize: 4294967295 ++ cvBytesOfCalleeSavedRegisters: 0 ++ hasOpaqueSPAdjustment: false ++ hasVAStart: false ++ hasMustTailInVarArgFunc: false ++ hasTailCall: false ++ isCalleeSavedInfoValid: false ++ localFrameSize: 0 ++ savePoint: '' ++ restorePoint: '' ++fixedStack: [] ++stack: [] ++entry_values: [] ++callSites: [] ++debugValueSubstitutions: [] ++constants: [] ++machineFunctionInfo: {} ++body: | ++ bb.1.entry: ++ liveins: $w0 ++ ++ %0:_(s32) = COPY $w0 ++ %1:_(s32) = G_CONSTANT i32 1 ++ %3:_(s32) = G_CONSTANT i32 0 ++ DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 ++ %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 ++ $w0 = COPY %3(s32) ++ RET_ReallyLR implicit $w0 ++ )"; ++ auto TM = createTargetMachine(Triple::normalize("aarch64--"), "", ""); ++ MachineModuleInfo MMI(TM.get()); ++ std::unique_ptr M = parseMIR(*TM, MIR, MMI, &C); ++ ASSERT_TRUE(M); ++ ++ DroppedVariableStatsMIR Stats; ++ auto *MF = MMI.getMachineFunction(*M->getFunction("_Z3fooi")); ++ Stats.runBeforePass("Test", MF); ++ ++ // This loop simulates an IR pass that drops debug information. ++ for (auto &MBB : *MF) { ++ for (auto &MI : MBB) { ++ if (MI.isDebugValueLike()) { ++ MI.eraseFromParent(); ++ break; ++ } ++ } ++ break; ++ } ++ ++ Stats.runAfterPass("Test", MF); ++ ASSERT_EQ(Stats.getPassDroppedVariables(), true); ++} ++ ++// This test ensures that if a #dbg_value is dropped after an optimization pass, ++// but an instruction that has an unrelated scope as the #dbg_value still ++// exists, debug information is conisdered not dropped. 
++TEST(DroppedVariableStatsMIR, UnrelatedScopes) { ++ InitializeAllTargetInfos(); ++ InitializeAllTargets(); ++ InitializeAllTargetMCs(); ++ PassInstrumentationCallbacks PIC; ++ PassInstrumentation PI(&PIC); ++ ++ LLVMContext C; ++ ++ const char *MIR = ++ R"( ++--- | ++ ; ModuleID = '/tmp/test.ll' ++ source_filename = "/tmp/test.ll" ++ target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" ++ ++ define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 { ++ entry: ++ #dbg_value(i32 %x, !10, !DIExpression(), !11) ++ %add = add nsw i32 %x, 1, !dbg !12 ++ ret i32 0 ++ } ++ ++ !llvm.dbg.cu = !{!0} ++ !llvm.module.flags = !{!2} ++ !llvm.ident = !{!3} ++ ++ !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") ++ !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/") ++ !2 = !{i32 2, !"Debug Info Version", i32 3} ++ !3 = !{!"clang"} ++ !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) ++ !5 = !DIFile(filename: "/tmp/code.cpp", directory: "") ++ !6 = !DISubroutineType(types: !7) ++ !7 = !{!8, !8} ++ !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) ++ !9 = !{!10} ++ !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8) ++ !11 = !DILocation(line: 0, scope: !4) ++ !12 = !DILocation(line: 2, column: 11, scope: !13) ++ !13 = distinct !DISubprogram(name: "bar", linkageName: "_Z3bari", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) ++ ++... 
++--- ++name: _Z3fooi ++alignment: 4 ++exposesReturnsTwice: false ++legalized: false ++regBankSelected: false ++selected: false ++failedISel: false ++tracksRegLiveness: true ++hasWinCFI: false ++noPhis: false ++isSSA: true ++noVRegs: false ++hasFakeUses: false ++callsEHReturn: false ++callsUnwindInit: false ++hasEHCatchret: false ++hasEHScopes: false ++hasEHFunclets: false ++isOutlined: false ++debugInstrRef: false ++failsVerification: false ++tracksDebugUserValues: false ++registers: ++ - { id: 0, class: _, preferred-register: '', flags: [ ] } ++ - { id: 1, class: _, preferred-register: '', flags: [ ] } ++ - { id: 2, class: _, preferred-register: '', flags: [ ] } ++ - { id: 3, class: _, preferred-register: '', flags: [ ] } ++liveins: ++ - { reg: '$w0', virtual-reg: '' } ++frameInfo: ++ isFrameAddressTaken: false ++ isReturnAddressTaken: false ++ hasStackMap: false ++ hasPatchPoint: false ++ stackSize: 0 ++ offsetAdjustment: 0 ++ maxAlignment: 1 ++ adjustsStack: false ++ hasCalls: false ++ stackProtector: '' ++ functionContext: '' ++ maxCallFrameSize: 4294967295 ++ cvBytesOfCalleeSavedRegisters: 0 ++ hasOpaqueSPAdjustment: false ++ hasVAStart: false ++ hasMustTailInVarArgFunc: false ++ hasTailCall: false ++ isCalleeSavedInfoValid: false ++ localFrameSize: 0 ++ savePoint: '' ++ restorePoint: '' ++fixedStack: [] ++stack: [] ++entry_values: [] ++callSites: [] ++debugValueSubstitutions: [] ++constants: [] ++machineFunctionInfo: {} ++body: | ++ bb.1.entry: ++ liveins: $w0 ++ ++ %0:_(s32) = COPY $w0 ++ %1:_(s32) = G_CONSTANT i32 1 ++ %3:_(s32) = G_CONSTANT i32 0 ++ DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 ++ %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 ++ $w0 = COPY %3(s32) ++ RET_ReallyLR implicit $w0 ++ )"; ++ auto TM = createTargetMachine(Triple::normalize("aarch64--"), "", ""); ++ MachineModuleInfo MMI(TM.get()); ++ std::unique_ptr M = parseMIR(*TM, MIR, MMI, &C); ++ ASSERT_TRUE(M); ++ ++ DroppedVariableStatsMIR Stats; ++ auto *MF = MMI.getMachineFunction(*M->getFunction("_Z3fooi")); ++ Stats.runBeforePass("Test", MF); ++ ++ // This loop simulates an IR pass that drops debug information. ++ for (auto &MBB : *MF) { ++ for (auto &MI : MBB) { ++ if (MI.isDebugValueLike()) { ++ MI.eraseFromParent(); ++ break; ++ } ++ } ++ break; ++ } ++ ++ Stats.runAfterPass("Test", MF); ++ ASSERT_EQ(Stats.getPassDroppedVariables(), false); ++} ++ ++// This test ensures that if a #dbg_value is dropped after an optimization pass, ++// but an instruction that has a scope which is a child of the #dbg_value scope ++// still exists, debug information is conisdered dropped. 
++TEST(DroppedVariableStatsMIR, ChildScopes) { ++ InitializeAllTargetInfos(); ++ InitializeAllTargets(); ++ InitializeAllTargetMCs(); ++ PassInstrumentationCallbacks PIC; ++ PassInstrumentation PI(&PIC); ++ ++ LLVMContext C; ++ ++ const char *MIR = ++ R"( ++--- | ++ ; ModuleID = '/tmp/test.ll' ++ source_filename = "/tmp/test.ll" ++ target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" ++ ++ define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 { ++ entry: ++ #dbg_value(i32 %x, !10, !DIExpression(), !11) ++ %add = add nsw i32 %x, 1, !dbg !12 ++ ret i32 0 ++ } ++ ++ !llvm.dbg.cu = !{!0} ++ !llvm.module.flags = !{!2} ++ !llvm.ident = !{!3} ++ ++ !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") ++ !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/") ++ !2 = !{i32 2, !"Debug Info Version", i32 3} ++ !3 = !{!"clang"} ++ !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) ++ !5 = !DIFile(filename: "/tmp/code.cpp", directory: "") ++ !6 = !DISubroutineType(types: !7) ++ !7 = !{!8, !8} ++ !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) ++ !9 = !{!10} ++ !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8) ++ !11 = !DILocation(line: 0, scope: !4) ++ !12 = !DILocation(line: 2, column: 11, scope: !13) ++ !13 = distinct !DILexicalBlock(scope: !4, file: !5, line: 10, column: 28) ++ ++... ++--- ++name: _Z3fooi ++alignment: 4 ++exposesReturnsTwice: false ++legalized: false ++regBankSelected: false ++selected: false ++failedISel: false ++tracksRegLiveness: true ++hasWinCFI: false ++noPhis: false ++isSSA: true ++noVRegs: false ++hasFakeUses: false ++callsEHReturn: false ++callsUnwindInit: false ++hasEHCatchret: false ++hasEHScopes: false ++hasEHFunclets: false ++isOutlined: false ++debugInstrRef: false ++failsVerification: false ++tracksDebugUserValues: false ++registers: ++ - { id: 0, class: _, preferred-register: '', flags: [ ] } ++ - { id: 1, class: _, preferred-register: '', flags: [ ] } ++ - { id: 2, class: _, preferred-register: '', flags: [ ] } ++ - { id: 3, class: _, preferred-register: '', flags: [ ] } ++liveins: ++ - { reg: '$w0', virtual-reg: '' } ++frameInfo: ++ isFrameAddressTaken: false ++ isReturnAddressTaken: false ++ hasStackMap: false ++ hasPatchPoint: false ++ stackSize: 0 ++ offsetAdjustment: 0 ++ maxAlignment: 1 ++ adjustsStack: false ++ hasCalls: false ++ stackProtector: '' ++ functionContext: '' ++ maxCallFrameSize: 4294967295 ++ cvBytesOfCalleeSavedRegisters: 0 ++ hasOpaqueSPAdjustment: false ++ hasVAStart: false ++ hasMustTailInVarArgFunc: false ++ hasTailCall: false ++ isCalleeSavedInfoValid: false ++ localFrameSize: 0 ++ savePoint: '' ++ restorePoint: '' ++fixedStack: [] ++stack: [] ++entry_values: [] ++callSites: [] ++debugValueSubstitutions: [] ++constants: [] ++machineFunctionInfo: {} ++body: | ++ bb.1.entry: ++ liveins: $w0 ++ ++ %0:_(s32) = COPY $w0 ++ %1:_(s32) = G_CONSTANT i32 1 ++ %3:_(s32) = G_CONSTANT i32 0 ++ DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 ++ %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 ++ $w0 = COPY %3(s32) ++ RET_ReallyLR implicit $w0 ++ )"; ++ auto TM = 
      createTargetMachine(Triple::normalize("aarch64--"), "", "");
++  MachineModuleInfo MMI(TM.get());
++  std::unique_ptr<Module> M = parseMIR(*TM, MIR, MMI, &C);
++  ASSERT_TRUE(M);
++
++  DroppedVariableStatsMIR Stats;
++  auto *MF = MMI.getMachineFunction(*M->getFunction("_Z3fooi"));
++  Stats.runBeforePass("Test", MF);
++
++  // This loop simulates a pass that drops debug information.
++  for (auto &MBB : *MF) {
++    for (auto &MI : MBB) {
++      if (MI.isDebugValueLike()) {
++        MI.eraseFromParent();
++        break;
++      }
++    }
++    break;
++  }
++
++  Stats.runAfterPass("Test", MF);
++  ASSERT_EQ(Stats.getPassDroppedVariables(), true);
++}
++
++// This test ensures that if a DBG_VALUE is dropped after an optimization pass,
++// but an instruction that has a scope which is a child of the DBG_VALUE scope
++// still exists, and the DBG_VALUE is inlined at another location, debug
++// information is considered not dropped.
++TEST(DroppedVariableStatsMIR, InlinedAt) {
++  InitializeAllTargetInfos();
++  InitializeAllTargets();
++  InitializeAllTargetMCs();
++  PassInstrumentationCallbacks PIC;
++  PassInstrumentation PI(&PIC);
++
++  LLVMContext C;
++
++  const char *MIR =
++      R"(
++--- |
++  ; ModuleID = '/tmp/test.ll'
++  source_filename = "/tmp/test.ll"
++  target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
++
++  define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 {
++  entry:
++    #dbg_value(i32 %x, !10, !DIExpression(), !11)
++    %add = add nsw i32 %x, 1, !dbg !12
++    ret i32 0
++  }
++
++  !llvm.dbg.cu = !{!0}
++  !llvm.module.flags = !{!2}
++  !llvm.ident = !{!3}
++
++  !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/")
++  !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/")
++  !2 = !{i32 2, !"Debug Info Version", i32 3}
++  !3 = !{!"clang"}
++  !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9)
++  !5 = !DIFile(filename: "/tmp/code.cpp", directory: "")
++  !6 = !DISubroutineType(types: !7)
++  !7 = !{!8, !8}
++  !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
++  !9 = !{!10}
++  !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8)
++  !11 = !DILocation(line: 0, scope: !4, inlinedAt: !14)
++  !12 = !DILocation(line: 2, column: 11, scope: !13)
++  !13 = distinct !DILexicalBlock(scope: !4, file: !5, line: 10, column: 28)
++  !14 = !DILocation(line: 3, column: 2, scope: !4)
++
++...
++--- ++name: _Z3fooi ++alignment: 4 ++exposesReturnsTwice: false ++legalized: false ++regBankSelected: false ++selected: false ++failedISel: false ++tracksRegLiveness: true ++hasWinCFI: false ++noPhis: false ++isSSA: true ++noVRegs: false ++hasFakeUses: false ++callsEHReturn: false ++callsUnwindInit: false ++hasEHCatchret: false ++hasEHScopes: false ++hasEHFunclets: false ++isOutlined: false ++debugInstrRef: false ++failsVerification: false ++tracksDebugUserValues: false ++registers: ++ - { id: 0, class: _, preferred-register: '', flags: [ ] } ++ - { id: 1, class: _, preferred-register: '', flags: [ ] } ++ - { id: 2, class: _, preferred-register: '', flags: [ ] } ++ - { id: 3, class: _, preferred-register: '', flags: [ ] } ++liveins: ++ - { reg: '$w0', virtual-reg: '' } ++frameInfo: ++ isFrameAddressTaken: false ++ isReturnAddressTaken: false ++ hasStackMap: false ++ hasPatchPoint: false ++ stackSize: 0 ++ offsetAdjustment: 0 ++ maxAlignment: 1 ++ adjustsStack: false ++ hasCalls: false ++ stackProtector: '' ++ functionContext: '' ++ maxCallFrameSize: 4294967295 ++ cvBytesOfCalleeSavedRegisters: 0 ++ hasOpaqueSPAdjustment: false ++ hasVAStart: false ++ hasMustTailInVarArgFunc: false ++ hasTailCall: false ++ isCalleeSavedInfoValid: false ++ localFrameSize: 0 ++ savePoint: '' ++ restorePoint: '' ++fixedStack: [] ++stack: [] ++entry_values: [] ++callSites: [] ++debugValueSubstitutions: [] ++constants: [] ++machineFunctionInfo: {} ++body: | ++ bb.1.entry: ++ liveins: $w0 ++ ++ %0:_(s32) = COPY $w0 ++ %1:_(s32) = G_CONSTANT i32 1 ++ %3:_(s32) = G_CONSTANT i32 0 ++ DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 ++ %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 ++ $w0 = COPY %3(s32) ++ RET_ReallyLR implicit $w0 ++ )"; ++ auto TM = createTargetMachine(Triple::normalize("aarch64--"), "", ""); ++ MachineModuleInfo MMI(TM.get()); ++ std::unique_ptr M = parseMIR(*TM, MIR, MMI, &C); ++ ASSERT_TRUE(M); ++ ++ DroppedVariableStatsMIR Stats; ++ auto *MF = MMI.getMachineFunction(*M->getFunction("_Z3fooi")); ++ Stats.runBeforePass("Test", MF); ++ ++ // This loop simulates an IR pass that drops debug information. ++ for (auto &MBB : *MF) { ++ for (auto &MI : MBB) { ++ if (MI.isDebugValueLike()) { ++ MI.eraseFromParent(); ++ break; ++ } ++ } ++ break; ++ } ++ ++ Stats.runAfterPass("Test", MF); ++ ASSERT_EQ(Stats.getPassDroppedVariables(), false); ++} ++ ++// This test ensures that if a DBG_VALUE is dropped after an optimization pass, ++// but an instruction that has a scope which is a child of the DBG_VALUE scope ++// still exists, and the DBG_VALUE and the instruction are inlined at another ++// location, debug information is conisdered dropped. 
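Before the InlinedAtShared test below, it may help to spell out the rule these MIR tests exercise. A surviving instruction only makes a dropped DBG_VALUE count as "dropped" when two things hold: the instruction's scope chain reaches the DBG_VALUE's scope, and its inlined-at chain reaches the DBG_VALUE's inlined-at location. That is why the InlinedAt test above expects "not dropped": the surviving G_ADD sits in child scope !13 but carries no inlined-at, so it can never reach !14. The helper below is an illustrative, self-contained restatement; the real logic lives in DroppedVariableStats::updateDroppedCount, isScopeChildOfOrEqualTo and isInlinedAtChildOfOrEqualTo later in this series, which additionally memoize visited scopes.

#include "llvm/IR/DebugInfoMetadata.h"

// Illustrative only: does the surviving instruction's location still "cover"
// the dropped variable, given the variable's scope and inlined-at location?
static bool coversDroppedVariable(const llvm::DILocation *InstLoc,
                                  const llvm::DIScope *DbgValScope,
                                  const llvm::DILocation *DbgValInlinedAt) {
  // 1. Scope check: walk the instruction's scope chain upwards until it
  //    either hits the DBG_VALUE's scope or runs out.
  bool InScope = false;
  for (const llvm::DIScope *S = InstLoc->getScope(); S; S = S->getScope())
    if (S == DbgValScope) {
      InScope = true;
      break;
    }
  if (!InScope)
    return false;
  // 2. Inlined-at check: identical locations (possibly both null) match;
  //    otherwise the DBG_VALUE's inlined-at must appear somewhere on the
  //    instruction's inlined-at chain.
  const llvm::DILocation *IA = InstLoc->getInlinedAt();
  if (IA == DbgValInlinedAt)
    return true;
  if (!DbgValInlinedAt)
    return false;
  for (; IA; IA = IA->getInlinedAt())
    if (IA == DbgValInlinedAt)
      return true;
  return false;
}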
++TEST(DroppedVariableStatsMIR, InlinedAtShared) { ++ InitializeAllTargetInfos(); ++ InitializeAllTargets(); ++ InitializeAllTargetMCs(); ++ PassInstrumentationCallbacks PIC; ++ PassInstrumentation PI(&PIC); ++ ++ LLVMContext C; ++ ++ const char *MIR = ++ R"( ++--- | ++ ; ModuleID = '/tmp/test.ll' ++ source_filename = "/tmp/test.ll" ++ target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" ++ ++ define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 { ++ entry: ++ #dbg_value(i32 %x, !10, !DIExpression(), !11) ++ %add = add nsw i32 %x, 1, !dbg !12 ++ ret i32 0 ++ } ++ ++ !llvm.dbg.cu = !{!0} ++ !llvm.module.flags = !{!2} ++ !llvm.ident = !{!3} ++ ++ !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") ++ !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/") ++ !2 = !{i32 2, !"Debug Info Version", i32 3} ++ !3 = !{!"clang"} ++ !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) ++ !5 = !DIFile(filename: "/tmp/code.cpp", directory: "") ++ !6 = !DISubroutineType(types: !7) ++ !7 = !{!8, !8} ++ !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) ++ !9 = !{!10} ++ !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8) ++ !11 = !DILocation(line: 0, scope: !4, inlinedAt: !14) ++ !12 = !DILocation(line: 2, column: 11, scope: !13, inlinedAt: !14) ++ !13 = distinct !DILexicalBlock(scope: !4, file: !5, line: 10, column: 28) ++ !14 = !DILocation(line: 3, column: 2, scope: !4) ++ ++... 
++--- ++name: _Z3fooi ++alignment: 4 ++exposesReturnsTwice: false ++legalized: false ++regBankSelected: false ++selected: false ++failedISel: false ++tracksRegLiveness: true ++hasWinCFI: false ++noPhis: false ++isSSA: true ++noVRegs: false ++hasFakeUses: false ++callsEHReturn: false ++callsUnwindInit: false ++hasEHCatchret: false ++hasEHScopes: false ++hasEHFunclets: false ++isOutlined: false ++debugInstrRef: false ++failsVerification: false ++tracksDebugUserValues: false ++registers: ++ - { id: 0, class: _, preferred-register: '', flags: [ ] } ++ - { id: 1, class: _, preferred-register: '', flags: [ ] } ++ - { id: 2, class: _, preferred-register: '', flags: [ ] } ++ - { id: 3, class: _, preferred-register: '', flags: [ ] } ++liveins: ++ - { reg: '$w0', virtual-reg: '' } ++frameInfo: ++ isFrameAddressTaken: false ++ isReturnAddressTaken: false ++ hasStackMap: false ++ hasPatchPoint: false ++ stackSize: 0 ++ offsetAdjustment: 0 ++ maxAlignment: 1 ++ adjustsStack: false ++ hasCalls: false ++ stackProtector: '' ++ functionContext: '' ++ maxCallFrameSize: 4294967295 ++ cvBytesOfCalleeSavedRegisters: 0 ++ hasOpaqueSPAdjustment: false ++ hasVAStart: false ++ hasMustTailInVarArgFunc: false ++ hasTailCall: false ++ isCalleeSavedInfoValid: false ++ localFrameSize: 0 ++ savePoint: '' ++ restorePoint: '' ++fixedStack: [] ++stack: [] ++entry_values: [] ++callSites: [] ++debugValueSubstitutions: [] ++constants: [] ++machineFunctionInfo: {} ++body: | ++ bb.1.entry: ++ liveins: $w0 ++ ++ %0:_(s32) = COPY $w0 ++ %1:_(s32) = G_CONSTANT i32 1 ++ %3:_(s32) = G_CONSTANT i32 0 ++ DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 ++ %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 ++ $w0 = COPY %3(s32) ++ RET_ReallyLR implicit $w0 ++ )"; ++ auto TM = createTargetMachine(Triple::normalize("aarch64--"), "", ""); ++ MachineModuleInfo MMI(TM.get()); ++ std::unique_ptr M = parseMIR(*TM, MIR, MMI, &C); ++ ASSERT_TRUE(M); ++ ++ DroppedVariableStatsMIR Stats; ++ auto *MF = MMI.getMachineFunction(*M->getFunction("_Z3fooi")); ++ Stats.runBeforePass("Test", MF); ++ ++ // This loop simulates an IR pass that drops debug information. ++ for (auto &MBB : *MF) { ++ for (auto &MI : MBB) { ++ if (MI.isDebugValueLike()) { ++ MI.eraseFromParent(); ++ break; ++ } ++ } ++ break; ++ } ++ ++ Stats.runAfterPass("Test", MF); ++ ASSERT_EQ(Stats.getPassDroppedVariables(), true); ++} ++ ++// This test ensures that if a DBG_VALUE is dropped after an optimization pass, ++// but an instruction that has a scope which is a child of the DBG_VALUE scope ++// still exists, and the instruction is inlined at a location that is the ++// DBG_VALUE's inlined at location, debug information is conisdered dropped. 
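One more aside before the InlinedAtChild test below: when a case like InlinedAtShared above fires in a real compile rather than in an ASSERT_EQ, the statistics are emitted as one CSV row per pass and per function (or module), under the header printed by the DroppedVariableStats constructor shown later in this series ("Pass Level, Pass Name, Num of Dropped Variables, Func or Module Name"). A couple of illustrative lines of that output; the pass names and counts here are made up:

Pass Level, Pass Name, Num of Dropped Variables, Func or Module Name
Function, InstCombinePass, 1, _Z3fooi
Module, GlobalDCEPass, 2, /tmp/test.ll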
++TEST(DroppedVariableStatsMIR, InlinedAtChild) { ++ InitializeAllTargetInfos(); ++ InitializeAllTargets(); ++ InitializeAllTargetMCs(); ++ PassInstrumentationCallbacks PIC; ++ PassInstrumentation PI(&PIC); ++ ++ LLVMContext C; ++ ++ const char *MIR = ++ R"( ++--- | ++ ; ModuleID = '/tmp/test.ll' ++ source_filename = "/tmp/test.ll" ++ target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" ++ ++ define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 { ++ entry: ++ #dbg_value(i32 %x, !10, !DIExpression(), !11) ++ %add = add nsw i32 %x, 1, !dbg !12 ++ ret i32 0 ++ } ++ ++ !llvm.dbg.cu = !{!0} ++ !llvm.module.flags = !{!2} ++ !llvm.ident = !{!3} ++ ++ !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") ++ !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/") ++ !2 = !{i32 2, !"Debug Info Version", i32 3} ++ !3 = !{!"clang"} ++ !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) ++ !5 = !DIFile(filename: "/tmp/code.cpp", directory: "") ++ !6 = !DISubroutineType(types: !7) ++ !7 = !{!8, !8} ++ !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) ++ !9 = !{!10} ++ !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8) ++ !11 = !DILocation(line: 0, scope: !4, inlinedAt: !14) ++ !12 = !DILocation(line: 2, column: 11, scope: !13, inlinedAt: !15) ++ !13 = distinct !DILexicalBlock(scope: !4, file: !5, line: 10, column: 28) ++ !14 = !DILocation(line: 3, column: 2, scope: !4) ++ !15 = !DILocation(line: 4, column: 5, scope: !13, inlinedAt: !14) ++ ++... 
++--- ++name: _Z3fooi ++alignment: 4 ++exposesReturnsTwice: false ++legalized: false ++regBankSelected: false ++selected: false ++failedISel: false ++tracksRegLiveness: true ++hasWinCFI: false ++noPhis: false ++isSSA: true ++noVRegs: false ++hasFakeUses: false ++callsEHReturn: false ++callsUnwindInit: false ++hasEHCatchret: false ++hasEHScopes: false ++hasEHFunclets: false ++isOutlined: false ++debugInstrRef: false ++failsVerification: false ++tracksDebugUserValues: false ++registers: ++ - { id: 0, class: _, preferred-register: '', flags: [ ] } ++ - { id: 1, class: _, preferred-register: '', flags: [ ] } ++ - { id: 2, class: _, preferred-register: '', flags: [ ] } ++ - { id: 3, class: _, preferred-register: '', flags: [ ] } ++liveins: ++ - { reg: '$w0', virtual-reg: '' } ++frameInfo: ++ isFrameAddressTaken: false ++ isReturnAddressTaken: false ++ hasStackMap: false ++ hasPatchPoint: false ++ stackSize: 0 ++ offsetAdjustment: 0 ++ maxAlignment: 1 ++ adjustsStack: false ++ hasCalls: false ++ stackProtector: '' ++ functionContext: '' ++ maxCallFrameSize: 4294967295 ++ cvBytesOfCalleeSavedRegisters: 0 ++ hasOpaqueSPAdjustment: false ++ hasVAStart: false ++ hasMustTailInVarArgFunc: false ++ hasTailCall: false ++ isCalleeSavedInfoValid: false ++ localFrameSize: 0 ++ savePoint: '' ++ restorePoint: '' ++fixedStack: [] ++stack: [] ++entry_values: [] ++callSites: [] ++debugValueSubstitutions: [] ++constants: [] ++machineFunctionInfo: {} ++body: | ++ bb.1.entry: ++ liveins: $w0 ++ ++ %0:_(s32) = COPY $w0 ++ %1:_(s32) = G_CONSTANT i32 1 ++ %3:_(s32) = G_CONSTANT i32 0 ++ DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 ++ %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 ++ $w0 = COPY %3(s32) ++ RET_ReallyLR implicit $w0 ++ )"; ++ auto TM = createTargetMachine(Triple::normalize("aarch64--"), "", ""); ++ MachineModuleInfo MMI(TM.get()); ++ std::unique_ptr M = parseMIR(*TM, MIR, MMI, &C); ++ ASSERT_TRUE(M); ++ ++ DroppedVariableStatsMIR Stats; ++ auto *MF = MMI.getMachineFunction(*M->getFunction("_Z3fooi")); ++ Stats.runBeforePass("Test", MF); ++ ++ // This loop simulates an IR pass that drops debug information. ++ for (auto &MBB : *MF) { ++ for (auto &MI : MBB) { ++ if (MI.isDebugValueLike()) { ++ MI.eraseFromParent(); ++ break; ++ } ++ } ++ break; ++ } ++ ++ Stats.runAfterPass("Test", MF); ++ ASSERT_EQ(Stats.getPassDroppedVariables(), true); ++} ++ ++} // end anonymous namespace +-- +2.46.2 + diff --git a/0001-Reland-NFC-Move-DroppedVariableStats-to-its-own-file.patch b/0001-Reland-NFC-Move-DroppedVariableStats-to-its-own-file.patch new file mode 100644 index 0000000000000..e68aa98b82b09 --- /dev/null +++ b/0001-Reland-NFC-Move-DroppedVariableStats-to-its-own-file.patch @@ -0,0 +1,1045 @@ +From 1f4f368b9c3b92787018a6ee410c5ab4e79b072d Mon Sep 17 00:00:00 2001 +From: Shubham Sandeep Rastogi +Date: Mon, 18 Nov 2024 16:06:26 -0800 +Subject: [PATCH] Reland [NFC] Move DroppedVariableStats to its own file and + redesign it to be extensible. 
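A note on the "redesign it to be extensible" part of this subject before the patch body: concretely, the redesign turns DroppedVariableStats into a base class whose unit-specific work is funneled through two virtual hooks, and DroppedVariableStatsIR (and later the MIR flavour) only fills those in. The sketch below shows the shape of such a subclass; the class name is hypothetical, and the template arguments in the signatures are reconstructed from the header's own comments, since they appear stripped in this copy of the patch.

#include "llvm/CodeGen/DroppedVariableStats.h"

// Hypothetical subclass: all that a new IR unit has to provide.
class DroppedVariableStatsForMyIR : public llvm::DroppedVariableStats {
public:
  DroppedVariableStatsForMyIR(bool Enabled)
      : llvm::DroppedVariableStats(Enabled) {}

private:
  // (Body omitted.) Would scan the unit's surviving instructions and call
  // updateDroppedCount() to decide whether the drop of \p Var is real.
  void visitEveryInstruction(
      unsigned &DroppedCount,
      llvm::DenseMap<llvm::VarID, llvm::DILocation *> &InlinedAtsMap,
      llvm::VarID Var) override {}

  // (Body omitted.) Would record the unit's debug records before/after a
  // pass via populateVarIDSetAndInlinedMap().
  void visitEveryDebugRecord(
      llvm::DenseSet<llvm::VarID> &VarIDSet,
      llvm::DenseMap<llvm::StringRef,
                     llvm::DenseMap<llvm::VarID, llvm::DILocation *>>
          &InlinedAtsMap,
      llvm::StringRef FuncName, bool Before) override {}
};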
+ +Moved the IR unit test to the CodeGen folder to resolve linker errors: + +error: undefined reference to 'vtable for llvm::DroppedVariableStatsIR' +--- + .../llvm/CodeGen/DroppedVariableStats.h | 226 ++++++++++++++++++ + .../llvm/Passes/StandardInstrumentations.h | 80 +------ + llvm/lib/CodeGen/CMakeLists.txt | 1 + + llvm/lib/CodeGen/DroppedVariableStats.cpp | 194 +++++++++++++++ + llvm/lib/Passes/StandardInstrumentations.cpp | 178 +------------- + llvm/unittests/CodeGen/CMakeLists.txt | 1 + + .../DroppedVariableStatsIRTest.cpp} | 74 +++--- + llvm/unittests/IR/CMakeLists.txt | 1 - + 8 files changed, 456 insertions(+), 299 deletions(-) + create mode 100644 llvm/include/llvm/CodeGen/DroppedVariableStats.h + create mode 100644 llvm/lib/CodeGen/DroppedVariableStats.cpp + rename llvm/unittests/{IR/DroppedVariableStatsTest.cpp => CodeGen/DroppedVariableStatsIRTest.cpp} (91%) + +diff --git a/llvm/include/llvm/CodeGen/DroppedVariableStats.h b/llvm/include/llvm/CodeGen/DroppedVariableStats.h +new file mode 100644 +index 000000000000..371d775b02e8 +--- /dev/null ++++ b/llvm/include/llvm/CodeGen/DroppedVariableStats.h +@@ -0,0 +1,226 @@ ++///===- DroppedVariableStats.h - Opt Diagnostics -*- C++ -*----------------===// ++/// ++/// Part of the LLVM Project, under the Apache License v2.0 with LLVM ++/// Exceptions. See https://llvm.org/LICENSE.txt for license information. ++/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++/// ++///===---------------------------------------------------------------------===// ++/// \file ++/// Dropped Variable Statistics for Debug Information. Reports any number ++/// of #dbg_value that get dropped due to an optimization pass. ++/// ++///===---------------------------------------------------------------------===// ++ ++#ifndef LLVM_CODEGEN_DROPPEDVARIABLESTATS_H ++#define LLVM_CODEGEN_DROPPEDVARIABLESTATS_H ++ ++#include "llvm/CodeGen/MachinePassManager.h" ++#include "llvm/IR/DebugInfoMetadata.h" ++#include "llvm/IR/DiagnosticInfo.h" ++#include "llvm/IR/Function.h" ++#include "llvm/IR/Module.h" ++#include "llvm/IR/PassInstrumentation.h" ++ ++namespace llvm { ++ ++/// A unique key that represents a debug variable. ++/// First const DIScope *: Represents the scope of the debug variable. ++/// Second const DIScope *: Represents the InlinedAt scope of the debug ++/// variable. const DILocalVariable *: It is a pointer to the debug variable ++/// itself. ++using VarID = ++ std::tuple; ++ ++/// A base class to collect and print dropped debug information variable ++/// statistics. ++class DroppedVariableStats { ++public: ++ DroppedVariableStats(bool DroppedVarStatsEnabled) ++ : DroppedVariableStatsEnabled(DroppedVarStatsEnabled) { ++ if (DroppedVarStatsEnabled) ++ llvm::outs() ++ << "Pass Level, Pass Name, Num of Dropped Variables, Func or " ++ "Module Name\n"; ++ }; ++ ++ virtual ~DroppedVariableStats() = default; ++ ++ // We intend this to be unique per-compilation, thus no copies. 
++ DroppedVariableStats(const DroppedVariableStats &) = delete; ++ void operator=(const DroppedVariableStats &) = delete; ++ ++ bool getPassDroppedVariables() { return PassDroppedVariables; } ++ ++protected: ++ void setup() { ++ DebugVariablesStack.push_back( ++ {DenseMap()}); ++ InlinedAts.push_back( ++ {DenseMap>()}); ++ } ++ ++ void cleanup() { ++ assert(!DebugVariablesStack.empty() && ++ "DebugVariablesStack shouldn't be empty!"); ++ assert(!InlinedAts.empty() && "InlinedAts shouldn't be empty!"); ++ DebugVariablesStack.pop_back(); ++ InlinedAts.pop_back(); ++ } ++ ++ bool DroppedVariableStatsEnabled = false; ++ struct DebugVariables { ++ /// DenseSet of VarIDs before an optimization pass has run. ++ DenseSet DebugVariablesBefore; ++ /// DenseSet of VarIDs after an optimization pass has run. ++ DenseSet DebugVariablesAfter; ++ }; ++ ++protected: ++ /// A stack of a DenseMap, that maps DebugVariables for every pass to an ++ /// llvm::Function. A stack is used because an optimization pass can call ++ /// other passes. ++ SmallVector> DebugVariablesStack; ++ ++ /// A DenseSet tracking whether a scope was visited before. ++ DenseSet VisitedScope; ++ /// A stack of DenseMaps, which map the name of an llvm::Function to a ++ /// DenseMap of VarIDs and their inlinedAt locations before an optimization ++ /// pass has run. ++ SmallVector>> InlinedAts; ++ /// Calculate the number of dropped variables in an llvm::Function or ++ /// llvm::MachineFunction and print the relevant information to stdout. ++ void calculateDroppedStatsAndPrint(DebugVariables &DbgVariables, ++ StringRef FuncName, StringRef PassID, ++ StringRef FuncOrModName, ++ StringRef PassLevel, const Function *Func); ++ ++ /// Check if a \p Var has been dropped or is a false positive. Also update the ++ /// \p DroppedCount if a debug variable is dropped. ++ bool updateDroppedCount(DILocation *DbgLoc, const DIScope *Scope, ++ const DIScope *DbgValScope, ++ DenseMap &InlinedAtsMap, ++ VarID Var, unsigned &DroppedCount); ++ /// Run code to populate relevant data structures over an llvm::Function or ++ /// llvm::MachineFunction. ++ void run(DebugVariables &DbgVariables, StringRef FuncName, bool Before); ++ /// Populate the VarIDSet and InlinedAtMap with the relevant information ++ /// needed for before and after pass analysis to determine dropped variable ++ /// status. ++ void populateVarIDSetAndInlinedMap( ++ const DILocalVariable *DbgVar, DebugLoc DbgLoc, DenseSet &VarIDSet, ++ DenseMap> &InlinedAtsMap, ++ StringRef FuncName, bool Before); ++ /// Visit every llvm::Instruction or llvm::MachineInstruction and check if the ++ /// debug variable denoted by its ID \p Var may have been dropped by an ++ /// optimization pass. ++ virtual void ++ visitEveryInstruction(unsigned &DroppedCount, ++ DenseMap &InlinedAtsMap, ++ VarID Var) = 0; ++ /// Visit every debug record in an llvm::Function or llvm::MachineFunction ++ /// and call populateVarIDSetAndInlinedMap on it. ++ virtual void visitEveryDebugRecord( ++ DenseSet &VarIDSet, ++ DenseMap> &InlinedAtsMap, ++ StringRef FuncName, bool Before) = 0; ++ ++private: ++ /// Remove a dropped debug variable's VarID from all Sets in the ++ /// DroppedVariablesBefore stack. ++ void removeVarFromAllSets(VarID Var, const Function *F) { ++ // Do not remove Var from the last element, it will be popped from the ++ // stack. 
++ for (auto &DebugVariablesMap : llvm::drop_end(DebugVariablesStack)) ++ DebugVariablesMap[F].DebugVariablesBefore.erase(Var); ++ } ++ /// Return true if \p Scope is the same as \p DbgValScope or a child scope of ++ /// \p DbgValScope, return false otherwise. ++ bool isScopeChildOfOrEqualTo(const DIScope *Scope, ++ const DIScope *DbgValScope); ++ /// Return true if \p InlinedAt is the same as \p DbgValInlinedAt or part of ++ /// the InlinedAt chain, return false otherwise. ++ bool isInlinedAtChildOfOrEqualTo(const DILocation *InlinedAt, ++ const DILocation *DbgValInlinedAt); ++ bool PassDroppedVariables = false; ++}; ++ ++/// A class to collect and print dropped debug information due to LLVM IR ++/// optimization passes. After every LLVM IR pass is run, it will print how many ++/// #dbg_values were dropped due to that pass. ++class DroppedVariableStatsIR : public DroppedVariableStats { ++public: ++ DroppedVariableStatsIR(bool DroppedVarStatsEnabled) ++ : llvm::DroppedVariableStats(DroppedVarStatsEnabled) {} ++ ++ virtual ~DroppedVariableStatsIR() = default; ++ ++ void runBeforePass(Any IR) { ++ setup(); ++ if (const auto *M = unwrapIR(IR)) ++ return this->runOnModule(M, true); ++ if (const auto *F = unwrapIR(IR)) ++ return this->runOnFunction(F, true); ++ } ++ ++ void runAfterPass(StringRef P, Any IR) { ++ if (const auto *M = unwrapIR(IR)) ++ runAfterPassModule(P, M); ++ else if (const auto *F = unwrapIR(IR)) ++ runAfterPassFunction(P, F); ++ cleanup(); ++ } ++ ++ void registerCallbacks(PassInstrumentationCallbacks &PIC); ++ ++private: ++ const Function *Func; ++ ++ void runAfterPassFunction(StringRef PassID, const Function *F) { ++ runOnFunction(F, false); ++ calculateDroppedVarStatsOnFunction(F, PassID, F->getName().str(), ++ "Function"); ++ } ++ ++ void runAfterPassModule(StringRef PassID, const Module *M) { ++ runOnModule(M, false); ++ calculateDroppedVarStatsOnModule(M, PassID, M->getName().str(), "Module"); ++ } ++ /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or ++ /// after a pass has run to facilitate dropped variable calculation for an ++ /// llvm::Function. ++ void runOnFunction(const Function *F, bool Before); ++ /// Iterate over all Instructions in a Function and report any dropped debug ++ /// information. ++ void calculateDroppedVarStatsOnFunction(const Function *F, StringRef PassID, ++ StringRef FuncOrModName, ++ StringRef PassLevel); ++ /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or ++ /// after a pass has run to facilitate dropped variable calculation for an ++ /// llvm::Module. Calls runOnFunction on every Function in the Module. ++ void runOnModule(const Module *M, bool Before); ++ /// Iterate over all Functions in a Module and report any dropped debug ++ /// information. Will call calculateDroppedVarStatsOnFunction on every ++ /// Function. ++ void calculateDroppedVarStatsOnModule(const Module *M, StringRef PassID, ++ StringRef FuncOrModName, ++ StringRef PassLevel); ++ /// Override base class method to run on an llvm::Function specifically. ++ virtual void ++ visitEveryInstruction(unsigned &DroppedCount, ++ DenseMap &InlinedAtsMap, ++ VarID Var) override; ++ /// Override base class method to run on #dbg_values specifically. ++ virtual void visitEveryDebugRecord( ++ DenseSet &VarIDSet, ++ DenseMap> &InlinedAtsMap, ++ StringRef FuncName, bool Before) override; ++ ++ template static const IRUnitT *unwrapIR(Any IR) { ++ const IRUnitT **IRPtr = llvm::any_cast(&IR); ++ return IRPtr ? 
*IRPtr : nullptr; ++ } ++}; ++ ++} // namespace llvm ++ ++#endif +diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h +index 9301a12c740e..12a34c099eaf 100644 +--- a/llvm/include/llvm/Passes/StandardInstrumentations.h ++++ b/llvm/include/llvm/Passes/StandardInstrumentations.h +@@ -19,6 +19,7 @@ + #include "llvm/ADT/SmallVector.h" + #include "llvm/ADT/StringRef.h" + #include "llvm/ADT/StringSet.h" ++#include "llvm/CodeGen/DroppedVariableStats.h" + #include "llvm/CodeGen/MachineBasicBlock.h" + #include "llvm/IR/BasicBlock.h" + #include "llvm/IR/DebugInfoMetadata.h" +@@ -579,83 +580,6 @@ private: + static void SignalHandler(void *); + }; + +-/// A class to collect and print dropped debug information variable statistics. +-/// After every LLVM IR pass is run, it will print how many #dbg_values were +-/// dropped due to that pass. +-class DroppedVariableStats { +-public: +- DroppedVariableStats(bool DroppedVarStatsEnabled) { +- if (DroppedVarStatsEnabled) +- llvm::outs() +- << "Pass Level, Pass Name, Num of Dropped Variables, Func or " +- "Module Name\n"; +- }; +- // We intend this to be unique per-compilation, thus no copies. +- DroppedVariableStats(const DroppedVariableStats &) = delete; +- void operator=(const DroppedVariableStats &) = delete; +- +- void registerCallbacks(PassInstrumentationCallbacks &PIC); +- void runBeforePass(StringRef PassID, Any IR); +- void runAfterPass(StringRef PassID, Any IR, const PreservedAnalyses &PA); +- void runAfterPassInvalidated(StringRef PassID, const PreservedAnalyses &PA); +- bool getPassDroppedVariables() { return PassDroppedVariables; } +- +-private: +- bool PassDroppedVariables = false; +- /// A unique key that represents a #dbg_value. +- using VarID = +- std::tuple; +- +- struct DebugVariables { +- /// DenseSet of VarIDs before an optimization pass has run. +- DenseSet DebugVariablesBefore; +- /// DenseSet of VarIDs after an optimization pass has run. +- DenseSet DebugVariablesAfter; +- }; +- +- /// A stack of a DenseMap, that maps DebugVariables for every pass to an +- /// llvm::Function. A stack is used because an optimization pass can call +- /// other passes. +- SmallVector> DebugVariablesStack; +- +- /// A DenseSet tracking whether a scope was visited before. +- DenseSet VisitedScope; +- /// A stack of DenseMaps, which map the name of an llvm::Function to a +- /// DenseMap of VarIDs and their inlinedAt locations before an optimization +- /// pass has run. +- SmallVector>> InlinedAts; +- +- /// Iterate over all Functions in a Module and report any dropped debug +- /// information. Will call calculateDroppedVarStatsOnFunction on every +- /// Function. +- void calculateDroppedVarStatsOnModule(const Module *M, StringRef PassID, +- std::string FuncOrModName, +- std::string PassLevel); +- /// Iterate over all Instructions in a Function and report any dropped debug +- /// information. +- void calculateDroppedVarStatsOnFunction(const Function *F, StringRef PassID, +- std::string FuncOrModName, +- std::string PassLevel); +- /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or +- /// after a pass has run to facilitate dropped variable calculation for an +- /// llvm::Function. +- void runOnFunction(const Function *F, bool Before); +- /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or +- /// after a pass has run to facilitate dropped variable calculation for an +- /// llvm::Module. Calls runOnFunction on every Function in the Module. 
+- void runOnModule(const Module *M, bool Before); +- /// Remove a dropped #dbg_value VarID from all Sets in the +- /// DroppedVariablesBefore stack. +- void removeVarFromAllSets(VarID Var, const Function *F); +- /// Return true if \p Scope is the same as \p DbgValScope or a child scope of +- /// \p DbgValScope, return false otherwise. +- bool isScopeChildOfOrEqualTo(DIScope *Scope, const DIScope *DbgValScope); +- /// Return true if \p InlinedAt is the same as \p DbgValInlinedAt or part of +- /// the InlinedAt chain, return false otherwise. +- bool isInlinedAtChildOfOrEqualTo(const DILocation *InlinedAt, +- const DILocation *DbgValInlinedAt); +-}; +- + /// This class provides an interface to register all the standard pass + /// instrumentations and manages their state (if any). + class StandardInstrumentations { +@@ -673,7 +597,7 @@ class StandardInstrumentations { + PrintCrashIRInstrumentation PrintCrashIR; + IRChangedTester ChangeTester; + VerifyInstrumentation Verify; +- DroppedVariableStats DroppedStats; ++ DroppedVariableStatsIR DroppedStatsIR; + + bool VerifyEach; + +diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt +index 7b47c0e6f75d..263d4a9ee94d 100644 +--- a/llvm/lib/CodeGen/CMakeLists.txt ++++ b/llvm/lib/CodeGen/CMakeLists.txt +@@ -50,6 +50,7 @@ add_llvm_component_library(LLVMCodeGen + DeadMachineInstructionElim.cpp + DetectDeadLanes.cpp + DFAPacketizer.cpp ++ DroppedVariableStats.cpp + DwarfEHPrepare.cpp + EarlyIfConversion.cpp + EdgeBundles.cpp +diff --git a/llvm/lib/CodeGen/DroppedVariableStats.cpp b/llvm/lib/CodeGen/DroppedVariableStats.cpp +new file mode 100644 +index 000000000000..122fcad1293f +--- /dev/null ++++ b/llvm/lib/CodeGen/DroppedVariableStats.cpp +@@ -0,0 +1,194 @@ ++///===- DroppedVariableStats.cpp ------------------------------------------===// ++/// ++/// Part of the LLVM Project, under the Apache License v2.0 with LLVM ++/// Exceptions. See https://llvm.org/LICENSE.txt for license information. ++/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++/// ++///===---------------------------------------------------------------------===// ++/// \file ++/// Dropped Variable Statistics for Debug Information. Reports any number ++/// of #dbg_value that get dropped due to an optimization pass. 
++/// ++///===---------------------------------------------------------------------===// ++ ++#include "llvm/CodeGen/DroppedVariableStats.h" ++#include "llvm/IR/DebugInfoMetadata.h" ++#include "llvm/IR/InstIterator.h" ++#include "llvm/IR/Module.h" ++ ++using namespace llvm; ++ ++bool DroppedVariableStats::isScopeChildOfOrEqualTo(const DIScope *Scope, ++ const DIScope *DbgValScope) { ++ while (Scope != nullptr) { ++ if (VisitedScope.find(Scope) == VisitedScope.end()) { ++ VisitedScope.insert(Scope); ++ if (Scope == DbgValScope) { ++ VisitedScope.clear(); ++ return true; ++ } ++ Scope = Scope->getScope(); ++ } else { ++ VisitedScope.clear(); ++ return false; ++ } ++ } ++ return false; ++} ++ ++bool DroppedVariableStats::isInlinedAtChildOfOrEqualTo( ++ const DILocation *InlinedAt, const DILocation *DbgValInlinedAt) { ++ if (DbgValInlinedAt == InlinedAt) ++ return true; ++ if (!DbgValInlinedAt) ++ return false; ++ auto *IA = InlinedAt; ++ while (IA) { ++ if (IA == DbgValInlinedAt) ++ return true; ++ IA = IA->getInlinedAt(); ++ } ++ return false; ++} ++ ++void DroppedVariableStats::calculateDroppedStatsAndPrint( ++ DebugVariables &DbgVariables, StringRef FuncName, StringRef PassID, ++ StringRef FuncOrModName, StringRef PassLevel, const Function *Func) { ++ unsigned DroppedCount = 0; ++ DenseSet &DebugVariablesBeforeSet = DbgVariables.DebugVariablesBefore; ++ DenseSet &DebugVariablesAfterSet = DbgVariables.DebugVariablesAfter; ++ DenseMap &InlinedAtsMap = InlinedAts.back()[FuncName]; ++ // Find an Instruction that shares the same scope as the dropped #dbg_value or ++ // has a scope that is the child of the scope of the #dbg_value, and has an ++ // inlinedAt equal to the inlinedAt of the #dbg_value or it's inlinedAt chain ++ // contains the inlinedAt of the #dbg_value, if such an Instruction is found, ++ // debug information is dropped. ++ for (VarID Var : DebugVariablesBeforeSet) { ++ if (DebugVariablesAfterSet.contains(Var)) ++ continue; ++ visitEveryInstruction(DroppedCount, InlinedAtsMap, Var); ++ removeVarFromAllSets(Var, Func); ++ } ++ if (DroppedCount > 0) { ++ llvm::outs() << PassLevel << ", " << PassID << ", " << DroppedCount << ", " ++ << FuncOrModName << "\n"; ++ PassDroppedVariables = true; ++ } else ++ PassDroppedVariables = false; ++} ++ ++bool DroppedVariableStats::updateDroppedCount( ++ DILocation *DbgLoc, const DIScope *Scope, const DIScope *DbgValScope, ++ DenseMap &InlinedAtsMap, VarID Var, ++ unsigned &DroppedCount) { ++ ++ // If the Scope is a child of, or equal to the DbgValScope and is inlined at ++ // the Var's InlinedAt location, return true to signify that the Var has been ++ // dropped. ++ if (isScopeChildOfOrEqualTo(Scope, DbgValScope)) ++ if (isInlinedAtChildOfOrEqualTo(DbgLoc->getInlinedAt(), ++ InlinedAtsMap[Var])) { ++ // Found another instruction in the variable's scope, so there exists a ++ // break point at which the variable could be observed. Count it as ++ // dropped. ++ DroppedCount++; ++ return true; ++ } ++ return false; ++} ++ ++void DroppedVariableStats::run(DebugVariables &DbgVariables, StringRef FuncName, ++ bool Before) { ++ auto &VarIDSet = (Before ? 
DbgVariables.DebugVariablesBefore ++ : DbgVariables.DebugVariablesAfter); ++ auto &InlinedAtsMap = InlinedAts.back(); ++ if (Before) ++ InlinedAtsMap.try_emplace(FuncName, DenseMap()); ++ VarIDSet = DenseSet(); ++ visitEveryDebugRecord(VarIDSet, InlinedAtsMap, FuncName, Before); ++} ++ ++void DroppedVariableStats::populateVarIDSetAndInlinedMap( ++ const DILocalVariable *DbgVar, DebugLoc DbgLoc, DenseSet &VarIDSet, ++ DenseMap> &InlinedAtsMap, ++ StringRef FuncName, bool Before) { ++ VarID Key{DbgVar->getScope(), DbgLoc->getInlinedAtScope(), DbgVar}; ++ VarIDSet.insert(Key); ++ if (Before) ++ InlinedAtsMap[FuncName].try_emplace(Key, DbgLoc.getInlinedAt()); ++} ++ ++void DroppedVariableStatsIR::runOnFunction(const Function *F, bool Before) { ++ auto &DebugVariables = DebugVariablesStack.back()[F]; ++ auto FuncName = F->getName(); ++ Func = F; ++ run(DebugVariables, FuncName, Before); ++} ++ ++void DroppedVariableStatsIR::calculateDroppedVarStatsOnFunction( ++ const Function *F, StringRef PassID, StringRef FuncOrModName, ++ StringRef PassLevel) { ++ Func = F; ++ StringRef FuncName = F->getName(); ++ DebugVariables &DbgVariables = DebugVariablesStack.back()[F]; ++ calculateDroppedStatsAndPrint(DbgVariables, FuncName, PassID, FuncOrModName, ++ PassLevel, Func); ++} ++ ++void DroppedVariableStatsIR::runOnModule(const Module *M, bool Before) { ++ for (auto &F : *M) ++ runOnFunction(&F, Before); ++} ++ ++void DroppedVariableStatsIR::calculateDroppedVarStatsOnModule( ++ const Module *M, StringRef PassID, StringRef FuncOrModName, ++ StringRef PassLevel) { ++ for (auto &F : *M) { ++ calculateDroppedVarStatsOnFunction(&F, PassID, FuncOrModName, PassLevel); ++ } ++} ++ ++void DroppedVariableStatsIR::registerCallbacks( ++ PassInstrumentationCallbacks &PIC) { ++ if (!DroppedVariableStatsEnabled) ++ return; ++ ++ PIC.registerBeforeNonSkippedPassCallback( ++ [this](StringRef P, Any IR) { return runBeforePass(IR); }); ++ PIC.registerAfterPassCallback( ++ [this](StringRef P, Any IR, const PreservedAnalyses &PA) { ++ return runAfterPass(P, IR); ++ }); ++ PIC.registerAfterPassInvalidatedCallback( ++ [this](StringRef P, const PreservedAnalyses &PA) { return cleanup(); }); ++} ++ ++void DroppedVariableStatsIR::visitEveryInstruction( ++ unsigned &DroppedCount, DenseMap &InlinedAtsMap, ++ VarID Var) { ++ const DIScope *DbgValScope = std::get<0>(Var); ++ for (const auto &I : instructions(Func)) { ++ auto *DbgLoc = I.getDebugLoc().get(); ++ if (!DbgLoc) ++ continue; ++ if (updateDroppedCount(DbgLoc, DbgLoc->getScope(), DbgValScope, ++ InlinedAtsMap, Var, DroppedCount)) ++ break; ++ } ++} ++ ++void DroppedVariableStatsIR::visitEveryDebugRecord( ++ DenseSet &VarIDSet, ++ DenseMap> &InlinedAtsMap, ++ StringRef FuncName, bool Before) { ++ for (const auto &I : instructions(Func)) { ++ for (DbgRecord &DR : I.getDbgRecordRange()) { ++ if (auto *Dbg = dyn_cast(&DR)) { ++ auto *DbgVar = Dbg->getVariable(); ++ auto DbgLoc = DR.getDebugLoc(); ++ populateVarIDSetAndInlinedMap(DbgVar, DbgLoc, VarIDSet, InlinedAtsMap, ++ FuncName, Before); ++ } ++ } ++ } ++} +diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp +index 6259f8f736c8..b766517e68eb 100644 +--- a/llvm/lib/Passes/StandardInstrumentations.cpp ++++ b/llvm/lib/Passes/StandardInstrumentations.cpp +@@ -2462,7 +2462,7 @@ StandardInstrumentations::StandardInstrumentations( + PrintChanged == ChangePrinter::ColourDiffVerbose || + PrintChanged == ChangePrinter::ColourDiffQuiet), + WebsiteChangeReporter(PrintChanged == 
ChangePrinter::DotCfgVerbose), +- Verify(DebugLogging), DroppedStats(DroppedVarStats), ++ Verify(DebugLogging), DroppedStatsIR(DroppedVarStats), + VerifyEach(VerifyEach) {} + + PrintCrashIRInstrumentation *PrintCrashIRInstrumentation::CrashReporter = +@@ -2523,180 +2523,6 @@ void PrintCrashIRInstrumentation::registerCallbacks( + }); + } + +-void DroppedVariableStats::registerCallbacks( +- PassInstrumentationCallbacks &PIC) { +- if (!DroppedVarStats) +- return; +- +- PIC.registerBeforeNonSkippedPassCallback( +- [this](StringRef P, Any IR) { return this->runBeforePass(P, IR); }); +- PIC.registerAfterPassCallback( +- [this](StringRef P, Any IR, const PreservedAnalyses &PA) { +- return this->runAfterPass(P, IR, PA); +- }); +- PIC.registerAfterPassInvalidatedCallback( +- [this](StringRef P, const PreservedAnalyses &PA) { +- return this->runAfterPassInvalidated(P, PA); +- }); +-} +- +-void DroppedVariableStats::runBeforePass(StringRef PassID, Any IR) { +- DebugVariablesStack.push_back({DenseMap()}); +- InlinedAts.push_back({DenseMap>()}); +- if (auto *M = unwrapIR(IR)) +- return this->runOnModule(M, true); +- if (auto *F = unwrapIR(IR)) +- return this->runOnFunction(F, true); +-} +- +-void DroppedVariableStats::runOnFunction(const Function *F, bool Before) { +- auto &DebugVariables = DebugVariablesStack.back()[F]; +- auto &VarIDSet = (Before ? DebugVariables.DebugVariablesBefore +- : DebugVariables.DebugVariablesAfter); +- auto &InlinedAtsMap = InlinedAts.back(); +- auto FuncName = F->getName(); +- if (Before) +- InlinedAtsMap.try_emplace(FuncName, DenseMap()); +- VarIDSet = DenseSet(); +- for (const auto &I : instructions(F)) { +- for (DbgRecord &DR : I.getDbgRecordRange()) { +- if (auto *Dbg = dyn_cast(&DR)) { +- auto *DbgVar = Dbg->getVariable(); +- auto DbgLoc = DR.getDebugLoc(); +- VarID Key{DbgVar->getScope(), DbgLoc->getInlinedAtScope(), DbgVar}; +- VarIDSet.insert(Key); +- if (Before) +- InlinedAtsMap[FuncName].try_emplace(Key, DbgLoc.getInlinedAt()); +- } +- } +- } +-} +- +-void DroppedVariableStats::runOnModule(const Module *M, bool Before) { +- for (auto &F : *M) +- runOnFunction(&F, Before); +-} +- +-void DroppedVariableStats::removeVarFromAllSets(VarID Var, const Function *F) { +- // Do not remove Var from the last element, it will be popped from the stack. 
+- for (auto &DebugVariablesMap : llvm::drop_end(DebugVariablesStack)) +- DebugVariablesMap[F].DebugVariablesBefore.erase(Var); +-} +- +-void DroppedVariableStats::calculateDroppedVarStatsOnModule( +- const Module *M, StringRef PassID, std::string FuncOrModName, +- std::string PassLevel) { +- for (auto &F : *M) { +- calculateDroppedVarStatsOnFunction(&F, PassID, FuncOrModName, PassLevel); +- } +-} +- +-void DroppedVariableStats::calculateDroppedVarStatsOnFunction( +- const Function *F, StringRef PassID, std::string FuncOrModName, +- std::string PassLevel) { +- unsigned DroppedCount = 0; +- StringRef FuncName = F->getName(); +- DebugVariables &DbgVariables = DebugVariablesStack.back()[F]; +- DenseSet &DebugVariablesBeforeSet = DbgVariables.DebugVariablesBefore; +- DenseSet &DebugVariablesAfterSet = DbgVariables.DebugVariablesAfter; +- DenseMap &InlinedAtsMap = InlinedAts.back()[FuncName]; +- // Find an Instruction that shares the same scope as the dropped #dbg_value or +- // has a scope that is the child of the scope of the #dbg_value, and has an +- // inlinedAt equal to the inlinedAt of the #dbg_value or it's inlinedAt chain +- // contains the inlinedAt of the #dbg_value, if such an Instruction is found, +- // debug information is dropped. +- for (VarID Var : DebugVariablesBeforeSet) { +- if (DebugVariablesAfterSet.contains(Var)) +- continue; +- const DIScope *DbgValScope = std::get<0>(Var); +- for (const auto &I : instructions(F)) { +- auto *DbgLoc = I.getDebugLoc().get(); +- if (!DbgLoc) +- continue; +- +- auto *Scope = DbgLoc->getScope(); +- if (isScopeChildOfOrEqualTo(Scope, DbgValScope)) { +- if (isInlinedAtChildOfOrEqualTo(DbgLoc->getInlinedAt(), +- InlinedAtsMap[Var])) { +- // Found another instruction in the variable's scope, so there exists +- // a break point at which the variable could be observed. Count it as +- // dropped. 
+- DroppedCount++; +- break; +- } +- } +- } +- removeVarFromAllSets(Var, F); +- } +- if (DroppedCount > 0) { +- llvm::outs() << PassLevel << ", " << PassID << ", " << DroppedCount << ", " +- << FuncOrModName << "\n"; +- PassDroppedVariables = true; +- } else +- PassDroppedVariables = false; +-} +- +-void DroppedVariableStats::runAfterPassInvalidated( +- StringRef PassID, const PreservedAnalyses &PA) { +- DebugVariablesStack.pop_back(); +- InlinedAts.pop_back(); +-} +- +-void DroppedVariableStats::runAfterPass(StringRef PassID, Any IR, +- const PreservedAnalyses &PA) { +- std::string PassLevel; +- std::string FuncOrModName; +- if (auto *M = unwrapIR(IR)) { +- this->runOnModule(M, false); +- PassLevel = "Module"; +- FuncOrModName = M->getName(); +- calculateDroppedVarStatsOnModule(M, PassID, FuncOrModName, PassLevel); +- } else if (auto *F = unwrapIR(IR)) { +- this->runOnFunction(F, false); +- PassLevel = "Function"; +- FuncOrModName = F->getName(); +- calculateDroppedVarStatsOnFunction(F, PassID, FuncOrModName, PassLevel); +- } +- +- DebugVariablesStack.pop_back(); +- InlinedAts.pop_back(); +-} +- +-bool DroppedVariableStats::isScopeChildOfOrEqualTo(DIScope *Scope, +- const DIScope *DbgValScope) { +- while (Scope != nullptr) { +- if (VisitedScope.find(Scope) == VisitedScope.end()) { +- VisitedScope.insert(Scope); +- if (Scope == DbgValScope) { +- VisitedScope.clear(); +- return true; +- } +- Scope = Scope->getScope(); +- } else { +- VisitedScope.clear(); +- return false; +- } +- } +- return false; +-} +- +-bool DroppedVariableStats::isInlinedAtChildOfOrEqualTo( +- const DILocation *InlinedAt, const DILocation *DbgValInlinedAt) { +- if (DbgValInlinedAt == InlinedAt) +- return true; +- if (!DbgValInlinedAt) +- return false; +- if (!InlinedAt) +- return false; +- auto *IA = InlinedAt; +- while (IA) { +- if (IA == DbgValInlinedAt) +- return true; +- IA = IA->getInlinedAt(); +- } +- return false; +-} +- + void StandardInstrumentations::registerCallbacks( + PassInstrumentationCallbacks &PIC, ModuleAnalysisManager *MAM) { + PrintIR.registerCallbacks(PIC); +@@ -2712,7 +2538,7 @@ void StandardInstrumentations::registerCallbacks( + WebsiteChangeReporter.registerCallbacks(PIC); + ChangeTester.registerCallbacks(PIC); + PrintCrashIR.registerCallbacks(PIC); +- DroppedStats.registerCallbacks(PIC); ++ DroppedStatsIR.registerCallbacks(PIC); + if (MAM) + PreservedCFGChecker.registerCallbacks(PIC, *MAM); + +diff --git a/llvm/unittests/CodeGen/CMakeLists.txt b/llvm/unittests/CodeGen/CMakeLists.txt +index 963cdcc0275e..807fd1a9b7b5 100644 +--- a/llvm/unittests/CodeGen/CMakeLists.txt ++++ b/llvm/unittests/CodeGen/CMakeLists.txt +@@ -27,6 +27,7 @@ add_llvm_unittest(CodeGenTests + CCStateTest.cpp + DIEHashTest.cpp + DIETest.cpp ++ DroppedVariableStatsIRTest.cpp + DwarfStringPoolEntryRefTest.cpp + InstrRefLDVTest.cpp + LowLevelTypeTest.cpp +diff --git a/llvm/unittests/IR/DroppedVariableStatsTest.cpp b/llvm/unittests/CodeGen/DroppedVariableStatsIRTest.cpp +similarity index 91% +rename from llvm/unittests/IR/DroppedVariableStatsTest.cpp +rename to llvm/unittests/CodeGen/DroppedVariableStatsIRTest.cpp +index 61f3a87bb355..094ec7b65763 100644 +--- a/llvm/unittests/IR/DroppedVariableStatsTest.cpp ++++ b/llvm/unittests/CodeGen/DroppedVariableStatsIRTest.cpp +@@ -1,5 +1,4 @@ +-//===- unittests/IR/DroppedVariableStatsTest.cpp - TimePassesHandler tests +-//----------===// ++//===- unittests/IR/DroppedVariableStatsIRTest.cpp ------------------------===// + // + // Part of the LLVM Project, under the Apache License v2.0 with 
LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. +@@ -8,6 +7,7 @@ + //===----------------------------------------------------------------------===// + + #include "llvm/AsmParser/Parser.h" ++#include "llvm/CodeGen/DroppedVariableStats.h" + #include "llvm/IR/Function.h" + #include "llvm/IR/InstIterator.h" + #include "llvm/IR/LegacyPassManager.h" +@@ -44,7 +44,7 @@ namespace { + // This test ensures that if a #dbg_value and an instruction that exists in the + // same scope as that #dbg_value are both deleted as a result of an optimization + // pass, debug information is considered not dropped. +-TEST(DroppedVariableStats, BothDeleted) { ++TEST(DroppedVariableStatsIR, BothDeleted) { + PassInstrumentationCallbacks PIC; + PassInstrumentation PI(&PIC); + +@@ -79,9 +79,8 @@ TEST(DroppedVariableStats, BothDeleted) { + std::unique_ptr M = parseIR(C, IR); + ASSERT_TRUE(M); + +- DroppedVariableStats Stats(true); +- Stats.runBeforePass("Test", +- llvm::Any(const_cast(M.get()))); ++ DroppedVariableStatsIR Stats(true); ++ Stats.runBeforePass(llvm::Any(const_cast(M.get()))); + + // This loop simulates an IR pass that drops debug information. + for (auto &F : *M) { +@@ -92,16 +91,15 @@ TEST(DroppedVariableStats, BothDeleted) { + } + break; + } +- PreservedAnalyses PA; + Stats.runAfterPass("Test", +- llvm::Any(const_cast(M.get())), PA); ++ llvm::Any(const_cast(M.get()))); + ASSERT_EQ(Stats.getPassDroppedVariables(), false); + } + + // This test ensures that if a #dbg_value is dropped after an optimization pass, + // but an instruction that shares the same scope as the #dbg_value still exists, + // debug information is conisdered dropped. +-TEST(DroppedVariableStats, DbgValLost) { ++TEST(DroppedVariableStatsIR, DbgValLost) { + PassInstrumentationCallbacks PIC; + PassInstrumentation PI(&PIC); + +@@ -136,9 +134,8 @@ TEST(DroppedVariableStats, DbgValLost) { + std::unique_ptr M = parseIR(C, IR); + ASSERT_TRUE(M); + +- DroppedVariableStats Stats(true); +- Stats.runBeforePass("Test", +- llvm::Any(const_cast(M.get()))); ++ DroppedVariableStatsIR Stats(true); ++ Stats.runBeforePass(llvm::Any(const_cast(M.get()))); + + // This loop simulates an IR pass that drops debug information. + for (auto &F : *M) { +@@ -148,16 +145,15 @@ TEST(DroppedVariableStats, DbgValLost) { + } + break; + } +- PreservedAnalyses PA; + Stats.runAfterPass("Test", +- llvm::Any(const_cast(M.get())), PA); ++ llvm::Any(const_cast(M.get()))); + ASSERT_EQ(Stats.getPassDroppedVariables(), true); + } + + // This test ensures that if a #dbg_value is dropped after an optimization pass, + // but an instruction that has an unrelated scope as the #dbg_value still + // exists, debug information is conisdered not dropped. +-TEST(DroppedVariableStats, UnrelatedScopes) { ++TEST(DroppedVariableStatsIR, UnrelatedScopes) { + PassInstrumentationCallbacks PIC; + PassInstrumentation PI(&PIC); + +@@ -193,9 +189,8 @@ TEST(DroppedVariableStats, UnrelatedScopes) { + std::unique_ptr M = parseIR(C, IR); + ASSERT_TRUE(M); + +- DroppedVariableStats Stats(true); +- Stats.runBeforePass("Test", +- llvm::Any(const_cast(M.get()))); ++ DroppedVariableStatsIR Stats(true); ++ Stats.runBeforePass(llvm::Any(const_cast(M.get()))); + + // This loop simulates an IR pass that drops debug information. 
+ for (auto &F : *M) { +@@ -205,16 +200,15 @@ TEST(DroppedVariableStats, UnrelatedScopes) { + } + break; + } +- PreservedAnalyses PA; + Stats.runAfterPass("Test", +- llvm::Any(const_cast(M.get())), PA); ++ llvm::Any(const_cast(M.get()))); + ASSERT_EQ(Stats.getPassDroppedVariables(), false); + } + + // This test ensures that if a #dbg_value is dropped after an optimization pass, + // but an instruction that has a scope which is a child of the #dbg_value scope + // still exists, debug information is conisdered dropped. +-TEST(DroppedVariableStats, ChildScopes) { ++TEST(DroppedVariableStatsIR, ChildScopes) { + PassInstrumentationCallbacks PIC; + PassInstrumentation PI(&PIC); + +@@ -250,9 +244,8 @@ TEST(DroppedVariableStats, ChildScopes) { + std::unique_ptr M = parseIR(C, IR); + ASSERT_TRUE(M); + +- DroppedVariableStats Stats(true); +- Stats.runBeforePass("Test", +- llvm::Any(const_cast(M.get()))); ++ DroppedVariableStatsIR Stats(true); ++ Stats.runBeforePass(llvm::Any(const_cast(M.get()))); + + // This loop simulates an IR pass that drops debug information. + for (auto &F : *M) { +@@ -262,9 +255,8 @@ TEST(DroppedVariableStats, ChildScopes) { + } + break; + } +- PreservedAnalyses PA; + Stats.runAfterPass("Test", +- llvm::Any(const_cast(M.get())), PA); ++ llvm::Any(const_cast(M.get()))); + ASSERT_EQ(Stats.getPassDroppedVariables(), true); + } + +@@ -272,7 +264,7 @@ TEST(DroppedVariableStats, ChildScopes) { + // but an instruction that has a scope which is a child of the #dbg_value scope + // still exists, and the #dbg_value is inlined at another location, debug + // information is conisdered not dropped. +-TEST(DroppedVariableStats, InlinedAt) { ++TEST(DroppedVariableStatsIR, InlinedAt) { + PassInstrumentationCallbacks PIC; + PassInstrumentation PI(&PIC); + +@@ -308,9 +300,8 @@ TEST(DroppedVariableStats, InlinedAt) { + std::unique_ptr M = parseIR(C, IR); + ASSERT_TRUE(M); + +- DroppedVariableStats Stats(true); +- Stats.runBeforePass("Test", +- llvm::Any(const_cast(M.get()))); ++ DroppedVariableStatsIR Stats(true); ++ Stats.runBeforePass(llvm::Any(const_cast(M.get()))); + + // This loop simulates an IR pass that drops debug information. + for (auto &F : *M) { +@@ -320,9 +311,8 @@ TEST(DroppedVariableStats, InlinedAt) { + } + break; + } +- PreservedAnalyses PA; + Stats.runAfterPass("Test", +- llvm::Any(const_cast(M.get())), PA); ++ llvm::Any(const_cast(M.get()))); + ASSERT_EQ(Stats.getPassDroppedVariables(), false); + } + +@@ -330,7 +320,7 @@ TEST(DroppedVariableStats, InlinedAt) { + // but an instruction that has a scope which is a child of the #dbg_value scope + // still exists, and the #dbg_value and the instruction are inlined at another + // location, debug information is conisdered dropped. +-TEST(DroppedVariableStats, InlinedAtShared) { ++TEST(DroppedVariableStatsIR, InlinedAtShared) { + PassInstrumentationCallbacks PIC; + PassInstrumentation PI(&PIC); + +@@ -366,9 +356,8 @@ TEST(DroppedVariableStats, InlinedAtShared) { + std::unique_ptr M = parseIR(C, IR); + ASSERT_TRUE(M); + +- DroppedVariableStats Stats(true); +- Stats.runBeforePass("Test", +- llvm::Any(const_cast(M.get()))); ++ DroppedVariableStatsIR Stats(true); ++ Stats.runBeforePass(llvm::Any(const_cast(M.get()))); + + // This loop simulates an IR pass that drops debug information. 
+ for (auto &F : *M) { +@@ -378,9 +367,8 @@ TEST(DroppedVariableStats, InlinedAtShared) { + } + break; + } +- PreservedAnalyses PA; + Stats.runAfterPass("Test", +- llvm::Any(const_cast(M.get())), PA); ++ llvm::Any(const_cast(M.get()))); + ASSERT_EQ(Stats.getPassDroppedVariables(), true); + } + +@@ -388,7 +376,7 @@ TEST(DroppedVariableStats, InlinedAtShared) { + // but an instruction that has a scope which is a child of the #dbg_value scope + // still exists, and the instruction is inlined at a location that is the + // #dbg_value's inlined at location, debug information is conisdered dropped. +-TEST(DroppedVariableStats, InlinedAtChild) { ++TEST(DroppedVariableStatsIR, InlinedAtChild) { + PassInstrumentationCallbacks PIC; + PassInstrumentation PI(&PIC); + +@@ -425,9 +413,8 @@ TEST(DroppedVariableStats, InlinedAtChild) { + std::unique_ptr M = parseIR(C, IR); + ASSERT_TRUE(M); + +- DroppedVariableStats Stats(true); +- Stats.runBeforePass("Test", +- llvm::Any(const_cast(M.get()))); ++ DroppedVariableStatsIR Stats(true); ++ Stats.runBeforePass(llvm::Any(const_cast(M.get()))); + + // This loop simulates an IR pass that drops debug information. + for (auto &F : *M) { +@@ -437,9 +424,8 @@ TEST(DroppedVariableStats, InlinedAtChild) { + } + break; + } +- PreservedAnalyses PA; + Stats.runAfterPass("Test", +- llvm::Any(const_cast(M.get())), PA); ++ llvm::Any(const_cast(M.get()))); + ASSERT_EQ(Stats.getPassDroppedVariables(), true); + } + +diff --git a/llvm/unittests/IR/CMakeLists.txt b/llvm/unittests/IR/CMakeLists.txt +index ed93ee547d22..e5c8630f3eed 100644 +--- a/llvm/unittests/IR/CMakeLists.txt ++++ b/llvm/unittests/IR/CMakeLists.txt +@@ -43,7 +43,6 @@ add_llvm_unittest(IRTests + ShuffleVectorInstTest.cpp + StructuralHashTest.cpp + TimePassesTest.cpp +- DroppedVariableStatsTest.cpp + TypesTest.cpp + UseTest.cpp + UserTest.cpp +-- +2.46.2 + diff --git a/llvm/include/llvm/CodeGen/DroppedVariableStats.h b/llvm/include/llvm/CodeGen/DroppedVariableStats.h new file mode 100644 index 0000000000000..c7b654ea58557 --- /dev/null +++ b/llvm/include/llvm/CodeGen/DroppedVariableStats.h @@ -0,0 +1,224 @@ +///===- DroppedVariableStats.h - Opt Diagnostics -*- C++ -*----------------===// +/// +/// Part of the LLVM Project, under the Apache License v2.0 with LLVM +/// Exceptions. See https://llvm.org/LICENSE.txt for license information. +/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +/// +///===---------------------------------------------------------------------===// +/// \file +/// Dropped Variable Statistics for Debug Information. Reports any number +/// of #dbg_value that get dropped due to an optimization pass. +/// +///===---------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_DROPPEDVARIABLESTATS_H +#define LLVM_CODEGEN_DROPPEDVARIABLESTATS_H + +#include "llvm/CodeGen/MachinePassManager.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassInstrumentation.h" + +namespace llvm { + +/// A unique key that represents a debug variable. +/// First const DIScope *: Represents the scope of the debug variable. +/// Second const DIScope *: Represents the InlinedAt scope of the debug +/// variable. const DILocalVariable *: It is a pointer to the debug variable +/// itself. +using VarID = + std::tuple; + +/// A base class to collect and print dropped debug information variable +/// statistics. 
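One gloss before the class definition that follows: the "using VarID = std::tuple;" alias above has lost its template arguments in this copy, but the comment directly above it spells them out, i.e. std::tuple<const DIScope *, const DIScope *, const DILocalVariable *>. The illustrative helper below (the name makeVarID is not in the header, and it assumes the VarID alias is in scope) shows how such a key is formed from a debug variable and its DebugLoc, exactly as populateVarIDSetAndInlinedMap does further down.

#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"

// Illustrative helper: a debug variable is keyed by its own scope, the scope
// it is inlined into, and the variable itself.
static llvm::VarID makeVarID(const llvm::DILocalVariable *DbgVar,
                             llvm::DebugLoc DbgLoc) {
  return {DbgVar->getScope(), DbgLoc->getInlinedAtScope(), DbgVar};
}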
+class DroppedVariableStats { +public: + DroppedVariableStats(bool DroppedVarStatsEnabled) + : DroppedVariableStatsEnabled(DroppedVarStatsEnabled) { + if (DroppedVarStatsEnabled) + llvm::outs() + << "Pass Level, Pass Name, Num of Dropped Variables, Func or " + "Module Name\n"; + }; + + virtual ~DroppedVariableStats() = default; + + // We intend this to be unique per-compilation, thus no copies. + DroppedVariableStats(const DroppedVariableStats &) = delete; + void operator=(const DroppedVariableStats &) = delete; + + bool getPassDroppedVariables() { return PassDroppedVariables; } + +protected: + void setup() { + DebugVariablesStack.push_back( + {DenseMap()}); + InlinedAts.push_back( + {DenseMap>()}); + } + + void cleanup() { + assert(!DebugVariablesStack.empty() && + "DebugVariablesStack shouldn't be empty!"); + assert(!InlinedAts.empty() && "InlinedAts shouldn't be empty!"); + DebugVariablesStack.pop_back(); + InlinedAts.pop_back(); + } + + bool DroppedVariableStatsEnabled = false; + struct DebugVariables { + /// DenseSet of VarIDs before an optimization pass has run. + DenseSet DebugVariablesBefore; + /// DenseSet of VarIDs after an optimization pass has run. + DenseSet DebugVariablesAfter; + }; + +protected: + /// A stack of a DenseMap, that maps DebugVariables for every pass to an + /// llvm::Function. A stack is used because an optimization pass can call + /// other passes. + SmallVector> DebugVariablesStack; + + /// A DenseSet tracking whether a scope was visited before. + DenseSet VisitedScope; + /// A stack of DenseMaps, which map the name of an llvm::Function to a + /// DenseMap of VarIDs and their inlinedAt locations before an optimization + /// pass has run. + SmallVector>> InlinedAts; + /// Calculate the number of dropped variables in an llvm::Function or + /// llvm::MachineFunction and print the relevant information to stdout. + void calculateDroppedStatsAndPrint(DebugVariables &DbgVariables, + StringRef FuncName, StringRef PassID, + StringRef FuncOrModName, + StringRef PassLevel, const Function *Func); + + /// Check if a \p Var has been dropped or is a false positive. Also update the + /// \p DroppedCount if a debug variable is dropped. + bool updateDroppedCount(DILocation *DbgLoc, const DIScope *Scope, + const DIScope *DbgValScope, + DenseMap &InlinedAtsMap, + VarID Var, unsigned &DroppedCount); + /// Run code to populate relevant data structures over an llvm::Function or + /// llvm::MachineFunction. + void run(DebugVariables &DbgVariables, StringRef FuncName, bool Before); + /// Populate the VarIDSet and InlinedAtMap with the relevant information + /// needed for before and after pass analysis to determine dropped variable + /// status. + void populateVarIDSetAndInlinedMap( + const DILocalVariable *DbgVar, DebugLoc DbgLoc, DenseSet &VarIDSet, + DenseMap> &InlinedAtsMap, + StringRef FuncName, bool Before); + /// Visit every llvm::Instruction or llvm::MachineInstruction and check if the + /// debug variable denoted by its ID \p Var may have been dropped by an + /// optimization pass. + virtual void + visitEveryInstruction(unsigned &DroppedCount, + DenseMap &InlinedAtsMap, + VarID Var) = 0; + /// Visit every debug record in an llvm::Function or llvm::MachineFunction + /// and call populateVarIDSetAndInlinedMap on it. + virtual void visitEveryDebugRecord( + DenseSet &VarIDSet, + DenseMap> &InlinedAtsMap, + StringRef FuncName, bool Before) = 0; + +private: + /// Remove a dropped debug variable's VarID from all Sets in the + /// DroppedVariablesBefore stack. 
+ void removeVarFromAllSets(VarID Var, const Function *F) { + // Do not remove Var from the last element, it will be popped from the + // stack. + for (auto &DebugVariablesMap : llvm::drop_end(DebugVariablesStack)) + DebugVariablesMap[F].DebugVariablesBefore.erase(Var); + } + /// Return true if \p Scope is the same as \p DbgValScope or a child scope of + /// \p DbgValScope, return false otherwise. + bool isScopeChildOfOrEqualTo(const DIScope *Scope, + const DIScope *DbgValScope); + /// Return true if \p InlinedAt is the same as \p DbgValInlinedAt or part of + /// the InlinedAt chain, return false otherwise. + bool isInlinedAtChildOfOrEqualTo(const DILocation *InlinedAt, + const DILocation *DbgValInlinedAt); + bool PassDroppedVariables = false; +}; + +/// A class to collect and print dropped debug information due to LLVM IR +/// optimization passes. After every LLVM IR pass is run, it will print how many +/// #dbg_values were dropped due to that pass. +class DroppedVariableStatsIR : public DroppedVariableStats { +public: + DroppedVariableStatsIR(bool DroppedVarStatsEnabled) + : llvm::DroppedVariableStats(DroppedVarStatsEnabled) {} + + void runBeforePass(Any IR) { + setup(); + if (const auto *M = unwrapIR(IR)) + return this->runOnModule(M, true); + if (const auto *F = unwrapIR(IR)) + return this->runOnFunction(F, true); + } + + void runAfterPass(StringRef P, Any IR) { + if (const auto *M = unwrapIR(IR)) + runAfterPassModule(P, M); + else if (const auto *F = unwrapIR(IR)) + runAfterPassFunction(P, F); + cleanup(); + } + + void registerCallbacks(PassInstrumentationCallbacks &PIC); + +private: + const Function *Func; + + void runAfterPassFunction(StringRef PassID, const Function *F) { + runOnFunction(F, false); + calculateDroppedVarStatsOnFunction(F, PassID, F->getName().str(), + "Function"); + } + + void runAfterPassModule(StringRef PassID, const Module *M) { + runOnModule(M, false); + calculateDroppedVarStatsOnModule(M, PassID, M->getName().str(), "Module"); + } + /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or + /// after a pass has run to facilitate dropped variable calculation for an + /// llvm::Function. + void runOnFunction(const Function *F, bool Before); + /// Iterate over all Instructions in a Function and report any dropped debug + /// information. + void calculateDroppedVarStatsOnFunction(const Function *F, StringRef PassID, + StringRef FuncOrModName, + StringRef PassLevel); + /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or + /// after a pass has run to facilitate dropped variable calculation for an + /// llvm::Module. Calls runOnFunction on every Function in the Module. + void runOnModule(const Module *M, bool Before); + /// Iterate over all Functions in a Module and report any dropped debug + /// information. Will call calculateDroppedVarStatsOnFunction on every + /// Function. + void calculateDroppedVarStatsOnModule(const Module *M, StringRef PassID, + StringRef FuncOrModName, + StringRef PassLevel); + /// Override base class method to run on an llvm::Function specifically. + virtual void + visitEveryInstruction(unsigned &DroppedCount, + DenseMap &InlinedAtsMap, + VarID Var) override; + /// Override base class method to run on #dbg_values specifically. + virtual void visitEveryDebugRecord( + DenseSet &VarIDSet, + DenseMap> &InlinedAtsMap, + StringRef FuncName, bool Before) override; + + template static const IRUnitT *unwrapIR(Any IR) { + const IRUnitT **IRPtr = llvm::any_cast(&IR); + return IRPtr ? 
*IRPtr : nullptr; + } +}; + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h index 9301a12c740ee..12a34c099eaff 100644 --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" +#include "llvm/CodeGen/DroppedVariableStats.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -579,83 +580,6 @@ class PrintCrashIRInstrumentation { static void SignalHandler(void *); }; -/// A class to collect and print dropped debug information variable statistics. -/// After every LLVM IR pass is run, it will print how many #dbg_values were -/// dropped due to that pass. -class DroppedVariableStats { -public: - DroppedVariableStats(bool DroppedVarStatsEnabled) { - if (DroppedVarStatsEnabled) - llvm::outs() - << "Pass Level, Pass Name, Num of Dropped Variables, Func or " - "Module Name\n"; - }; - // We intend this to be unique per-compilation, thus no copies. - DroppedVariableStats(const DroppedVariableStats &) = delete; - void operator=(const DroppedVariableStats &) = delete; - - void registerCallbacks(PassInstrumentationCallbacks &PIC); - void runBeforePass(StringRef PassID, Any IR); - void runAfterPass(StringRef PassID, Any IR, const PreservedAnalyses &PA); - void runAfterPassInvalidated(StringRef PassID, const PreservedAnalyses &PA); - bool getPassDroppedVariables() { return PassDroppedVariables; } - -private: - bool PassDroppedVariables = false; - /// A unique key that represents a #dbg_value. - using VarID = - std::tuple; - - struct DebugVariables { - /// DenseSet of VarIDs before an optimization pass has run. - DenseSet DebugVariablesBefore; - /// DenseSet of VarIDs after an optimization pass has run. - DenseSet DebugVariablesAfter; - }; - - /// A stack of a DenseMap, that maps DebugVariables for every pass to an - /// llvm::Function. A stack is used because an optimization pass can call - /// other passes. - SmallVector> DebugVariablesStack; - - /// A DenseSet tracking whether a scope was visited before. - DenseSet VisitedScope; - /// A stack of DenseMaps, which map the name of an llvm::Function to a - /// DenseMap of VarIDs and their inlinedAt locations before an optimization - /// pass has run. - SmallVector>> InlinedAts; - - /// Iterate over all Functions in a Module and report any dropped debug - /// information. Will call calculateDroppedVarStatsOnFunction on every - /// Function. - void calculateDroppedVarStatsOnModule(const Module *M, StringRef PassID, - std::string FuncOrModName, - std::string PassLevel); - /// Iterate over all Instructions in a Function and report any dropped debug - /// information. - void calculateDroppedVarStatsOnFunction(const Function *F, StringRef PassID, - std::string FuncOrModName, - std::string PassLevel); - /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or - /// after a pass has run to facilitate dropped variable calculation for an - /// llvm::Function. - void runOnFunction(const Function *F, bool Before); - /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or - /// after a pass has run to facilitate dropped variable calculation for an - /// llvm::Module. Calls runOnFunction on every Function in the Module. 
- void runOnModule(const Module *M, bool Before); - /// Remove a dropped #dbg_value VarID from all Sets in the - /// DroppedVariablesBefore stack. - void removeVarFromAllSets(VarID Var, const Function *F); - /// Return true if \p Scope is the same as \p DbgValScope or a child scope of - /// \p DbgValScope, return false otherwise. - bool isScopeChildOfOrEqualTo(DIScope *Scope, const DIScope *DbgValScope); - /// Return true if \p InlinedAt is the same as \p DbgValInlinedAt or part of - /// the InlinedAt chain, return false otherwise. - bool isInlinedAtChildOfOrEqualTo(const DILocation *InlinedAt, - const DILocation *DbgValInlinedAt); -}; - /// This class provides an interface to register all the standard pass /// instrumentations and manages their state (if any). class StandardInstrumentations { @@ -673,7 +597,7 @@ class StandardInstrumentations { PrintCrashIRInstrumentation PrintCrashIR; IRChangedTester ChangeTester; VerifyInstrumentation Verify; - DroppedVariableStats DroppedStats; + DroppedVariableStatsIR DroppedStatsIR; bool VerifyEach; diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 7b47c0e6f75db..263d4a9ee94d2 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -50,6 +50,7 @@ add_llvm_component_library(LLVMCodeGen DeadMachineInstructionElim.cpp DetectDeadLanes.cpp DFAPacketizer.cpp + DroppedVariableStats.cpp DwarfEHPrepare.cpp EarlyIfConversion.cpp EdgeBundles.cpp diff --git a/llvm/lib/CodeGen/DroppedVariableStats.cpp b/llvm/lib/CodeGen/DroppedVariableStats.cpp new file mode 100644 index 0000000000000..122fcad1293f1 --- /dev/null +++ b/llvm/lib/CodeGen/DroppedVariableStats.cpp @@ -0,0 +1,194 @@ +///===- DroppedVariableStats.cpp ------------------------------------------===// +/// +/// Part of the LLVM Project, under the Apache License v2.0 with LLVM +/// Exceptions. See https://llvm.org/LICENSE.txt for license information. +/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +/// +///===---------------------------------------------------------------------===// +/// \file +/// Dropped Variable Statistics for Debug Information. Reports any number +/// of #dbg_value that get dropped due to an optimization pass. 
+/// +///===---------------------------------------------------------------------===// + +#include "llvm/CodeGen/DroppedVariableStats.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Module.h" + +using namespace llvm; + +bool DroppedVariableStats::isScopeChildOfOrEqualTo(const DIScope *Scope, + const DIScope *DbgValScope) { + while (Scope != nullptr) { + if (VisitedScope.find(Scope) == VisitedScope.end()) { + VisitedScope.insert(Scope); + if (Scope == DbgValScope) { + VisitedScope.clear(); + return true; + } + Scope = Scope->getScope(); + } else { + VisitedScope.clear(); + return false; + } + } + return false; +} + +bool DroppedVariableStats::isInlinedAtChildOfOrEqualTo( + const DILocation *InlinedAt, const DILocation *DbgValInlinedAt) { + if (DbgValInlinedAt == InlinedAt) + return true; + if (!DbgValInlinedAt) + return false; + auto *IA = InlinedAt; + while (IA) { + if (IA == DbgValInlinedAt) + return true; + IA = IA->getInlinedAt(); + } + return false; +} + +void DroppedVariableStats::calculateDroppedStatsAndPrint( + DebugVariables &DbgVariables, StringRef FuncName, StringRef PassID, + StringRef FuncOrModName, StringRef PassLevel, const Function *Func) { + unsigned DroppedCount = 0; + DenseSet &DebugVariablesBeforeSet = DbgVariables.DebugVariablesBefore; + DenseSet &DebugVariablesAfterSet = DbgVariables.DebugVariablesAfter; + DenseMap &InlinedAtsMap = InlinedAts.back()[FuncName]; + // Find an Instruction that shares the same scope as the dropped #dbg_value or + // has a scope that is the child of the scope of the #dbg_value, and has an + // inlinedAt equal to the inlinedAt of the #dbg_value or it's inlinedAt chain + // contains the inlinedAt of the #dbg_value, if such an Instruction is found, + // debug information is dropped. + for (VarID Var : DebugVariablesBeforeSet) { + if (DebugVariablesAfterSet.contains(Var)) + continue; + visitEveryInstruction(DroppedCount, InlinedAtsMap, Var); + removeVarFromAllSets(Var, Func); + } + if (DroppedCount > 0) { + llvm::outs() << PassLevel << ", " << PassID << ", " << DroppedCount << ", " + << FuncOrModName << "\n"; + PassDroppedVariables = true; + } else + PassDroppedVariables = false; +} + +bool DroppedVariableStats::updateDroppedCount( + DILocation *DbgLoc, const DIScope *Scope, const DIScope *DbgValScope, + DenseMap &InlinedAtsMap, VarID Var, + unsigned &DroppedCount) { + + // If the Scope is a child of, or equal to the DbgValScope and is inlined at + // the Var's InlinedAt location, return true to signify that the Var has been + // dropped. + if (isScopeChildOfOrEqualTo(Scope, DbgValScope)) + if (isInlinedAtChildOfOrEqualTo(DbgLoc->getInlinedAt(), + InlinedAtsMap[Var])) { + // Found another instruction in the variable's scope, so there exists a + // break point at which the variable could be observed. Count it as + // dropped. + DroppedCount++; + return true; + } + return false; +} + +void DroppedVariableStats::run(DebugVariables &DbgVariables, StringRef FuncName, + bool Before) { + auto &VarIDSet = (Before ? 
DbgVariables.DebugVariablesBefore + : DbgVariables.DebugVariablesAfter); + auto &InlinedAtsMap = InlinedAts.back(); + if (Before) + InlinedAtsMap.try_emplace(FuncName, DenseMap()); + VarIDSet = DenseSet(); + visitEveryDebugRecord(VarIDSet, InlinedAtsMap, FuncName, Before); +} + +void DroppedVariableStats::populateVarIDSetAndInlinedMap( + const DILocalVariable *DbgVar, DebugLoc DbgLoc, DenseSet &VarIDSet, + DenseMap> &InlinedAtsMap, + StringRef FuncName, bool Before) { + VarID Key{DbgVar->getScope(), DbgLoc->getInlinedAtScope(), DbgVar}; + VarIDSet.insert(Key); + if (Before) + InlinedAtsMap[FuncName].try_emplace(Key, DbgLoc.getInlinedAt()); +} + +void DroppedVariableStatsIR::runOnFunction(const Function *F, bool Before) { + auto &DebugVariables = DebugVariablesStack.back()[F]; + auto FuncName = F->getName(); + Func = F; + run(DebugVariables, FuncName, Before); +} + +void DroppedVariableStatsIR::calculateDroppedVarStatsOnFunction( + const Function *F, StringRef PassID, StringRef FuncOrModName, + StringRef PassLevel) { + Func = F; + StringRef FuncName = F->getName(); + DebugVariables &DbgVariables = DebugVariablesStack.back()[F]; + calculateDroppedStatsAndPrint(DbgVariables, FuncName, PassID, FuncOrModName, + PassLevel, Func); +} + +void DroppedVariableStatsIR::runOnModule(const Module *M, bool Before) { + for (auto &F : *M) + runOnFunction(&F, Before); +} + +void DroppedVariableStatsIR::calculateDroppedVarStatsOnModule( + const Module *M, StringRef PassID, StringRef FuncOrModName, + StringRef PassLevel) { + for (auto &F : *M) { + calculateDroppedVarStatsOnFunction(&F, PassID, FuncOrModName, PassLevel); + } +} + +void DroppedVariableStatsIR::registerCallbacks( + PassInstrumentationCallbacks &PIC) { + if (!DroppedVariableStatsEnabled) + return; + + PIC.registerBeforeNonSkippedPassCallback( + [this](StringRef P, Any IR) { return runBeforePass(IR); }); + PIC.registerAfterPassCallback( + [this](StringRef P, Any IR, const PreservedAnalyses &PA) { + return runAfterPass(P, IR); + }); + PIC.registerAfterPassInvalidatedCallback( + [this](StringRef P, const PreservedAnalyses &PA) { return cleanup(); }); +} + +void DroppedVariableStatsIR::visitEveryInstruction( + unsigned &DroppedCount, DenseMap &InlinedAtsMap, + VarID Var) { + const DIScope *DbgValScope = std::get<0>(Var); + for (const auto &I : instructions(Func)) { + auto *DbgLoc = I.getDebugLoc().get(); + if (!DbgLoc) + continue; + if (updateDroppedCount(DbgLoc, DbgLoc->getScope(), DbgValScope, + InlinedAtsMap, Var, DroppedCount)) + break; + } +} + +void DroppedVariableStatsIR::visitEveryDebugRecord( + DenseSet &VarIDSet, + DenseMap> &InlinedAtsMap, + StringRef FuncName, bool Before) { + for (const auto &I : instructions(Func)) { + for (DbgRecord &DR : I.getDbgRecordRange()) { + if (auto *Dbg = dyn_cast(&DR)) { + auto *DbgVar = Dbg->getVariable(); + auto DbgLoc = DR.getDebugLoc(); + populateVarIDSetAndInlinedMap(DbgVar, DbgLoc, VarIDSet, InlinedAtsMap, + FuncName, Before); + } + } + } +} diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index 6259f8f736c80..b766517e68eba 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -2462,7 +2462,7 @@ StandardInstrumentations::StandardInstrumentations( PrintChanged == ChangePrinter::ColourDiffVerbose || PrintChanged == ChangePrinter::ColourDiffQuiet), WebsiteChangeReporter(PrintChanged == ChangePrinter::DotCfgVerbose), - Verify(DebugLogging), DroppedStats(DroppedVarStats), + Verify(DebugLogging), 
DroppedStatsIR(DroppedVarStats), VerifyEach(VerifyEach) {} PrintCrashIRInstrumentation *PrintCrashIRInstrumentation::CrashReporter = @@ -2523,180 +2523,6 @@ void PrintCrashIRInstrumentation::registerCallbacks( }); } -void DroppedVariableStats::registerCallbacks( - PassInstrumentationCallbacks &PIC) { - if (!DroppedVarStats) - return; - - PIC.registerBeforeNonSkippedPassCallback( - [this](StringRef P, Any IR) { return this->runBeforePass(P, IR); }); - PIC.registerAfterPassCallback( - [this](StringRef P, Any IR, const PreservedAnalyses &PA) { - return this->runAfterPass(P, IR, PA); - }); - PIC.registerAfterPassInvalidatedCallback( - [this](StringRef P, const PreservedAnalyses &PA) { - return this->runAfterPassInvalidated(P, PA); - }); -} - -void DroppedVariableStats::runBeforePass(StringRef PassID, Any IR) { - DebugVariablesStack.push_back({DenseMap()}); - InlinedAts.push_back({DenseMap>()}); - if (auto *M = unwrapIR(IR)) - return this->runOnModule(M, true); - if (auto *F = unwrapIR(IR)) - return this->runOnFunction(F, true); -} - -void DroppedVariableStats::runOnFunction(const Function *F, bool Before) { - auto &DebugVariables = DebugVariablesStack.back()[F]; - auto &VarIDSet = (Before ? DebugVariables.DebugVariablesBefore - : DebugVariables.DebugVariablesAfter); - auto &InlinedAtsMap = InlinedAts.back(); - auto FuncName = F->getName(); - if (Before) - InlinedAtsMap.try_emplace(FuncName, DenseMap()); - VarIDSet = DenseSet(); - for (const auto &I : instructions(F)) { - for (DbgRecord &DR : I.getDbgRecordRange()) { - if (auto *Dbg = dyn_cast(&DR)) { - auto *DbgVar = Dbg->getVariable(); - auto DbgLoc = DR.getDebugLoc(); - VarID Key{DbgVar->getScope(), DbgLoc->getInlinedAtScope(), DbgVar}; - VarIDSet.insert(Key); - if (Before) - InlinedAtsMap[FuncName].try_emplace(Key, DbgLoc.getInlinedAt()); - } - } - } -} - -void DroppedVariableStats::runOnModule(const Module *M, bool Before) { - for (auto &F : *M) - runOnFunction(&F, Before); -} - -void DroppedVariableStats::removeVarFromAllSets(VarID Var, const Function *F) { - // Do not remove Var from the last element, it will be popped from the stack. - for (auto &DebugVariablesMap : llvm::drop_end(DebugVariablesStack)) - DebugVariablesMap[F].DebugVariablesBefore.erase(Var); -} - -void DroppedVariableStats::calculateDroppedVarStatsOnModule( - const Module *M, StringRef PassID, std::string FuncOrModName, - std::string PassLevel) { - for (auto &F : *M) { - calculateDroppedVarStatsOnFunction(&F, PassID, FuncOrModName, PassLevel); - } -} - -void DroppedVariableStats::calculateDroppedVarStatsOnFunction( - const Function *F, StringRef PassID, std::string FuncOrModName, - std::string PassLevel) { - unsigned DroppedCount = 0; - StringRef FuncName = F->getName(); - DebugVariables &DbgVariables = DebugVariablesStack.back()[F]; - DenseSet &DebugVariablesBeforeSet = DbgVariables.DebugVariablesBefore; - DenseSet &DebugVariablesAfterSet = DbgVariables.DebugVariablesAfter; - DenseMap &InlinedAtsMap = InlinedAts.back()[FuncName]; - // Find an Instruction that shares the same scope as the dropped #dbg_value or - // has a scope that is the child of the scope of the #dbg_value, and has an - // inlinedAt equal to the inlinedAt of the #dbg_value or it's inlinedAt chain - // contains the inlinedAt of the #dbg_value, if such an Instruction is found, - // debug information is dropped. 
- for (VarID Var : DebugVariablesBeforeSet) { - if (DebugVariablesAfterSet.contains(Var)) - continue; - const DIScope *DbgValScope = std::get<0>(Var); - for (const auto &I : instructions(F)) { - auto *DbgLoc = I.getDebugLoc().get(); - if (!DbgLoc) - continue; - - auto *Scope = DbgLoc->getScope(); - if (isScopeChildOfOrEqualTo(Scope, DbgValScope)) { - if (isInlinedAtChildOfOrEqualTo(DbgLoc->getInlinedAt(), - InlinedAtsMap[Var])) { - // Found another instruction in the variable's scope, so there exists - // a break point at which the variable could be observed. Count it as - // dropped. - DroppedCount++; - break; - } - } - } - removeVarFromAllSets(Var, F); - } - if (DroppedCount > 0) { - llvm::outs() << PassLevel << ", " << PassID << ", " << DroppedCount << ", " - << FuncOrModName << "\n"; - PassDroppedVariables = true; - } else - PassDroppedVariables = false; -} - -void DroppedVariableStats::runAfterPassInvalidated( - StringRef PassID, const PreservedAnalyses &PA) { - DebugVariablesStack.pop_back(); - InlinedAts.pop_back(); -} - -void DroppedVariableStats::runAfterPass(StringRef PassID, Any IR, - const PreservedAnalyses &PA) { - std::string PassLevel; - std::string FuncOrModName; - if (auto *M = unwrapIR(IR)) { - this->runOnModule(M, false); - PassLevel = "Module"; - FuncOrModName = M->getName(); - calculateDroppedVarStatsOnModule(M, PassID, FuncOrModName, PassLevel); - } else if (auto *F = unwrapIR(IR)) { - this->runOnFunction(F, false); - PassLevel = "Function"; - FuncOrModName = F->getName(); - calculateDroppedVarStatsOnFunction(F, PassID, FuncOrModName, PassLevel); - } - - DebugVariablesStack.pop_back(); - InlinedAts.pop_back(); -} - -bool DroppedVariableStats::isScopeChildOfOrEqualTo(DIScope *Scope, - const DIScope *DbgValScope) { - while (Scope != nullptr) { - if (VisitedScope.find(Scope) == VisitedScope.end()) { - VisitedScope.insert(Scope); - if (Scope == DbgValScope) { - VisitedScope.clear(); - return true; - } - Scope = Scope->getScope(); - } else { - VisitedScope.clear(); - return false; - } - } - return false; -} - -bool DroppedVariableStats::isInlinedAtChildOfOrEqualTo( - const DILocation *InlinedAt, const DILocation *DbgValInlinedAt) { - if (DbgValInlinedAt == InlinedAt) - return true; - if (!DbgValInlinedAt) - return false; - if (!InlinedAt) - return false; - auto *IA = InlinedAt; - while (IA) { - if (IA == DbgValInlinedAt) - return true; - IA = IA->getInlinedAt(); - } - return false; -} - void StandardInstrumentations::registerCallbacks( PassInstrumentationCallbacks &PIC, ModuleAnalysisManager *MAM) { PrintIR.registerCallbacks(PIC); @@ -2712,7 +2538,7 @@ void StandardInstrumentations::registerCallbacks( WebsiteChangeReporter.registerCallbacks(PIC); ChangeTester.registerCallbacks(PIC); PrintCrashIR.registerCallbacks(PIC); - DroppedStats.registerCallbacks(PIC); + DroppedStatsIR.registerCallbacks(PIC); if (MAM) PreservedCFGChecker.registerCallbacks(PIC, *MAM); diff --git a/llvm/unittests/CodeGen/CMakeLists.txt b/llvm/unittests/CodeGen/CMakeLists.txt index 963cdcc0275e1..807fd1a9b7b56 100644 --- a/llvm/unittests/CodeGen/CMakeLists.txt +++ b/llvm/unittests/CodeGen/CMakeLists.txt @@ -27,6 +27,7 @@ add_llvm_unittest(CodeGenTests CCStateTest.cpp DIEHashTest.cpp DIETest.cpp + DroppedVariableStatsIRTest.cpp DwarfStringPoolEntryRefTest.cpp InstrRefLDVTest.cpp LowLevelTypeTest.cpp diff --git a/llvm/unittests/IR/DroppedVariableStatsTest.cpp b/llvm/unittests/CodeGen/DroppedVariableStatsIRTest.cpp similarity index 91% rename from llvm/unittests/IR/DroppedVariableStatsTest.cpp rename to 
llvm/unittests/CodeGen/DroppedVariableStatsIRTest.cpp index 61f3a87bb355e..094ec7b657634 100644 --- a/llvm/unittests/IR/DroppedVariableStatsTest.cpp +++ b/llvm/unittests/CodeGen/DroppedVariableStatsIRTest.cpp @@ -1,5 +1,4 @@ -//===- unittests/IR/DroppedVariableStatsTest.cpp - TimePassesHandler tests -//----------===// +//===- unittests/IR/DroppedVariableStatsIRTest.cpp ------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -8,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/AsmParser/Parser.h" +#include "llvm/CodeGen/DroppedVariableStats.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/LegacyPassManager.h" @@ -44,7 +44,7 @@ namespace { // This test ensures that if a #dbg_value and an instruction that exists in the // same scope as that #dbg_value are both deleted as a result of an optimization // pass, debug information is considered not dropped. -TEST(DroppedVariableStats, BothDeleted) { +TEST(DroppedVariableStatsIR, BothDeleted) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -79,9 +79,8 @@ TEST(DroppedVariableStats, BothDeleted) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStats Stats(true); - Stats.runBeforePass("Test", - llvm::Any(const_cast(M.get()))); + DroppedVariableStatsIR Stats(true); + Stats.runBeforePass(llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -92,16 +91,15 @@ TEST(DroppedVariableStats, BothDeleted) { } break; } - PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get())), PA); + llvm::Any(const_cast(M.get()))); ASSERT_EQ(Stats.getPassDroppedVariables(), false); } // This test ensures that if a #dbg_value is dropped after an optimization pass, // but an instruction that shares the same scope as the #dbg_value still exists, // debug information is conisdered dropped. -TEST(DroppedVariableStats, DbgValLost) { +TEST(DroppedVariableStatsIR, DbgValLost) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -136,9 +134,8 @@ TEST(DroppedVariableStats, DbgValLost) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStats Stats(true); - Stats.runBeforePass("Test", - llvm::Any(const_cast(M.get()))); + DroppedVariableStatsIR Stats(true); + Stats.runBeforePass(llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -148,16 +145,15 @@ TEST(DroppedVariableStats, DbgValLost) { } break; } - PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get())), PA); + llvm::Any(const_cast(M.get()))); ASSERT_EQ(Stats.getPassDroppedVariables(), true); } // This test ensures that if a #dbg_value is dropped after an optimization pass, // but an instruction that has an unrelated scope as the #dbg_value still // exists, debug information is conisdered not dropped. 
-TEST(DroppedVariableStats, UnrelatedScopes) { +TEST(DroppedVariableStatsIR, UnrelatedScopes) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -193,9 +189,8 @@ TEST(DroppedVariableStats, UnrelatedScopes) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStats Stats(true); - Stats.runBeforePass("Test", - llvm::Any(const_cast(M.get()))); + DroppedVariableStatsIR Stats(true); + Stats.runBeforePass(llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -205,16 +200,15 @@ TEST(DroppedVariableStats, UnrelatedScopes) { } break; } - PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get())), PA); + llvm::Any(const_cast(M.get()))); ASSERT_EQ(Stats.getPassDroppedVariables(), false); } // This test ensures that if a #dbg_value is dropped after an optimization pass, // but an instruction that has a scope which is a child of the #dbg_value scope // still exists, debug information is conisdered dropped. -TEST(DroppedVariableStats, ChildScopes) { +TEST(DroppedVariableStatsIR, ChildScopes) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -250,9 +244,8 @@ TEST(DroppedVariableStats, ChildScopes) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStats Stats(true); - Stats.runBeforePass("Test", - llvm::Any(const_cast(M.get()))); + DroppedVariableStatsIR Stats(true); + Stats.runBeforePass(llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -262,9 +255,8 @@ TEST(DroppedVariableStats, ChildScopes) { } break; } - PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get())), PA); + llvm::Any(const_cast(M.get()))); ASSERT_EQ(Stats.getPassDroppedVariables(), true); } @@ -272,7 +264,7 @@ TEST(DroppedVariableStats, ChildScopes) { // but an instruction that has a scope which is a child of the #dbg_value scope // still exists, and the #dbg_value is inlined at another location, debug // information is conisdered not dropped. -TEST(DroppedVariableStats, InlinedAt) { +TEST(DroppedVariableStatsIR, InlinedAt) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -308,9 +300,8 @@ TEST(DroppedVariableStats, InlinedAt) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStats Stats(true); - Stats.runBeforePass("Test", - llvm::Any(const_cast(M.get()))); + DroppedVariableStatsIR Stats(true); + Stats.runBeforePass(llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -320,9 +311,8 @@ TEST(DroppedVariableStats, InlinedAt) { } break; } - PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get())), PA); + llvm::Any(const_cast(M.get()))); ASSERT_EQ(Stats.getPassDroppedVariables(), false); } @@ -330,7 +320,7 @@ TEST(DroppedVariableStats, InlinedAt) { // but an instruction that has a scope which is a child of the #dbg_value scope // still exists, and the #dbg_value and the instruction are inlined at another // location, debug information is conisdered dropped. 
-TEST(DroppedVariableStats, InlinedAtShared) { +TEST(DroppedVariableStatsIR, InlinedAtShared) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -366,9 +356,8 @@ TEST(DroppedVariableStats, InlinedAtShared) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStats Stats(true); - Stats.runBeforePass("Test", - llvm::Any(const_cast(M.get()))); + DroppedVariableStatsIR Stats(true); + Stats.runBeforePass(llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -378,9 +367,8 @@ TEST(DroppedVariableStats, InlinedAtShared) { } break; } - PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get())), PA); + llvm::Any(const_cast(M.get()))); ASSERT_EQ(Stats.getPassDroppedVariables(), true); } @@ -388,7 +376,7 @@ TEST(DroppedVariableStats, InlinedAtShared) { // but an instruction that has a scope which is a child of the #dbg_value scope // still exists, and the instruction is inlined at a location that is the // #dbg_value's inlined at location, debug information is conisdered dropped. -TEST(DroppedVariableStats, InlinedAtChild) { +TEST(DroppedVariableStatsIR, InlinedAtChild) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -425,9 +413,8 @@ TEST(DroppedVariableStats, InlinedAtChild) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStats Stats(true); - Stats.runBeforePass("Test", - llvm::Any(const_cast(M.get()))); + DroppedVariableStatsIR Stats(true); + Stats.runBeforePass(llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -437,9 +424,8 @@ TEST(DroppedVariableStats, InlinedAtChild) { } break; } - PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get())), PA); + llvm::Any(const_cast(M.get()))); ASSERT_EQ(Stats.getPassDroppedVariables(), true); } diff --git a/llvm/unittests/IR/CMakeLists.txt b/llvm/unittests/IR/CMakeLists.txt index ed93ee547d223..e5c8630f3eed7 100644 --- a/llvm/unittests/IR/CMakeLists.txt +++ b/llvm/unittests/IR/CMakeLists.txt @@ -43,7 +43,6 @@ add_llvm_unittest(IRTests ShuffleVectorInstTest.cpp StructuralHashTest.cpp TimePassesTest.cpp - DroppedVariableStatsTest.cpp TypesTest.cpp UseTest.cpp UserTest.cpp From 1e7171f692d0fad37aad0674c6b7c904540a9a0c Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 3 Dec 2024 22:13:40 +0000 Subject: [PATCH 139/191] [AArch64] Add tablegen patterns for concat(extract-high, extract-high) (#118286) A `concat(extract-high(x), extract-high(y))` is the top half of x inserted into the bottom half of y. This patch adds a tablegen pattern to make sure that we generate a single i64 lane insert. 
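For example (a sketch mirroring the concat_high_high_v8i16 test updated below; the
function name is illustrative), IR of this shape:

  define <8 x i16> @concat_high_high(<8 x i16> %a, <8 x i16> %b) {
    ; Take the high half of each input, then concatenate the two halves.
    %hi_a = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    %hi_b = shufflevector <8 x i16> %b, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    %res  = shufflevector <4 x i16> %hi_a, <4 x i16> %hi_b,
                          <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    ret <8 x i16> %res
  }

should now select to a single INSvi64lane lane move (`mov v1.d[0], v0.d[1]`), plus a
plain register copy where the result has to end up in v0, rather than two `ext`
instructions followed by a lane move, as the updated CHECK lines show.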
--- .../lib/Target/AArch64/AArch64InstrFormats.td | 4 + llvm/lib/Target/AArch64/AArch64InstrInfo.td | 27 +++--- llvm/test/CodeGen/AArch64/concat-vector.ll | 82 ++++++------------- llvm/test/CodeGen/AArch64/vecreduce-add.ll | 3 +- 4 files changed, 46 insertions(+), 70 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index a8ba89f784c8c..56ff7b0d3a280 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -145,8 +145,12 @@ def gi_extract_high_v4i32 : def extract_high_v8f16 : ComplexPattern; +def extract_high_v8bf16 : + ComplexPattern; def extract_high_v4f32 : ComplexPattern; +def extract_high_v2f64 : + ComplexPattern; def gi_extract_high_v8f16 : GIComplexOperandMatcher, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 7614f6215b803..d015cc15581ad 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -7352,7 +7352,8 @@ def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx), // All concat_vectors operations are canonicalised to act on i64 vectors for // AArch64. In the general case we need an instruction, which had just as well be // INS. -multiclass ConcatPat { +multiclass ConcatPat { def : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)), (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1, (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>; @@ -7365,16 +7366,22 @@ multiclass ConcatPat { // If the high lanes are undef we can just ignore them: def : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)), (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>; -} -defm : ConcatPat; -defm : ConcatPat; -defm : ConcatPat; -defm : ConcatPat; -defm : ConcatPat; -defm : ConcatPat; -defm : ConcatPat; -defm : ConcatPat; + // Concatting the high half of two vectors is the insert of the first + // into the low half of the second. 
+ def : Pat<(DstTy (concat_vectors (ExtractHigh (DstTy V128:$Rn)), + (ExtractHigh (DstTy V128:$Rm)))), + (INSvi64lane V128:$Rm, (i64 0), V128:$Rn, (i64 1))>; +} + +defm : ConcatPat; +defm : ConcatPat; +defm : ConcatPat; +defm : ConcatPat; +defm : ConcatPat; +defm : ConcatPat; +defm : ConcatPat; +defm : ConcatPat; //---------------------------------------------------------------------------- // AdvSIMD across lanes instructions diff --git a/llvm/test/CodeGen/AArch64/concat-vector.ll b/llvm/test/CodeGen/AArch64/concat-vector.ll index 36583b89ce5fc..0daa6e7f16202 100644 --- a/llvm/test/CodeGen/AArch64/concat-vector.ll +++ b/llvm/test/CodeGen/AArch64/concat-vector.ll @@ -385,19 +385,11 @@ entry: } define <8 x i16> @concat_high_high_v8i16(<8 x i16> %a_vec, <8 x i16> %b_vec) { -; CHECK-SD-LABEL: concat_high_high_v8i16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-SD-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: concat_high_high_v8i16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d0, v0.d[1] -; CHECK-GI-NEXT: mov d1, v1.d[1] -; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: concat_high_high_v8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov v1.d[0], v0.d[1] +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: ret entry: %shuffle.i3 = shufflevector <8 x i16> %a_vec, <8 x i16> poison, <4 x i32> %shuffle.i = shufflevector <8 x i16> %b_vec, <8 x i16> poison, <4 x i32> @@ -406,19 +398,11 @@ entry: } define <8 x half> @concat_high_high_v8f16(<8 x half> %a_vec, <8 x half> %b_vec) { -; CHECK-SD-LABEL: concat_high_high_v8f16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-SD-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: concat_high_high_v8f16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d0, v0.d[1] -; CHECK-GI-NEXT: mov d1, v1.d[1] -; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: concat_high_high_v8f16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov v1.d[0], v0.d[1] +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: ret entry: %shuffle.i3 = shufflevector <8 x half> %a_vec, <8 x half> poison, <4 x i32> %shuffle.i = shufflevector <8 x half> %b_vec, <8 x half> poison, <4 x i32> @@ -427,19 +411,11 @@ entry: } define <8 x bfloat> @concat_high_high_v8bf16(<8 x bfloat> %a_vec, <8 x bfloat> %b_vec) { -; CHECK-SD-LABEL: concat_high_high_v8bf16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-SD-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: concat_high_high_v8bf16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d0, v0.d[1] -; CHECK-GI-NEXT: mov d1, v1.d[1] -; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: concat_high_high_v8bf16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov v1.d[0], v0.d[1] +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: ret entry: %shuffle.i3 = shufflevector <8 x bfloat> %a_vec, <8 x bfloat> poison, <4 x i32> %shuffle.i = shufflevector <8 x bfloat> %b_vec, <8 x bfloat> poison, <4 x i32> @@ -455,9 +431,8 @@ define <4 x i32> @concat_high_high_v4i32(<4 x i32> %a_vec, <4 x i32> %b_vec) { ; ; CHECK-GI-LABEL: concat_high_high_v4i32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d0, v0.d[1] -; CHECK-GI-NEXT: mov d1, v1.d[1] -; 
CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: mov v1.d[0], v0.d[1] +; CHECK-GI-NEXT: mov v0.16b, v1.16b ; CHECK-GI-NEXT: ret entry: %shuffle.i3 = shufflevector <4 x i32> %a_vec, <4 x i32> poison, <2 x i32> @@ -474,9 +449,8 @@ define <4 x float> @concat_high_high_v4f32(<4 x float> %a_vec, <4 x float> %b_ve ; ; CHECK-GI-LABEL: concat_high_high_v4f32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d0, v0.d[1] -; CHECK-GI-NEXT: mov d1, v1.d[1] -; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: mov v1.d[0], v0.d[1] +; CHECK-GI-NEXT: mov v0.16b, v1.16b ; CHECK-GI-NEXT: ret entry: %shuffle.i3 = shufflevector <4 x float> %a_vec, <4 x float> poison, <2 x i32> @@ -486,19 +460,11 @@ entry: } define <16 x i8> @concat_high_high_v16i8(<16 x i8> %a_vec, <16 x i8> %b_vec) { -; CHECK-SD-LABEL: concat_high_high_v16i8: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-SD-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: concat_high_high_v16i8: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d0, v0.d[1] -; CHECK-GI-NEXT: mov d1, v1.d[1] -; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: concat_high_high_v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov v1.d[0], v0.d[1] +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: ret entry: %shuffle.i3 = shufflevector <16 x i8> %a_vec, <16 x i8> poison, <8 x i32> %shuffle.i = shufflevector <16 x i8> %b_vec, <16 x i8> poison, <8 x i32> diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll index 184aa0226fe77..8473f45f6c803 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll @@ -4885,8 +4885,7 @@ entry: define i32 @extract_hi_hi(<8 x i16> %a) { ; CHECK-SD-LABEL: extract_hi_hi: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-SD-NEXT: mov v0.d[1], v0.d[0] +; CHECK-SD-NEXT: mov v0.d[0], v0.d[1] ; CHECK-SD-NEXT: uaddlv s0, v0.8h ; CHECK-SD-NEXT: fmov w0, s0 ; CHECK-SD-NEXT: ret From e2472d3b120659ba7cd0e0dd0c6bf1260957fe47 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Tue, 3 Dec 2024 14:25:05 -0800 Subject: [PATCH 140/191] Rework attr-target-x86 test (#117091) Rework the attr-target-x86 test so the CHECK lines for the attributes are next to their corresponding `__attribute__`. 
--- clang/test/CodeGen/attr-target-x86.c | 153 ++++++++++++++++----------- 1 file changed, 89 insertions(+), 64 deletions(-) diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c index 2033a8b4c335f..75e6dd18be209 100644 --- a/clang/test/CodeGen/attr-target-x86.c +++ b/clang/test/CodeGen/attr-target-x86.c @@ -1,80 +1,105 @@ // RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu i686 -tune-cpu i686 -emit-llvm %s -o - | FileCheck %s -int baz(int a) { return 4; } +// CHECK: define {{.*}}@f_default({{.*}} [[f_default:#[0-9]+]] +// CHECK: define {{.*}}@f_avx_sse4_2_ivybridge({{.*}} [[f_avx_sse4_2_ivybridge:#[0-9]+]] +// CHECK: define {{.*}}@f_fpmath_387({{.*}} [[f_default]] +// CHECK: define {{.*}}@f_no_sse2({{.*}} [[f_no_sse2:#[0-9]+]] +// CHECK: define {{.*}}@f_sse4({{.*}} [[f_sse4:#[0-9]+]] +// CHECK: define {{.*}}@f_no_sse4({{.*}} [[f_no_sse4:#[0-9]+]] +// CHECK: define {{.*}}@f_default2({{.*}} [[f_default]] +// CHECK: define {{.*}}@f_avx_sse4_2_ivybridge_2({{.*}} [[f_avx_sse4_2_ivybridge]] +// CHECK: define {{.*}}@f_no_aes_ivybridge({{.*}} [[f_no_aes_ivybridge:#[0-9]+]] +// CHECK: define {{.*}}@f_no_mmx({{.*}} [[f_no_mmx:#[0-9]+]] +// CHECK: define {{.*}}@f_lakemont_mmx({{.*}} [[f_lakemont_mmx:#[0-9]+]] +// CHECK: define {{.*}}@f_use_before_def({{.*}} [[f_lakemont_mmx]] +// CHECK: define {{.*}}@f_tune_sandybridge({{.*}} [[f_tune_sandybridge:#[0-9]+]] +// CHECK: define {{.*}}@f_x86_64_v2({{.*}} [[f_x86_64_v2:#[0-9]+]] +// CHECK: define {{.*}}@f_x86_64_v3({{.*}} [[f_x86_64_v3:#[0-9]+]] +// CHECK: define {{.*}}@f_x86_64_v4({{.*}} [[f_x86_64_v4:#[0-9]+]] +// CHECK: define {{.*}}@f_avx10_1_256{{.*}} [[f_avx10_1_256:#[0-9]+]] +// CHECK: define {{.*}}@f_avx10_1_512{{.*}} [[f_avx10_1_512:#[0-9]+]] + +// CHECK: [[f_default]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="i686" +void f_default(void) {} + +// CHECK: [[f_avx_sse4_2_ivybridge]] = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" +__attribute__((target("avx,sse4.2,arch=ivybridge"))) +void f_avx_sse4_2_ivybridge(void) {} + +// We're currently ignoring the fpmath attribute. So checked above that +// attributes are identical to f_default. 
+__attribute__((target("fpmath=387"))) +void f_fpmath_387(void) {} -int __attribute__((target("avx,sse4.2,arch=ivybridge"))) foo(int a) { return 4; } - -int __attribute__((target("fpmath=387"))) koala(int a) { return 4; } - -int __attribute__((target("no-sse2"))) echidna(int a) { return 4; } - -int __attribute__((target("sse4"))) panda(int a) { return 4; } -int __attribute__((target("no-sse4"))) narwhal(int a) { return 4; } +// CHECK-NOT: tune-cpu +// CHECK: [[f_no_sse2]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-amx-avx512,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686" +__attribute__((target("no-sse2"))) +void f_no_sse2(void) {} + +// CHECK: [[f_sse4]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686" +__attribute__((target("sse4"))) +void f_sse4(void) {} + +// CHECK: [[f_no_sse4]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-amx-avx512,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686" +__attribute__((target("no-sse4"))) +void f_no_sse4(void) {} + +// checked above that attributes are identical to f_default +void f_default2(void) { + f_avx_sse4_2_ivybridge(); + return f_default(); +} -int bar(int a) { return baz(a) + foo(a); } +// Checked above to have same attributes as f_avx_sse4_2_ivybridge +__attribute__((target("avx, sse4.2, arch= ivybridge"))) +void f_avx_sse4_2_ivybridge_2(void) {} -int __attribute__((target("avx, sse4.2, arch= ivybridge"))) qux(int a) { return 4; } -int __attribute__((target("no-aes, arch=ivybridge"))) qax(int a) { return 4; } +// CHECK: [[f_no_aes_ivybridge]] = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-amx-avx512,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-vaes" +__attribute__((target("no-aes, arch=ivybridge"))) +void f_no_aes_ivybridge(void) {} -int __attribute__((target("no-mmx"))) qq(int a) { return 40; } +// CHECK-NOT: tune-cpu +// CHECK: [[f_no_mmx]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-mmx" +__attribute__((target("no-mmx"))) +void f_no_mmx(void) {} -int __attribute__((target("arch=lakemont,mmx"))) lake(int a) { return 4; } +// CHECK: [[f_lakemont_mmx]] = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx" +// Adding the attribute to a definition does update it in IR. +__attribute__((target("arch=lakemont,mmx"))) +void f_lakemont_mmx(void) {} -int use_before_def(void); -int useage(void){ - return use_before_def(); +void f_use_before_def(void); +void usage(void){ + f_use_before_def(); } -// Adding the attribute to a definition does update it in IR. 
-int __attribute__((target("arch=lakemont,mmx"))) use_before_def(void) { - return 5; -} +// Checked above to have same attributes as f_lakemont_mmx +__attribute__((target("arch=lakemont,mmx"))) +void f_use_before_def(void) {} -int __attribute__((target("tune=sandybridge"))) walrus(int a) { return 4; } - -void __attribute__((target("arch=x86-64-v2"))) x86_64_v2(void) {} -void __attribute__((target("arch=x86-64-v3"))) x86_64_v3(void) {} -void __attribute__((target("arch=x86-64-v4"))) x86_64_v4(void) {} - -void __attribute__((target("avx10.1-256"))) avx10_1_256(void) {} -void __attribute__((target("avx10.1-512"))) avx10_1_512(void) {} - -// Check that we emit the additional subtarget and cpu features for foo and not for baz or bar. -// CHECK: baz{{.*}} #0 -// CHECK: foo{{.*}} #1 -// We're currently ignoring the fpmath attribute so koala should be identical to baz and bar. -// CHECK: koala{{.*}} #0 -// CHECK: echidna{{.*}} #2 -// CHECK: panda{{.*}} #3 -// CHECK: narwhal{{.*}} #4 -// CHECK: bar{{.*}} #0 -// CHECK: qux{{.*}} #1 -// CHECK: qax{{.*}} #5 -// CHECK: qq{{.*}} #6 -// CHECK: lake{{.*}} #7 -// CHECK: use_before_def{{.*}} #7 -// CHECK: walrus{{.*}} #8 -// CHECK: avx10_1_256{{.*}} #12 -// CHECK: avx10_1_512{{.*}} #13 -// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="i686" -// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" -// CHECK-NOT: tune-cpu -// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-amx-avx512,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686" -// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686" -// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-amx-avx512,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686" -// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-amx-avx512,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-vaes" -// CHECK-NOT: tune-cpu -// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-mmx" -// CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx" -// CHECK-NOT: tune-cpu -// CHECK: #8 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="sandybridge" +// CHECK: [[f_tune_sandybridge]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="sandybridge" +__attribute__((target("tune=sandybridge"))) +void f_tune_sandybridge(void) {} -// 
CHECK: "target-cpu"="x86-64-v2" +// CHECK: [[f_x86_64_v2]] ={{.*}}"target-cpu"="x86-64-v2" // CHECK-SAME: "target-features"="+cmov,+crc32,+cx16,+cx8,+fxsr,+mmx,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" -// CHECK: "target-cpu"="x86-64-v3" +__attribute__((target("arch=x86-64-v2"))) +void f_x86_64_v2(void) {} + +// CHECK: [[f_x86_64_v3]] = {{.*}}"target-cpu"="x86-64-v3" // CHECK-SAME: "target-features"="+avx,+avx2,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" -// CHECK: "target-cpu"="x86-64-v4" +__attribute__((target("arch=x86-64-v3"))) +void f_x86_64_v3(void) {} + +// CHECK: [[f_x86_64_v4]] = {{.*}}"target-cpu"="x86-64-v4" // CHECK-SAME: "target-features"="+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +__attribute__((target("arch=x86-64-v4"))) +void f_x86_64_v4(void) {} + +// CHECK: [[f_avx10_1_256]] = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave,-amx-avx512,-avx10.1-512,-avx10.2-512,-evex512" +__attribute__((target("avx10.1-256"))) +void f_avx10_1_256(void) {} -// CHECK: #12 = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave,-amx-avx512,-avx10.1-512,-avx10.2-512,-evex512" -// CHECK: #13 = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx10.1-512,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave" +// CHECK: [[f_avx10_1_512]] = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx10.1-512,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave" +__attribute__((target("avx10.1-512"))) +void f_avx10_1_512(void) {} From b206ba1867763a2b09e33649446599538c84d334 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 3 Dec 2024 22:26:48 +0000 Subject: [PATCH 141/191] [gn build] Port 0c8928d456ac --- llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn | 1 + llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn | 1 + llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn | 1 - 3 files changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn index ab72ac4ae9f4b..bcb46d919b6c1 100644 --- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn @@ -50,6 +50,7 @@ static_library("CodeGen") { 
"DFAPacketizer.cpp", "DeadMachineInstructionElim.cpp", "DetectDeadLanes.cpp", + "DroppedVariableStats.cpp", "DwarfEHPrepare.cpp", "EHContGuardCatchret.cpp", "EarlyIfConversion.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn index a3f89a5648cb5..dc01cc9a40a9c 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn @@ -26,6 +26,7 @@ unittest("CodeGenTests") { "CCStateTest.cpp", "DIEHashTest.cpp", "DIETest.cpp", + "DroppedVariableStatsIRTest.cpp", "DwarfStringPoolEntryRefTest.cpp", "InstrRefLDVTest.cpp", "LexicalScopesTest.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn index ccee5d79afdcc..b19d54d7ed92f 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn @@ -28,7 +28,6 @@ unittest("IRTests") { "DemandedBitsTest.cpp", "DominatorTreeBatchUpdatesTest.cpp", "DominatorTreeTest.cpp", - "DroppedVariableStatsTest.cpp", "FunctionTest.cpp", "IRBuilderTest.cpp", "InstructionsTest.cpp", From f2fa9ac6169758268bc16c46ec80da2e88ad7f2c Mon Sep 17 00:00:00 2001 From: k-kashapov <52855633+k-kashapov@users.noreply.github.com> Date: Wed, 4 Dec 2024 01:32:54 +0300 Subject: [PATCH 142/191] [nfc][MSan] Change for-loop to ArgNo instead of drop_begin (#117553) As discussed in https://github.com/llvm/llvm-project/pull/109284#discussion_r1838830571 Changed for loop to use `ArgNo` instead of `drop_begin` to keep loop code consistent with other helpers. Co-authored-by: Kamil Kashapov --- llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index dca6bf1adfde8..934500509873f 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -6159,8 +6159,10 @@ struct VarArgGenericHelper : public VarArgHelperBase { unsigned VAArgOffset = 0; const DataLayout &DL = F.getDataLayout(); unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy); - for (Value *A : - llvm::drop_begin(CB.args(), CB.getFunctionType()->getNumParams())) { + for (const auto &[ArgNo, A] : llvm::enumerate(CB.args())) { + bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams(); + if (IsFixed) + continue; uint64_t ArgSize = DL.getTypeAllocSize(A->getType()); if (DL.isBigEndian()) { // Adjusting the shadow for argument with size < IntptrSize to match the From c7d38591be8b5c581c228313d2972cb758221cc2 Mon Sep 17 00:00:00 2001 From: Congcong Cai Date: Wed, 4 Dec 2024 06:43:29 +0800 Subject: [PATCH 143/191] [ast matcher] add `ExportDecl` in dynamically matchers (#118258) --- clang/lib/ASTMatchers/Dynamic/Registry.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/ASTMatchers/Dynamic/Registry.cpp b/clang/lib/ASTMatchers/Dynamic/Registry.cpp index 8d36ad5c80b5d..837633fb2f060 100644 --- a/clang/lib/ASTMatchers/Dynamic/Registry.cpp +++ b/clang/lib/ASTMatchers/Dynamic/Registry.cpp @@ -243,6 +243,7 @@ RegistryMaps::RegistryMaps() { REGISTER_MATCHER(equalsBoundNode); REGISTER_MATCHER(equalsIntegralValue); REGISTER_MATCHER(explicitCastExpr); + REGISTER_MATCHER(exportDecl); REGISTER_MATCHER(expr); REGISTER_MATCHER(exprWithCleanups); REGISTER_MATCHER(fieldDecl); From 
ea6cdb9a0708330089d583ce20aeaf81eec94ff7 Mon Sep 17 00:00:00 2001
From: Matthias Braun
Date: Tue, 3 Dec 2024 15:01:28 -0800
Subject: [PATCH 144/191] allow prefer 256 bit attribute target (#117092)

This allows `__attribute__((target("prefer-256-bit")))` /
`__attribute__((target("no-prefer-256-bit")))` to create variants of a
function with 256/512 bit vector sizes within the same application.
---
clang/lib/Basic/Targets/X86.cpp | 1 +
clang/test/CodeGen/attr-target-x86.c | 10 ++++++++++
2 files changed, 11 insertions(+)
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 8c31bbe056741..1b16888a0711b 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -1162,6 +1162,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("pconfig", true)
.Case("pku", true)
.Case("popcnt", true)
+ .Case("prefer-256-bit", true)
.Case("prefetchi", true)
.Case("prfchw", true)
.Case("ptwrite", true)
diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c
index 75e6dd18be209..c92aad633082f 100644
--- a/clang/test/CodeGen/attr-target-x86.c
+++ b/clang/test/CodeGen/attr-target-x86.c
@@ -18,6 +18,8 @@
// CHECK: define {{.*}}@f_x86_64_v4({{.*}} [[f_x86_64_v4:#[0-9]+]]
// CHECK: define {{.*}}@f_avx10_1_256{{.*}} [[f_avx10_1_256:#[0-9]+]]
// CHECK: define {{.*}}@f_avx10_1_512{{.*}} [[f_avx10_1_512:#[0-9]+]]
+// CHECK: define {{.*}}@f_prefer_256_bit({{.*}} [[f_prefer_256_bit:#[0-9]+]]
+// CHECK: define {{.*}}@f_no_prefer_256_bit({{.*}} [[f_no_prefer_256_bit:#[0-9]+]]
// CHECK: [[f_default]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="i686"
void f_default(void) {}
@@ -103,3 +105,11 @@ void f_avx10_1_256(void) {}
// CHECK: [[f_avx10_1_512]] = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx10.1-512,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave"
__attribute__((target("avx10.1-512")))
void f_avx10_1_512(void) {}
+
+// CHECK: [[f_prefer_256_bit]] = {{.*}}"target-features"="{{.*}}+prefer-256-bit
+__attribute__((target("prefer-256-bit")))
+void f_prefer_256_bit(void) {}
+
+// CHECK: [[f_no_prefer_256_bit]] = {{.*}}"target-features"="{{.*}}-prefer-256-bit
+__attribute__((target("no-prefer-256-bit")))
+void f_no_prefer_256_bit(void) {}
\ No newline at end of file
From c8b7ec2edd8d84729070e455002d1e78bdceddc5 Mon Sep 17 00:00:00 2001
From: Akira Hatanaka
Date: Tue, 3 Dec 2024 15:17:39 -0800
Subject: [PATCH 145/191] Define a diagnostic group for missing variadic macro arguments (#116855)

Make the new diagnostic group a subgroup of the following diagnostic groups:

-Wpre-c23-compat
-Wgnu-zero-variadic-macro-arguments
-Wc++20-extensions
-Wc23-extensions

This change is needed as 5231005193afb8db01afe9a8a1aa308d25f60ba1 made it
impossible to use -Wno-gnu-zero-variadic-macro-arguments to silence the warning.
rdar://139234984 --- clang/include/clang/Basic/DiagnosticGroups.td | 10 ++++---- .../include/clang/Basic/DiagnosticLexKinds.td | 6 ++--- clang/test/Lexer/gnu-flags.c | 2 ++ clang/test/Preprocessor/macro_fn.c | 24 +++++++++++++++---- 4 files changed, 30 insertions(+), 12 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index ac0e178d1cb41..3ac490d30371b 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -294,11 +294,13 @@ def : DiagGroup<"c++1z-compat-mangling", [CXX17CompatMangling]>; // Name of this warning in GCC. def NoexceptType : DiagGroup<"noexcept-type", [CXX17CompatMangling]>; +def VariadicMacroArgumentsOmitted : DiagGroup<"variadic-macro-arguments-omitted">; + // Warnings for C code which is not compatible with previous C standards. def CPre11Compat : DiagGroup<"pre-c11-compat">; def CPre11CompatPedantic : DiagGroup<"pre-c11-compat-pedantic", [CPre11Compat]>; -def CPre23Compat : DiagGroup<"pre-c23-compat">; +def CPre23Compat : DiagGroup<"pre-c23-compat", [VariadicMacroArgumentsOmitted]>; def CPre23CompatPedantic : DiagGroup<"pre-c23-compat-pedantic", [CPre23Compat]>; def : DiagGroup<"pre-c2x-compat", [CPre23Compat]>; @@ -906,7 +908,7 @@ def VolatileRegisterVar : DiagGroup<"volatile-register-var">; def Visibility : DiagGroup<"visibility">; def ZeroLengthArray : DiagGroup<"zero-length-array">; def GNUZeroLineDirective : DiagGroup<"gnu-zero-line-directive">; -def GNUZeroVariadicMacroArguments : DiagGroup<"gnu-zero-variadic-macro-arguments">; +def GNUZeroVariadicMacroArguments : DiagGroup<"gnu-zero-variadic-macro-arguments", [VariadicMacroArgumentsOmitted]>; def MisleadingIndentation : DiagGroup<"misleading-indentation">; def PtrAuthNullPointers : DiagGroup<"ptrauth-null-pointers">; @@ -1199,7 +1201,7 @@ def CXX17 : DiagGroup<"c++17-extensions", [CXX17Attrs]>; // A warning group for warnings about using C++20 features as extensions in // earlier C++ versions. -def CXX20 : DiagGroup<"c++20-extensions", [CXX20Designator, CXX20Attrs]>; +def CXX20 : DiagGroup<"c++20-extensions", [CXX20Designator, CXX20Attrs, VariadicMacroArgumentsOmitted]>; // A warning group for warnings about using C++23 features as extensions in // earlier C++ versions. @@ -1226,7 +1228,7 @@ def C11 : DiagGroup<"c11-extensions">; def C99 : DiagGroup<"c99-extensions", [C99Designator]>; // A warning group for warnings about using C23 features as extensions. -def C23 : DiagGroup<"c23-extensions">; +def C23 : DiagGroup<"c23-extensions", [VariadicMacroArgumentsOmitted]>; def : DiagGroup<"c2x-extensions", [C23]>; diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 889370221f32f..959376b084721 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -486,14 +486,14 @@ def ext_embedded_directive : Extension< InGroup>; def ext_c_missing_varargs_arg : Extension< "passing no argument for the '...' parameter of a variadic macro is " - "a C23 extension">, InGroup; + "a C23 extension">, InGroup; def ext_cxx_missing_varargs_arg : Extension< "passing no argument for the '...' parameter of a variadic macro is " - "a C++20 extension">, InGroup; + "a C++20 extension">, InGroup; def warn_c17_compat_missing_varargs_arg : Warning< "passing no argument for the '...' 
parameter of a variadic macro is " "incompatible with C standards before C23">, - InGroup, DefaultIgnore; + InGroup, DefaultIgnore; def warn_cxx17_compat_missing_varargs_arg : Warning< "passing no argument for the '...' parameter of a variadic macro is " "incompatible with C++ standards before C++20">, diff --git a/clang/test/Lexer/gnu-flags.c b/clang/test/Lexer/gnu-flags.c index 6c7bf9405ddf0..30cfcf710f346 100644 --- a/clang/test/Lexer/gnu-flags.c +++ b/clang/test/Lexer/gnu-flags.c @@ -16,6 +16,8 @@ #if ALL || ZEROARGS +// expected-warning@+9 {{passing no argument for the '...' parameter of a variadic macro is a C23 extension}} +// expected-note@+4 {{macro 'efoo' defined here}} // expected-warning@+3 {{token pasting of ',' and __VA_ARGS__ is a GNU extension}} #endif diff --git a/clang/test/Preprocessor/macro_fn.c b/clang/test/Preprocessor/macro_fn.c index 81d8363214078..2e72bd272084e 100644 --- a/clang/test/Preprocessor/macro_fn.c +++ b/clang/test/Preprocessor/macro_fn.c @@ -1,11 +1,17 @@ /* RUN: %clang_cc1 %s -Eonly -std=c89 -pedantic -verify */ +// RUN: %clang_cc1 %s -Eonly -std=c89 -pedantic -Wno-gnu-zero-variadic-macro-arguments -verify -DOMIT_VARIADIC_MACRO_ARGS -DVARIADIC_MACRO_ARGS_REMOVE_COMMA +// RUN: %clang_cc1 %s -Eonly -std=c89 -pedantic -Wno-variadic-macro-arguments-omitted -verify -DOMIT_VARIADIC_MACRO_ARGS /* PR3937 */ #define zero() 0 /* expected-note 2 {{defined here}} */ #define one(x) 0 /* expected-note 2 {{defined here}} */ #define two(x, y) 0 /* expected-note 4 {{defined here}} */ #define zero_dot(...) 0 /* expected-warning {{variadic macros are a C99 feature}} */ -#define one_dot(x, ...) 0 /* expected-warning {{variadic macros are a C99 feature}} expected-note 2{{macro 'one_dot' defined here}} */ +#define one_dot(x, ...) 0 /* expected-warning {{variadic macros are a C99 feature}} */ + +#ifndef OMIT_VARIADIC_MACRO_ARGS +/* expected-note@-3 2{{macro 'one_dot' defined here}} */ +#endif zero() zero(1); /* expected-error {{too many arguments provided to function-like macro invocation}} */ @@ -37,16 +43,24 @@ e(x) e() zero_dot() -one_dot(x) /* empty ... argument: expected-warning {{passing no argument for the '...' parameter of a variadic macro is a C23 extension}} */ -one_dot() /* empty first argument, elided ...: expected-warning {{passing no argument for the '...' parameter of a variadic macro is a C23 extension}} */ +one_dot(x) /* empty ... argument */ +one_dot() /* empty first argument, elided ... */ +#ifndef OMIT_VARIADIC_MACRO_ARGS +/* expected-warning@-4 {{passing no argument for the '...' parameter of a variadic macro is a C23 extension}} */ +/* expected-warning@-4 {{passing no argument for the '...' parameter of a variadic macro is a C23 extension}} */ +#endif /* Crash with function-like macro test at end of directive. */ #define E() (i == 0) #if E #endif - #define NSAssert(condition, desc, ...) 
/* expected-warning {{variadic macros are a C99 feature}} */ \ - SomeComplicatedStuff((desc), ##__VA_ARGS__) /* expected-warning {{token pasting of ',' and __VA_ARGS__ is a GNU extension}} */ + SomeComplicatedStuff((desc), ##__VA_ARGS__) + +#ifndef VARIADIC_MACRO_ARGS_REMOVE_COMMA +/* expected-warning@-3 {{token pasting of ',' and __VA_ARGS__ is a GNU extension}} */ +#endif + NSAssert(somecond, somedesc) From 1afb81dfaf902c1c42bd91fec1a7385e6e1529d3 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Tue, 3 Dec 2024 16:01:50 -0800 Subject: [PATCH 146/191] [StructuralHash] Global Variable (#118412) This update enhances the implementation of structural hashing for global variables, using their initial contents. Private global variables or constants are often used for metadata, where their names are not unique. This can lead to the creation of different hash results although they could be merged by the linker as they are effectively identical. - Refine the hashing of GlobalVariables for strings or certain Objective-C metadata cases that have section names. This can be further extended to other scenarios. - Expose StructuralHash for GlobalVariable so that this API can be utilized by MachineStableHashing, which is also employed in the global function outliner. This change significantly improves size reduction by an additional 1% on the LLD binary when the global function outliner and merger are enabled together. As discussed in the RFC https://discourse.llvm.org/t/loh-conflicting-with-machineoutliner/83279/8?u=kyulee-com, if we disable or relocate the LOH pass, the size impact could increase to 4%. --- llvm/include/llvm/IR/StructuralHash.h | 3 + llvm/lib/CodeGen/MachineStableHash.cpp | 20 ++++-- llvm/lib/IR/StructuralHash.cpp | 54 ++++++++++++++-- .../AArch64/cgdata-merge-gvar-nsconst.ll | 32 ++++++++++ .../CodeGen/AArch64/cgdata-merge-gvar-objc.ll | 38 +++++++++++ .../AArch64/cgdata-merge-gvar-string.ll | 46 +++++++++++++ .../CodeGen/AArch64/cgdata-outline-gvar.ll | 64 +++++++++++++++++++ 7 files changed, 244 insertions(+), 13 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/cgdata-merge-gvar-nsconst.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-merge-gvar-string.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h index 071575137ff57..514dd6f174b90 100644 --- a/llvm/include/llvm/IR/StructuralHash.h +++ b/llvm/include/llvm/IR/StructuralHash.h @@ -31,6 +31,9 @@ class Module; /// to true includes instruction and operand type information. stable_hash StructuralHash(const Function &F, bool DetailedHash = false); +/// Returns a hash of the global variable \p G. +stable_hash StructuralHash(const GlobalVariable &G); + /// Returns a hash of the module \p M by hashing all functions and global /// variables contained within. \param M The module to hash. 
\param DetailedHash /// Whether or not to encode additional information in the function hashes that diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp index facda7a59e2f8..5ab589acee413 100644 --- a/llvm/lib/CodeGen/MachineStableHash.cpp +++ b/llvm/lib/CodeGen/MachineStableHash.cpp @@ -27,6 +27,8 @@ #include "llvm/CodeGen/Register.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/StructuralHash.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/ErrorHandling.h" @@ -93,13 +95,19 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) { return 0; case MachineOperand::MO_GlobalAddress: { const GlobalValue *GV = MO.getGlobal(); - if (!GV->hasName()) { - ++StableHashBailingGlobalAddress; - return 0; + stable_hash GVHash = 0; + if (auto *GVar = dyn_cast(GV)) + GVHash = StructuralHash(*GVar); + if (!GVHash) { + if (!GV->hasName()) { + ++StableHashBailingGlobalAddress; + return 0; + } + GVHash = stable_hash_name(GV->getName()); } - auto Name = GV->getName(); - return stable_hash_combine(MO.getType(), MO.getTargetFlags(), - stable_hash_name(Name), MO.getOffset()); + + return stable_hash_combine(MO.getType(), MO.getTargetFlags(), GVHash, + MO.getOffset()); } case MachineOperand::MO_TargetIndex: { diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp index ccc534a890419..56b925626d845 100644 --- a/llvm/lib/IR/StructuralHash.cpp +++ b/llvm/lib/IR/StructuralHash.cpp @@ -46,7 +46,7 @@ class StructuralHashImpl { /// Assign a unique ID to each Value in the order they are first seen. DenseMap ValueToId; - stable_hash hashType(Type *ValueType) { + static stable_hash hashType(Type *ValueType) { SmallVector Hashes; Hashes.emplace_back(ValueType->getTypeID()); if (ValueType->isIntegerTy()) @@ -65,7 +65,7 @@ class StructuralHashImpl { } } - stable_hash hashAPInt(const APInt &I) { + static stable_hash hashAPInt(const APInt &I) { SmallVector Hashes; Hashes.emplace_back(I.getBitWidth()); auto RawVals = ArrayRef(I.getRawData(), I.getNumWords()); @@ -73,11 +73,39 @@ class StructuralHashImpl { return stable_hash_combine(Hashes); } - stable_hash hashAPFloat(const APFloat &F) { + static stable_hash hashAPFloat(const APFloat &F) { return hashAPInt(F.bitcastToAPInt()); } - stable_hash hashGlobalValue(const GlobalValue *GV) { + static stable_hash hashGlobalVariable(const GlobalVariable &GVar) { + if (!GVar.hasInitializer()) + return hashGlobalValue(&GVar); + + // Hash the contents of a string. + if (GVar.getName().starts_with(".str")) { + auto *C = GVar.getInitializer(); + if (const auto *Seq = dyn_cast(C)) + if (Seq->isString()) + return stable_hash_name(Seq->getAsString()); + } + + // Hash structural contents of Objective-C metadata in specific sections. + // This can be extended to other metadata if needed. 
+ static constexpr const char *SectionNames[] = { + "__cfstring", "__cstring", "__objc_classrefs", + "__objc_methname", "__objc_selrefs", + }; + if (GVar.hasSection()) { + StringRef SectionName = GVar.getSection(); + for (const char *Name : SectionNames) + if (SectionName.contains(Name)) + return hashConstant(GVar.getInitializer()); + } + + return hashGlobalValue(&GVar); + } + + static stable_hash hashGlobalValue(const GlobalValue *GV) { if (!GV->hasName()) return 0; return stable_hash_name(GV->getName()); @@ -87,7 +115,7 @@ class StructuralHashImpl { // FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here // we're interested in computing a hash rather than comparing two Constants. // Some of the logic is simplified, e.g, we don't expand GEPOperator. - stable_hash hashConstant(Constant *C) { + static stable_hash hashConstant(const Constant *C) { SmallVector Hashes; Type *Ty = C->getType(); @@ -98,14 +126,21 @@ class StructuralHashImpl { return stable_hash_combine(Hashes); } + if (auto *GVar = dyn_cast(C)) { + Hashes.emplace_back(hashGlobalVariable(*GVar)); + return stable_hash_combine(Hashes); + } + if (auto *G = dyn_cast(C)) { Hashes.emplace_back(hashGlobalValue(G)); return stable_hash_combine(Hashes); } if (const auto *Seq = dyn_cast(C)) { - Hashes.emplace_back(xxh3_64bits(Seq->getRawDataValues())); - return stable_hash_combine(Hashes); + if (Seq->isString()) { + Hashes.emplace_back(stable_hash_name(Seq->getAsString())); + return stable_hash_combine(Hashes); + } } switch (C->getValueID()) { @@ -266,6 +301,7 @@ class StructuralHashImpl { Hashes.emplace_back(Hash); Hashes.emplace_back(GlobalHeaderHash); Hashes.emplace_back(GV.getValueType()->getTypeID()); + Hashes.emplace_back(hashGlobalVariable(GV)); // Update the combined hash in place. Hash = stable_hash_combine(Hashes); @@ -297,6 +333,10 @@ stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) { return H.getHash(); } +stable_hash llvm::StructuralHash(const GlobalVariable &GVar) { + return StructuralHashImpl::hashGlobalVariable(GVar); +} + stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) { StructuralHashImpl H(DetailedHash); H.update(M); diff --git a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-nsconst.ll b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-nsconst.ll new file mode 100644 index 0000000000000..490a778f69e26 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-nsconst.ll @@ -0,0 +1,32 @@ +; This test verifies that global variables (ns constant) are hashed based on their initial contents, +; allowing them to be merged even if they appear different due to their names. 
+; Now they become identical functions that can be merged without creating a parameter + +; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s + +; CHECK: _f1.Tgm +; CHECK: _f2.Tgm + +%struct.__NSConstantString_tag = type { ptr, i32, ptr, i64 } +@__CFConstantStringClassReference = external global [0 x i32] +@.str.2 = private unnamed_addr constant [9 x i8] c"cfstring\00", section "__TEXT,__cstring,cstring_literals", align 1 +@_unnamed_cfstring_ = private global %struct.__NSConstantString_tag { ptr @__CFConstantStringClassReference, i32 1992, ptr @.str.2, i64 8 }, section "__DATA,__cfstring", align 8 + +@.str.3 = private unnamed_addr constant [9 x i8] c"cfstring\00", section "__TEXT,__cstring,cstring_literals", align 1 +@_unnamed_cfstring_.2 = private global %struct.__NSConstantString_tag { ptr @__CFConstantStringClassReference, i32 1992, ptr @.str.3, i64 8 }, section "__DATA,__cfstring", align 8 + +declare i32 @hoo(ptr noundef) + +define i32 @f1() { +entry: + %call = tail call i32 @hoo(ptr noundef nonnull @_unnamed_cfstring_) + %add = sub nsw i32 %call, 1 + ret i32 %add +} + +define i32 @f2() { +entry: + %call = tail call i32 @hoo(ptr noundef nonnull @_unnamed_cfstring_.2) + %add = sub nsw i32 %call, 1 + ret i32 %add +} diff --git a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll new file mode 100644 index 0000000000000..0073114941501 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll @@ -0,0 +1,38 @@ +; This test verifies that global variables (objc metadata) are hashed based on their initial contents, +; allowing them to be merged even if they appear different due to their names. +; Now they become identical functions that can be merged without creating a parameter + +; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s + +; CHECK: _f1.Tgm +; CHECK: _f2.Tgm + +%struct._class_t = type { ptr, ptr, ptr, ptr, ptr } + +@"OBJC_CLASS_$_MyClass" = external global %struct._class_t +@"OBJC_CLASSLIST_REFERENCES_$_" = internal global ptr @"OBJC_CLASS_$_MyClass", section "__DATA,__objc_classrefs,regular,no_dead_strip", align 8 +@"OBJC_CLASSLIST_REFERENCES_$_.1" = internal global ptr @"OBJC_CLASS_$_MyClass", section "__DATA,__objc_classrefs,regular,no_dead_strip", align 8 + +@OBJC_METH_VAR_NAME_ = private unnamed_addr constant [6 x i8] c"hello\00", section "__TEXT,__objc_methname,cstring_literals", align 1 +@OBJC_METH_VAR_NAME_.1 = private unnamed_addr constant [6 x i8] c"hello\00", section "__TEXT,__objc_methname,cstring_literals", align 1 + +@OBJC_SELECTOR_REFERENCES_ = internal externally_initialized global ptr @OBJC_METH_VAR_NAME_, section "__DATA,__objc_selrefs,literal_pointers,no_dead_strip", align 8 +@OBJC_SELECTOR_REFERENCES_.1 = internal externally_initialized global ptr @OBJC_METH_VAR_NAME_.1, section "__DATA,__objc_selrefs,literal_pointers,no_dead_strip", align 8 + +declare ptr @objc_msgSend(ptr, ptr, ...) 
+ +define i32 @f1() { +entry: + %0 = load ptr, ptr @"OBJC_CLASSLIST_REFERENCES_$_", align 8 + %1 = load ptr, ptr @OBJC_SELECTOR_REFERENCES_, align 8 + %call = tail call i32 @objc_msgSend(ptr noundef %0, ptr noundef %1) + ret i32 %call +} + +define i32 @f2() { +entry: + %0 = load ptr, ptr @"OBJC_CLASSLIST_REFERENCES_$_.1", align 8 + %1 = load ptr, ptr @OBJC_SELECTOR_REFERENCES_.1, align 8 + %call = tail call i32 @objc_msgSend(ptr noundef %0, ptr noundef %1) + ret i32 %call +} diff --git a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-string.ll b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-string.ll new file mode 100644 index 0000000000000..1e67425f0b847 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-string.ll @@ -0,0 +1,46 @@ +; This test verifies that global variables (string) are hashed based on their initial contents, +; allowing them to be merged even if they appear different due to their names. +; Now they become identical functions that can be merged without creating a parameter. + +; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s + +; CHECK: _f1.Tgm +; CHECK: _f2.Tgm +; CHECK-NOT: _f3.Tgm +; CHECK-NOT: _f4.Tgm + +; The initial contents of `.str` and `.str.1` are identical, but not with those of `.str.2` and `.str.3`. +@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1 +@.str.1 = private unnamed_addr constant [6 x i8] c"hello\00", align 1 +@.str.2 = private unnamed_addr constant [6 x i8] c"diff2\00", align 1 +@.str.3 = private unnamed_addr constant [6 x i8] c"diff3\00", align 1 + +declare i32 @goo(ptr noundef) + +define i32 @f1() { +entry: + %call = tail call i32 @goo(ptr noundef nonnull @.str) + %add = add nsw i32 %call, 1 + ret i32 %add +} + +define i32 @f2() { +entry: + %call = tail call i32 @goo(ptr noundef nonnull @.str.1) + %add = add nsw i32 %call, 1 + ret i32 %add +} + +define i32 @f3() { +entry: + %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.2) + %add = sub nsw i32 %call, 1 + ret i32 %add +} + +define i32 @f4() { +entry: + %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.3) + %add = sub nsw i32 %call, 1 + ret i32 %add +} diff --git a/llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll b/llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll new file mode 100644 index 0000000000000..63ba1d491f9c7 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll @@ -0,0 +1,64 @@ +; This test verifies that global variables are hashed based on their initial contents, +; allowing them to be outlined even if they appear different due to their names. + +; RUN: split-file %s %t + +; The outlined function is created locally. +; Note that `.str.3` is commonly used in both `f1()` and `f2()`. +; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate -aarch64-enable-collect-loh=false \ +; RUN: %t/local-two.ll -o - | FileCheck %s --check-prefix=WRITE + +; WRITE-LABEL: _OUTLINED_FUNCTION_{{.*}}: +; WRITE: adrp x1, l_.str.3 +; WRITE-NEXT: add x1, x1, l_.str.3 +; WRITE-NEXT: mov w2 +; WRITE-NEXT: mov w3 +; WRITE-NEXT: mov w4 +; WRITE-NEXT: b + +; Create an object file and merge it into the cgdata. +; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate -aarch64-enable-collect-loh=false \ +; RUN: -filetype=obj %t/local-two.ll -o %t_write_base +; RUN: llvm-cgdata --merge %t_write_base -o %t_cgdata_base + +; Read the cgdata in the machine outliner for optimistically outlining in local-one.ll. 
+; Note that the hash of `.str.5` in local-one.ll matches that of `.str.3` in an outlined tree in the cgdata. + +; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-use-path=%t_cgdata_base -aarch64-enable-collect-loh=false \ +; RUN: %t/local-one.ll -o - | FileCheck %s --check-prefix=READ + +; READ-LABEL: _OUTLINED_FUNCTION_{{.*}}: +; READ: adrp x1, l_.str.5 +; READ-NEXT: add x1, x1, l_.str.5 +; READ-NEXT: mov w2 +; READ-NEXT: mov w3 +; READ-NEXT: mov w4 +; READ-NEXT: b + +;--- local-two.ll +@.str.1 = private unnamed_addr constant [3 x i8] c"f1\00", align 1 +@.str.2 = private unnamed_addr constant [3 x i8] c"f2\00", align 1 +@.str.3 = private unnamed_addr constant [6 x i8] c"hello\00", align 1 + +declare noundef i32 @goo(ptr noundef, ptr noundef, i32, i32, i32) +define i32 @f1() minsize { +entry: + %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.1, ptr noundef nonnull @.str.3, i32 1, i32 2, i32 3) + ret i32 %call +} +define i32 @f2() minsize { +entry: + %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.2, ptr noundef nonnull @.str.3, i32 1, i32 2, i32 3) + ret i32 %call +} + +;--- local-one.ll +@.str.4 = private unnamed_addr constant [3 x i8] c"f3\00", align 1 +@.str.5 = private unnamed_addr constant [6 x i8] c"hello\00", align 1 + +declare noundef i32 @goo(ptr noundef, ptr noundef, i32, i32, i32) +define i32 @f1() minsize { +entry: + %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.4, ptr noundef nonnull @.str.5, i32 1, i32 2, i32 3) + ret i32 %call +} From a201ba1b57aa57df8e31603b496793fa39d31936 Mon Sep 17 00:00:00 2001 From: Kunwar Grover Date: Wed, 4 Dec 2024 00:03:50 +0000 Subject: [PATCH 147/191] [mlir][Vector] Add support for 0-d shapes in extract-shape_cast folder (#116650) The extract <-> shape cast folder was conservatively asserting and failing on 0-d vectors. This pr fixes this. This pr also adds more tests for 0d cases and updates related tests to better reflect what they test. --- mlir/lib/Dialect/Vector/IR/VectorOps.cpp | 5 --- mlir/test/Dialect/Vector/canonicalize.mlir | 36 ++++++++++++++-------- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp index 0c0a7bc98d8b5..ad709813c6216 100644 --- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp @@ -1756,11 +1756,6 @@ static Value foldExtractFromShapeCast(ExtractOp extractOp) { if (!shapeCastOp) return Value(); - // 0-D vectors not supported. - assert(!hasZeroDimVectors(extractOp) && "0-D vectors not supported"); - if (hasZeroDimVectors(shapeCastOp)) - return Value(); - // Get the nth dimension size starting from lowest dimension. 
auto getDimReverse = [](VectorType type, int64_t n) { return type.getShape().take_back(n + 1).front(); diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir index 5ae769090dac6..89af0f7332f5c 100644 --- a/mlir/test/Dialect/Vector/canonicalize.mlir +++ b/mlir/test/Dialect/Vector/canonicalize.mlir @@ -782,23 +782,23 @@ func.func @fold_extract_shapecast(%arg0 : vector<5x1x3x2xf32>, // ----- -// CHECK-LABEL: fold_extract_shapecast_negative -// CHECK: %[[V:.*]] = vector.shape_cast %{{.*}} : vector<16xf32> to vector<2x4x2xf32> -// CHECK: %[[R:.*]] = vector.extract %[[V]][1] : vector<4x2xf32> from vector<2x4x2xf32> -// CHECK: return %[[R]] : vector<4x2xf32> -func.func @fold_extract_shapecast_negative(%arg0 : vector<16xf32>) -> vector<4x2xf32> { - %0 = vector.shape_cast %arg0 : vector<16xf32> to vector<2x4x2xf32> - %r = vector.extract %0[1] : vector<4x2xf32> from vector<2x4x2xf32> - return %r : vector<4x2xf32> +// CHECK-LABEL: fold_extract_shapecast_0d_result +// CHECK-SAME: %[[IN:.*]]: vector<1x1x1xf32> +// CHECK: %[[R:.*]] = vector.extract %[[IN]][0, 0, 0] : f32 from vector<1x1x1xf32> +// CHECK: return %[[R]] : f32 +func.func @fold_extract_shapecast_0d_result(%arg0 : vector<1x1x1xf32>) -> f32 { + %0 = vector.shape_cast %arg0 : vector<1x1x1xf32> to vector + %r = vector.extract %0[] : f32 from vector + return %r : f32 } // ----- -// CHECK-LABEL: dont_fold_0d_extract_shapecast -// CHECK: %[[V:.*]] = vector.shape_cast %{{.*}} : vector to vector<1xf32> -// CHECK: %[[R:.*]] = vector.extract %[[V]][0] : f32 from vector<1xf32> +// CHECK-LABEL: fold_extract_shapecast_0d_source +// CHECK-SAME: %[[IN:.*]]: vector +// CHECK: %[[R:.*]] = vector.extract %[[IN]][] : f32 from vector // CHECK: return %[[R]] : f32 -func.func @dont_fold_0d_extract_shapecast(%arg0 : vector) -> f32 { +func.func @fold_extract_shapecast_0d_source(%arg0 : vector) -> f32 { %0 = vector.shape_cast %arg0 : vector to vector<1xf32> %r = vector.extract %0[0] : f32 from vector<1xf32> return %r : f32 @@ -806,6 +806,18 @@ func.func @dont_fold_0d_extract_shapecast(%arg0 : vector) -> f32 { // ----- +// CHECK-LABEL: fold_extract_shapecast_negative +// CHECK: %[[V:.*]] = vector.shape_cast %{{.*}} : vector<16xf32> to vector<2x4x2xf32> +// CHECK: %[[R:.*]] = vector.extract %[[V]][1] : vector<4x2xf32> from vector<2x4x2xf32> +// CHECK: return %[[R]] : vector<4x2xf32> +func.func @fold_extract_shapecast_negative(%arg0 : vector<16xf32>) -> vector<4x2xf32> { + %0 = vector.shape_cast %arg0 : vector<16xf32> to vector<2x4x2xf32> + %r = vector.extract %0[1] : vector<4x2xf32> from vector<2x4x2xf32> + return %r : vector<4x2xf32> +} + +// ----- + // CHECK-LABEL: fold_extract_shapecast_to_shapecast // CHECK-SAME: (%[[ARG:.+]]: vector<3x4xf32>) // CHECK: %[[R:.+]] = vector.shape_cast %[[ARG]] : vector<3x4xf32> to vector<12xf32> From e9dc6c5fbb6d2c2c93095acb6ff4ca0b515057ed Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 3 Dec 2024 19:15:56 -0500 Subject: [PATCH 148/191] CodeGen: Don't assert when printing null GlobalAddress operands (#115531) --- llvm/lib/CodeGen/MachineOperand.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index d9e5e9d9d1e41..18027b2db2947 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -909,7 +909,11 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << printJumpTableEntryReference(getIndex()); break; case 
MachineOperand::MO_GlobalAddress:
- getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST);
+ if (const auto *GV = getGlobal())
+ GV->printAsOperand(OS, /*PrintType=*/false, MST);
+ else // Invalid, but may appear in debugging scenarios.
+ OS << "globaladdress(null)";
+
+ printOperandOffset(OS, getOffset());
break;
case MachineOperand::MO_ExternalSymbol: {
From 35cce408eef1a253df12c0023c993d78b180b1f3 Mon Sep 17 00:00:00 2001
From: Dan Gohman
Date: Tue, 3 Dec 2024 16:35:23 -0800
Subject: [PATCH 149/191] [WebAssembly] Support the new "Lime1" CPU (#112035)

This adds WebAssembly support for the new [Lime1 CPU].

First, this defines some new target features. These are subsets of existing features that reflect implementation concerns:

- "call-indirect-overlong" - implied by "reference-types"; just the overlong encoding for the `call_indirect` immediate, and not the actual reference types.
- "bulk-memory-opt" - implied by "bulk-memory": just `memory.copy` and `memory.fill`, and not the other instructions in the bulk-memory proposal.

Next, this defines a new target CPU, "lime1", which enables mutable-globals, bulk-memory-opt, multivalue, sign-ext, nontrapping-fptoint, extended-const, and call-indirect-overlong. Unlike the default "generic" CPU, "lime1" is meant to be frozen, and followed up by "lime2" and so on when new features are desired.

[Lime1 CPU]: https://github.com/WebAssembly/tool-conventions/blob/main/Lime.md#lime1

---------
Co-authored-by: Heejin Ahn
---
clang/docs/ReleaseNotes.rst | 6 +++++
clang/lib/Basic/Targets/WebAssembly.cpp | 15 ++++++++++-
llvm/docs/ReleaseNotes.md | 6 +++++
llvm/lib/Target/WebAssembly/WebAssembly.td | 7 +++++
.../WebAssembly/target-features-cpus.ll | 27 +++++++++++++++++++
5 files changed, 60 insertions(+), 1 deletion(-)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 5026d4475b38a..02284225fb4fa 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -935,9 +935,15 @@ and `-mbulk-memory` flags, which correspond to the [Bulk Memory Operations] and
[Non-trapping float-to-int Conversions] language features, which are
[widely implemented in engines].
+A new Lime1 target CPU is added, -mcpu=lime1. This CPU follows the definition of
+the Lime1 CPU [here], and enables -mmultivalue, -mmutable-globals,
+-mcall-indirect-overlong, -msign-ext, -mbulk-memory-opt, -mnontrapping-fptoint,
+and -mextended-const.
+
[Bulk Memory Operations]: https://github.com/WebAssembly/bulk-memory-operations/blob/master/proposals/bulk-memory-operations/Overview.md
[Non-trapping float-to-int Conversions]: https://github.com/WebAssembly/spec/blob/master/proposals/nontrapping-float-to-int-conversion/Overview.md
[widely implemented in engines]: https://webassembly.org/features/
+[here]: https://github.com/WebAssembly/tool-conventions/blob/main/Lime.md#lime1
AVR Support
^^^^^^^^^^^
diff --git a/clang/lib/Basic/Targets/WebAssembly.cpp b/clang/lib/Basic/Targets/WebAssembly.cpp
index d9d01bceb433a..85e550ad20d5e 100644
--- a/clang/lib/Basic/Targets/WebAssembly.cpp
+++ b/clang/lib/Basic/Targets/WebAssembly.cpp
@@ -31,7 +31,7 @@ static constexpr Builtin::Info BuiltinInfo[] = {
};
static constexpr llvm::StringLiteral ValidCPUNames[] = {
- {"mvp"}, {"bleeding-edge"}, {"generic"}};
+ {"mvp"}, {"bleeding-edge"}, {"generic"}, {"lime1"}};
StringRef WebAssemblyTargetInfo::getABI() const { return ABI; }
@@ -167,6 +167,17 @@ bool WebAssemblyTargetInfo::initFeatureMap(
Features["reference-types"] = true;
Features["sign-ext"] = true;
};
+ auto addLime1Features = [&]() {
+ // Lime1:
+ //
+ Features["bulk-memory-opt"] = true;
+ Features["call-indirect-overlong"] = true;
+ Features["extended-const"] = true;
+ Features["multivalue"] = true;
+ Features["mutable-globals"] = true;
+ Features["nontrapping-fptoint"] = true;
+ Features["sign-ext"] = true;
+ };
auto addBleedingEdgeFeatures = [&]() {
addGenericFeatures();
Features["atomics"] = true;
@@ -180,6 +191,8 @@ bool WebAssemblyTargetInfo::initFeatureMap(
};
if (CPU == "generic") {
addGenericFeatures();
+ } else if (CPU == "lime1") {
+ addLime1Features();
} else if (CPU == "bleeding-edge") {
addBleedingEdgeFeatures();
}
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index dc3f3aeb735f8..d8d9c4fc4bb8a 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -226,9 +226,15 @@ and `-mbulk-memory` flags, which correspond to the [Bulk Memory Operations] and
[Non-trapping float-to-int Conversions] language features, which are
[widely implemented in engines].
+A new Lime1 target CPU is added, -mcpu=lime1. This CPU follows the definition of
+the Lime1 CPU [here], and enables -mmultivalue, -mmutable-globals,
+-mcall-indirect-overlong, -msign-ext, -mbulk-memory-opt, -mnontrapping-fptoint,
+and -mextended-const.
+ [Bulk Memory Operations]: https://github.com/WebAssembly/bulk-memory-operations/blob/master/proposals/bulk-memory-operations/Overview.md [Non-trapping float-to-int Conversions]: https://github.com/WebAssembly/spec/blob/master/proposals/nontrapping-float-to-int-conversion/Overview.md [widely implemented in engines]: https://webassembly.org/features/ +[here]: https://github.com/WebAssembly/tool-conventions/blob/main/Lime.md#lime1 Changes to the Windows Target ----------------------------- diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.td b/llvm/lib/Target/WebAssembly/WebAssembly.td index 3b9254b3a7cee..13603f8181198 100644 --- a/llvm/lib/Target/WebAssembly/WebAssembly.td +++ b/llvm/lib/Target/WebAssembly/WebAssembly.td @@ -127,6 +127,13 @@ def : ProcessorModel<"generic", NoSchedModel, FeatureMutableGlobals, FeatureNontrappingFPToInt, FeatureReferenceTypes, FeatureSignExt]>; +// Lime1: +def : ProcessorModel<"lime1", NoSchedModel, + [FeatureBulkMemoryOpt, FeatureCallIndirectOverlong, + FeatureExtendedConst, FeatureMultivalue, + FeatureMutableGlobals, FeatureNontrappingFPToInt, + FeatureSignExt]>; + // Latest and greatest experimental version of WebAssembly. Bugs included! def : ProcessorModel<"bleeding-edge", NoSchedModel, [FeatureAtomics, FeatureBulkMemory, FeatureBulkMemoryOpt, diff --git a/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll b/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll index 661f5d8463928..1c77ad5c049a5 100644 --- a/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll +++ b/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -mcpu=mvp | FileCheck %s --check-prefixes MVP ; RUN: llc < %s -mcpu=generic | FileCheck %s --check-prefixes GENERIC +; RUN: llc < %s -mcpu=lime1 | FileCheck %s --check-prefixes LIME1 ; RUN: llc < %s | FileCheck %s --check-prefixes GENERIC ; RUN: llc < %s -mcpu=bleeding-edge | FileCheck %s --check-prefixes BLEEDING-EDGE @@ -39,6 +40,32 @@ target triple = "wasm32-unknown-unknown" ; GENERIC-NEXT: .int8 8 ; GENERIC-NEXT: .ascii "sign-ext" +; lime1: +bulk-memory-opt, +call-indirect-overlong, +extended-const, +multivalue, +; +mutable-globals, +nontrapping-fptoint, +sign-ext +; LIME1-LABEL: .custom_section.target_features,"",@ +; LIME1-NEXT: .int8 7 +; LIME1-NEXT: .int8 43 +; LIME1-NEXT: .int8 15 +; LIME1-NEXT: .ascii "bulk-memory-opt" +; LIME1-NEXT: .int8 43 +; LIME1-NEXT: .int8 22 +; LIME1-NEXT: .ascii "call-indirect-overlong" +; LIME1-NEXT: .int8 43 +; LIME1-NEXT: .int8 14 +; LIME1-NEXT: .ascii "extended-const" +; LIME1-NEXT: .int8 43 +; LIME1-NEXT: .int8 10 +; LIME1-NEXT: .ascii "multivalue" +; LIME1-NEXT: .int8 43 +; LIME1-NEXT: .int8 15 +; LIME1-NEXT: .ascii "mutable-globals" +; LIME1-NEXT: .int8 43 +; LIME1-NEXT: .int8 19 +; LIME1-NEXT: .ascii "nontrapping-fptoint" +; LIME1-NEXT: .int8 43 +; LIME1-NEXT: .int8 8 +; LIME1-NEXT: .ascii "sign-ext" + ; bleeding-edge: +atomics, +bulk-memory, +bulk-memory-opt, ; +call-indirect-overlong, +exception-handling, ; +extended-const, +fp16, +multimemory, +multivalue, From 410cbe3cf28913cca2fc61b3437306b841d08172 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Tue, 3 Dec 2024 19:38:35 -0500 Subject: [PATCH 150/191] [AMDGPU] Use COV6 by default (#118515) --- clang/docs/ReleaseNotes.rst | 2 ++ clang/include/clang/Driver/Options.td | 4 ++-- clang/lib/Driver/ToolChains/CommonArgs.cpp | 2 +- clang/test/CodeGen/amdgpu-address-spaces.cpp | 2 +- clang/test/CodeGenCUDA/amdgpu-code-object-version.cu | 2 +- clang/test/CodeGenCXX/dynamic-cast-address-space.cpp | 6 +++--- 
clang/test/CodeGenHIP/default-attributes.hip | 4 ++-- clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl | 6 +++--- .../amdgcn/bitcode/oclc_abi_version_600.bc | 0 .../lib/amdgcn/bitcode/oclc_abi_version_600.bc | 0 .../lib64/amdgcn/bitcode/oclc_abi_version_600.bc | 0 clang/test/Driver/hip-device-libs.hip | 2 +- clang/test/OpenMP/amdgcn_target_global_constructor.cpp | 4 ++-- libc/cmake/modules/prepare_libc_gpu_build.cmake | 2 +- llvm/docs/ReleaseNotes.md | 2 ++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 2 +- .../CodeGen/AMDGPU/default_amdhsa_code_object_version.ll | 7 +++++++ offload/plugins-nextgen/common/src/Utils/ELF.cpp | 5 +++-- 18 files changed, 32 insertions(+), 20 deletions(-) create mode 100644 clang/test/Driver/Inputs/rocm-spack/llvm-amdgpu-4.0.0-ieagcs7inf7runpyfvepqkurasoglq4z/amdgcn/bitcode/oclc_abi_version_600.bc create mode 100644 clang/test/Driver/Inputs/rocm_resource_dir/lib/amdgcn/bitcode/oclc_abi_version_600.bc create mode 100644 clang/test/Driver/Inputs/rocm_resource_dir/lib64/amdgcn/bitcode/oclc_abi_version_600.bc create mode 100644 llvm/test/CodeGen/AMDGPU/default_amdhsa_code_object_version.ll diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 02284225fb4fa..366e96f054c20 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -818,6 +818,8 @@ Target Specific Changes AMDGPU Support ^^^^^^^^^^^^^^ +- Bump the default code object version to 6. + - Initial support for gfx950 - Added headers ``gpuintrin.h`` and ``amdgpuintrin.h`` that contains common diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 4bc0b97ea68f2..29db1aa21ed29 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5140,12 +5140,12 @@ defm amdgpu_ieee : BoolMOption<"amdgpu-ieee", NegFlag>; def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group, - HelpText<"Specify code object ABI version. Defaults to 5. (AMDGPU only)">, + HelpText<"Specify code object ABI version. Defaults to 6. 
(AMDGPU only)">, Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>, Values<"none,4,5,6">, NormalizedValuesScope<"llvm::CodeObjectVersionKind">, NormalizedValues<["COV_None", "COV_4", "COV_5", "COV_6"]>, - MarshallingInfoEnum, "COV_5">; + MarshallingInfoEnum, "COV_6">; defm cumode : SimpleMFlag<"cumode", "Specify CU wavefront", "Specify WGP wavefront", diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 8d977149e6248..4e6ace48c3ffb 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2705,7 +2705,7 @@ void tools::checkAMDGPUCodeObjectVersion(const Driver &D, unsigned tools::getAMDGPUCodeObjectVersion(const Driver &D, const llvm::opt::ArgList &Args) { - unsigned CodeObjVer = 5; // default + unsigned CodeObjVer = 6; // default if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer); return CodeObjVer; diff --git a/clang/test/CodeGen/amdgpu-address-spaces.cpp b/clang/test/CodeGen/amdgpu-address-spaces.cpp index ae2c61439f4ca..b121b559f58dc 100644 --- a/clang/test/CodeGen/amdgpu-address-spaces.cpp +++ b/clang/test/CodeGen/amdgpu-address-spaces.cpp @@ -29,7 +29,7 @@ int [[clang::address_space(999)]] bbb = 1234; // CHECK: @u = addrspace(5) global i32 undef, align 4 // CHECK: @aaa = addrspace(6) global i32 1000, align 4 // CHECK: @bbb = addrspace(999) global i32 1234, align 4 -// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 +// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600 //. // CHECK-LABEL: define dso_local amdgpu_kernel void @foo( // CHECK-SAME: ) #[[ATTR0:[0-9]+]] { diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu index ffe12544917f7..aa0e3edec3f6a 100644 --- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu +++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu @@ -1,7 +1,7 @@ // Create module flag for code object version. // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ -// RUN: -o - %s | FileCheck %s -check-prefix=V5 +// RUN: -o - %s | FileCheck %s -check-prefix=V6 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ // RUN: -mcode-object-version=4 -o - %s | FileCheck -check-prefix=V4 %s diff --git a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp index 271d9ede79d0c..7eebdf68115a9 100644 --- a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp +++ b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp @@ -13,7 +13,7 @@ B fail; // CHECK: @_ZTI1B = linkonce_odr addrspace(1) constant { ptr addrspace(1), ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds (ptr addrspace(1), ptr addrspace(1) @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr addrspace(1) @_ZTS1B, ptr addrspace(1) @_ZTI1A }, comdat, align 8 // CHECK: @_ZTVN10__cxxabiv120__si_class_type_infoE = external addrspace(1) global [0 x ptr addrspace(1)] // CHECK: @_ZTS1B = linkonce_odr addrspace(1) constant [3 x i8] c"1B\00", comdat, align 1 -// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 +// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600 //. 
// WITH-NONZERO-DEFAULT-AS: @_ZTV1B = linkonce_odr unnamed_addr addrspace(1) constant { [3 x ptr addrspace(1)] } { [3 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) @_ZTI1B, ptr addrspace(1) addrspacecast (ptr addrspace(4) @_ZN1A1fEv to ptr addrspace(1))] }, comdat, align 8 // WITH-NONZERO-DEFAULT-AS: @fail = addrspace(1) global { ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds inrange(-16, 8) ({ [3 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTV1B, i32 0, i32 0, i32 2) }, align 8 @@ -118,11 +118,11 @@ const B& f(A *a) { // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR3]] = { nounwind } // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR4]] = { noreturn } //. -// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} +// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} // CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // CHECK: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} //. -// WITH-NONZERO-DEFAULT-AS: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} +// WITH-NONZERO-DEFAULT-AS: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} // WITH-NONZERO-DEFAULT-AS: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // WITH-NONZERO-DEFAULT-AS: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} //. diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip index 1b53ebec9b582..f4dbad021987f 100644 --- a/clang/test/CodeGenHIP/default-attributes.hip +++ b/clang/test/CodeGenHIP/default-attributes.hip @@ -8,7 +8,7 @@ //. // OPTNONE: @__hip_cuid_ = addrspace(1) global i8 0 // OPTNONE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata" -// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 +// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600 //. __device__ void extern_func(); @@ -39,7 +39,7 @@ __global__ void kernel() { // OPTNONE: attributes #[[ATTR2]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } // OPTNONE: attributes #[[ATTR3]] = { convergent nounwind } //. -// OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} +// OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} // OPTNONE: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"} // OPTNONE: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} //. diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl index 62b5661da9dbd..7f2a17b6ef8c5 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -62,7 +62,7 @@ kernel void test_target_features_kernel(global int *i) { //. // CHECK: @__block_literal_global = internal addrspace(1) constant { i32, i32, ptr } { i32 16, i32 8, ptr @__test_target_features_kernel_block_invoke }, align 8 #0 -// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 +// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600 //. 
// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone // NOCPU-LABEL: define {{[^@]+}}@callee @@ -759,7 +759,7 @@ kernel void test_target_features_kernel(global int *i) { // GFX900: attributes #[[ATTR8]] = { nounwind } // GFX900: attributes #[[ATTR9]] = { convergent nounwind } //. -// NOCPU: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} +// NOCPU: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} // NOCPU: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // NOCPU: [[META2:![0-9]+]] = !{i32 2, i32 0} // NOCPU: [[META3]] = !{i32 1, i32 0, i32 1, i32 0} @@ -777,7 +777,7 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU: [[META15]] = !{i32 1} // NOCPU: [[META16]] = !{!"int*"} //. -// GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} +// GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} // GFX900: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // GFX900: [[META2:![0-9]+]] = !{i32 2, i32 0} // GFX900: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} diff --git a/clang/test/Driver/Inputs/rocm-spack/llvm-amdgpu-4.0.0-ieagcs7inf7runpyfvepqkurasoglq4z/amdgcn/bitcode/oclc_abi_version_600.bc b/clang/test/Driver/Inputs/rocm-spack/llvm-amdgpu-4.0.0-ieagcs7inf7runpyfvepqkurasoglq4z/amdgcn/bitcode/oclc_abi_version_600.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/rocm_resource_dir/lib/amdgcn/bitcode/oclc_abi_version_600.bc b/clang/test/Driver/Inputs/rocm_resource_dir/lib/amdgcn/bitcode/oclc_abi_version_600.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/rocm_resource_dir/lib64/amdgcn/bitcode/oclc_abi_version_600.bc b/clang/test/Driver/Inputs/rocm_resource_dir/lib64/amdgcn/bitcode/oclc_abi_version_600.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip index 6f1d31508e330..3ae384cf05d97 100644 --- a/clang/test/Driver/hip-device-libs.hip +++ b/clang/test/Driver/hip-device-libs.hip @@ -157,7 +157,7 @@ // Test default code object version. // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5 +// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI6 // Test default code object version with old device library without abi_version_400.bc // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ diff --git a/clang/test/OpenMP/amdgcn_target_global_constructor.cpp b/clang/test/OpenMP/amdgcn_target_global_constructor.cpp index 9f1e68d4ea0fe..d728dc1233e2c 100644 --- a/clang/test/OpenMP/amdgcn_target_global_constructor.cpp +++ b/clang/test/OpenMP/amdgcn_target_global_constructor.cpp @@ -29,7 +29,7 @@ S A; // CHECK: @A = addrspace(1) global %struct.S zeroinitializer, align 4 // CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_GLOBAL__sub_I_amdgcn_target_global_constructor.cpp, ptr null }] // CHECK: @llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__dtor_A, ptr null }] -// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 +// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600 //. 
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init // CHECK-SAME: () #[[ATTR0:[0-9]+]] { @@ -104,7 +104,7 @@ S A; // CHECK: attributes #[[ATTR4]] = { convergent nounwind } //. // CHECK: [[META0:![0-9]+]] = !{i32 1, !"A", i32 0, i32 0} -// CHECK: [[META1:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} +// CHECK: [[META1:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} // CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 51} // CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 51} diff --git a/libc/cmake/modules/prepare_libc_gpu_build.cmake b/libc/cmake/modules/prepare_libc_gpu_build.cmake index 937bd22451c5f..f8f5a954e5e91 100644 --- a/libc/cmake/modules/prepare_libc_gpu_build.cmake +++ b/libc/cmake/modules/prepare_libc_gpu_build.cmake @@ -104,7 +104,7 @@ if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) # The AMDGPU environment uses different code objects to encode the ABI for # kernel calls and intrinsic functions. We want to specify this manually to # conform to whatever the test suite was built to handle. - set(LIBC_GPU_CODE_OBJECT_VERSION 5) + set(LIBC_GPU_CODE_OBJECT_VERSION 6) endif() if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX) diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index d8d9c4fc4bb8a..414ca0b919d76 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -130,6 +130,8 @@ Changes to the AArch64 Backend Changes to the AMDGPU Backend ----------------------------- +* Bump the default `.amdhsa_code_object_version` to 6. + * Removed `llvm.amdgcn.flat.atomic.fadd` and `llvm.amdgcn.global.atomic.fadd` intrinsics. Users should use the {ref}`atomicrmw ` instruction with `fadd` and diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 5a0e812748fbb..48eecc511bdae 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -34,7 +34,7 @@ static llvm::cl::opt DefaultAMDHSACodeObjectVersion( "amdhsa-code-object-version", llvm::cl::Hidden, - llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5), + llvm::cl::init(llvm::AMDGPU::AMDHSA_COV6), llvm::cl::desc("Set default AMDHSA Code Object Version (module flag " "or asm directive still take priority if present)")); diff --git a/llvm/test/CodeGen/AMDGPU/default_amdhsa_code_object_version.ll b/llvm/test/CodeGen/AMDGPU/default_amdhsa_code_object_version.ll new file mode 100644 index 0000000000000..6f79cf23bfbf7 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/default_amdhsa_code_object_version.ll @@ -0,0 +1,7 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa %s -o - | FileCheck %s + +; CHECK: .amdhsa_code_object_version 6 + +define amdgpu_kernel void @kernel() { + ret void +} diff --git a/offload/plugins-nextgen/common/src/Utils/ELF.cpp b/offload/plugins-nextgen/common/src/Utils/ELF.cpp index 88642fd5b5640..18b5ad3351b12 100644 --- a/offload/plugins-nextgen/common/src/Utils/ELF.cpp +++ b/offload/plugins-nextgen/common/src/Utils/ELF.cpp @@ -64,8 +64,9 @@ checkMachineImpl(const object::ELFObjectFile &ELFObj, uint16_t EMachine) { if (Header.e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA) return createError("Invalid AMD OS/ABI, must be AMDGPU_HSA"); if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V4 && - Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5) - return createError("Invalid AMD ABI version, must be version 4 or 5"); + Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5 && + 
Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6) + return createError("Invalid AMD ABI version, must be version above 4"); if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 || (Header.e_flags & EF_AMDGPU_MACH) > EF_AMDGPU_MACH_AMDGCN_GFX1201) return createError("Unsupported AMDGPU architecture"); From 259bdc0033d1abacc80ad34f0a8ed86f6e218571 Mon Sep 17 00:00:00 2001 From: Shubham Sandeep Rastogi Date: Tue, 3 Dec 2024 16:49:57 -0800 Subject: [PATCH 151/191] Revert "Reland "[NFC] Move DroppedVariableStats to its own file and redesign it to be extensible. (#117042)" (#118546)" This reverts commit 0c8928d456ac3ef23ed25bfc9e5d491dd7b62a11. Broke Bot: https://lab.llvm.org/buildbot/#/builders/76/builds/5008 error: undefined reference to `vtable for llvm::DroppedVariableStatsIR' --- ...s-to-collect-dropped-var-stats-for-M.patch | 1301 ----------------- ...DroppedVariableStats-to-its-own-file.patch | 1045 ------------- .../llvm/CodeGen/DroppedVariableStats.h | 224 --- .../llvm/Passes/StandardInstrumentations.h | 80 +- llvm/lib/CodeGen/CMakeLists.txt | 1 - llvm/lib/CodeGen/DroppedVariableStats.cpp | 194 --- llvm/lib/Passes/StandardInstrumentations.cpp | 178 ++- llvm/unittests/CodeGen/CMakeLists.txt | 1 - llvm/unittests/IR/CMakeLists.txt | 1 + .../DroppedVariableStatsTest.cpp} | 74 +- 10 files changed, 299 insertions(+), 2800 deletions(-) delete mode 100644 0001-Reland-Add-a-pass-to-collect-dropped-var-stats-for-M.patch delete mode 100644 0001-Reland-NFC-Move-DroppedVariableStats-to-its-own-file.patch delete mode 100644 llvm/include/llvm/CodeGen/DroppedVariableStats.h delete mode 100644 llvm/lib/CodeGen/DroppedVariableStats.cpp rename llvm/unittests/{CodeGen/DroppedVariableStatsIRTest.cpp => IR/DroppedVariableStatsTest.cpp} (91%) diff --git a/0001-Reland-Add-a-pass-to-collect-dropped-var-stats-for-M.patch b/0001-Reland-Add-a-pass-to-collect-dropped-var-stats-for-M.patch deleted file mode 100644 index 95c0a0b54f7e2..0000000000000 --- a/0001-Reland-Add-a-pass-to-collect-dropped-var-stats-for-M.patch +++ /dev/null @@ -1,1301 +0,0 @@ -From 8f00eaaa595c1b908d43b1de288e3c03f1f998bf Mon Sep 17 00:00:00 2001 -From: Shubham Sandeep Rastogi -Date: Mon, 18 Nov 2024 16:06:59 -0800 -Subject: [PATCH] Reland "Add a pass to collect dropped var stats for MIR" - -Moved the MIR Test to the unittests/CodeGen folder ---- - .../llvm/CodeGen/DroppedVariableStats.h | 48 +- - .../llvm/CodeGen/MachineFunctionPass.h | 2 + - llvm/lib/CodeGen/DroppedVariableStats.cpp | 63 +- - llvm/lib/CodeGen/MachineFunctionPass.cpp | 15 +- - llvm/unittests/CodeGen/CMakeLists.txt | 1 + - .../CodeGen/DroppedVariableStatsMIRTest.cpp | 1067 +++++++++++++++++ - 6 files changed, 1193 insertions(+), 3 deletions(-) - create mode 100644 llvm/unittests/CodeGen/DroppedVariableStatsMIRTest.cpp - -diff --git a/llvm/include/llvm/CodeGen/DroppedVariableStats.h b/llvm/include/llvm/CodeGen/DroppedVariableStats.h -index 371d775b02e8..f6050c68c91a 100644 ---- a/llvm/include/llvm/CodeGen/DroppedVariableStats.h -+++ b/llvm/include/llvm/CodeGen/DroppedVariableStats.h -@@ -7,7 +7,7 @@ - ///===---------------------------------------------------------------------===// - /// \file - /// Dropped Variable Statistics for Debug Information. Reports any number --/// of #dbg_value that get dropped due to an optimization pass. -+/// of #dbg_values or DBG_VALUEs that get dropped due to an optimization pass. 
- /// - ///===---------------------------------------------------------------------===// - -@@ -221,6 +221,52 @@ private: - } - }; - -+/// A class to collect and print dropped debug information due to MIR -+/// optimization passes. After every MIR pass is run, it will print how many -+/// #DBG_VALUEs were dropped due to that pass. -+class DroppedVariableStatsMIR : public DroppedVariableStats { -+public: -+ DroppedVariableStatsMIR() : llvm::DroppedVariableStats(false) {} -+ -+ void runBeforePass(StringRef PassID, MachineFunction *MF) { -+ if (PassID == "Debug Variable Analysis") -+ return; -+ setup(); -+ return runOnMachineFunction(MF, true); -+ } -+ -+ void runAfterPass(StringRef PassID, MachineFunction *MF) { -+ if (PassID == "Debug Variable Analysis") -+ return; -+ runOnMachineFunction(MF, false); -+ calculateDroppedVarStatsOnMachineFunction(MF, PassID, MF->getName().str()); -+ cleanup(); -+ } -+ -+private: -+ const MachineFunction *MFunc; -+ /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or -+ /// after a pass has run to facilitate dropped variable calculation for an -+ /// llvm::MachineFunction. -+ void runOnMachineFunction(const MachineFunction *MF, bool Before); -+ /// Iterate over all Instructions in a MachineFunction and report any dropped -+ /// debug information. -+ void calculateDroppedVarStatsOnMachineFunction(const MachineFunction *MF, -+ StringRef PassID, -+ StringRef FuncOrModName); -+ /// Override base class method to run on an llvm::MachineFunction -+ /// specifically. -+ virtual void -+ visitEveryInstruction(unsigned &DroppedCount, -+ DenseMap &InlinedAtsMap, -+ VarID Var) override; -+ /// Override base class method to run on DBG_VALUEs specifically. -+ virtual void visitEveryDebugRecord( -+ DenseSet &VarIDSet, -+ DenseMap> &InlinedAtsMap, -+ StringRef FuncName, bool Before) override; -+}; -+ - } // namespace llvm - - #endif -diff --git a/llvm/include/llvm/CodeGen/MachineFunctionPass.h b/llvm/include/llvm/CodeGen/MachineFunctionPass.h -index caaf22c2139e..d82b593497ff 100644 ---- a/llvm/include/llvm/CodeGen/MachineFunctionPass.h -+++ b/llvm/include/llvm/CodeGen/MachineFunctionPass.h -@@ -18,6 +18,7 @@ - #ifndef LLVM_CODEGEN_MACHINEFUNCTIONPASS_H - #define LLVM_CODEGEN_MACHINEFUNCTIONPASS_H - -+#include "llvm/CodeGen/DroppedVariableStats.h" - #include "llvm/CodeGen/MachineFunction.h" - #include "llvm/Pass.h" - -@@ -67,6 +68,7 @@ private: - MachineFunctionProperties RequiredProperties; - MachineFunctionProperties SetProperties; - MachineFunctionProperties ClearedProperties; -+ DroppedVariableStatsMIR DroppedVarStatsMF; - - /// createPrinterPass - Get a machine function printer pass. - Pass *createPrinterPass(raw_ostream &O, -diff --git a/llvm/lib/CodeGen/DroppedVariableStats.cpp b/llvm/lib/CodeGen/DroppedVariableStats.cpp -index 122fcad1293f..71f91292160f 100644 ---- a/llvm/lib/CodeGen/DroppedVariableStats.cpp -+++ b/llvm/lib/CodeGen/DroppedVariableStats.cpp -@@ -7,7 +7,7 @@ - ///===---------------------------------------------------------------------===// - /// \file - /// Dropped Variable Statistics for Debug Information. Reports any number --/// of #dbg_value that get dropped due to an optimization pass. -+/// of #dbg_values or DBG_VALUEs that get dropped due to an optimization pass. 
- /// - ///===---------------------------------------------------------------------===// - -@@ -192,3 +192,64 @@ void DroppedVariableStatsIR::visitEveryDebugRecord( - } - } - } -+ -+void DroppedVariableStatsMIR::runOnMachineFunction(const MachineFunction *MF, -+ bool Before) { -+ auto &DebugVariables = DebugVariablesStack.back()[&MF->getFunction()]; -+ auto FuncName = MF->getName(); -+ MFunc = MF; -+ run(DebugVariables, FuncName, Before); -+} -+ -+void DroppedVariableStatsMIR::calculateDroppedVarStatsOnMachineFunction( -+ const MachineFunction *MF, StringRef PassID, StringRef FuncOrModName) { -+ MFunc = MF; -+ StringRef FuncName = MF->getName(); -+ const Function *Func = &MF->getFunction(); -+ DebugVariables &DbgVariables = DebugVariablesStack.back()[Func]; -+ calculateDroppedStatsAndPrint(DbgVariables, FuncName, PassID, FuncOrModName, -+ "MachineFunction", Func); -+} -+ -+void DroppedVariableStatsMIR::visitEveryInstruction( -+ unsigned &DroppedCount, DenseMap &InlinedAtsMap, -+ VarID Var) { -+ unsigned PrevDroppedCount = DroppedCount; -+ const DIScope *DbgValScope = std::get<0>(Var); -+ for (const auto &MBB : *MFunc) { -+ for (const auto &MI : MBB) { -+ if (!MI.isDebugInstr()) { -+ auto *DbgLoc = MI.getDebugLoc().get(); -+ if (!DbgLoc) -+ continue; -+ -+ auto *Scope = DbgLoc->getScope(); -+ if (updateDroppedCount(DbgLoc, Scope, DbgValScope, InlinedAtsMap, Var, -+ DroppedCount)) -+ break; -+ } -+ } -+ if (PrevDroppedCount != DroppedCount) { -+ PrevDroppedCount = DroppedCount; -+ break; -+ } -+ } -+} -+ -+void DroppedVariableStatsMIR::visitEveryDebugRecord( -+ DenseSet &VarIDSet, -+ DenseMap> &InlinedAtsMap, -+ StringRef FuncName, bool Before) { -+ for (const auto &MBB : *MFunc) { -+ for (const auto &MI : MBB) { -+ if (MI.isDebugValueLike()) { -+ auto *DbgVar = MI.getDebugVariable(); -+ if (!DbgVar) -+ continue; -+ auto DbgLoc = MI.getDebugLoc(); -+ populateVarIDSetAndInlinedMap(DbgVar, DbgLoc, VarIDSet, InlinedAtsMap, -+ FuncName, Before); -+ } -+ } -+ } -+} -diff --git a/llvm/lib/CodeGen/MachineFunctionPass.cpp b/llvm/lib/CodeGen/MachineFunctionPass.cpp -index 62ac3e32d24d..e803811643f8 100644 ---- a/llvm/lib/CodeGen/MachineFunctionPass.cpp -+++ b/llvm/lib/CodeGen/MachineFunctionPass.cpp -@@ -32,6 +32,11 @@ - using namespace llvm; - using namespace ore; - -+static cl::opt DroppedVarStatsMIR( -+ "dropped-variable-stats-mir", cl::Hidden, -+ cl::desc("Dump dropped debug variables stats for MIR passes"), -+ cl::init(false)); -+ - Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O, - const std::string &Banner) const { - return createMachineFunctionPrinterPass(O, Banner); -@@ -91,7 +96,15 @@ bool MachineFunctionPass::runOnFunction(Function &F) { - - MFProps.reset(ClearedProperties); - -- bool RV = runOnMachineFunction(MF); -+ bool RV; -+ if (DroppedVarStatsMIR) { -+ auto PassName = getPassName(); -+ DroppedVarStatsMF.runBeforePass(PassName, &MF); -+ RV = runOnMachineFunction(MF); -+ DroppedVarStatsMF.runAfterPass(PassName, &MF); -+ } else { -+ RV = runOnMachineFunction(MF); -+ } - - if (ShouldEmitSizeRemarks) { - // We wanted size remarks. 
Check if there was a change to the number of -diff --git a/llvm/unittests/CodeGen/CMakeLists.txt b/llvm/unittests/CodeGen/CMakeLists.txt -index 807fd1a9b7b5..50ef1bb5b7af 100644 ---- a/llvm/unittests/CodeGen/CMakeLists.txt -+++ b/llvm/unittests/CodeGen/CMakeLists.txt -@@ -28,6 +28,7 @@ add_llvm_unittest(CodeGenTests - DIEHashTest.cpp - DIETest.cpp - DroppedVariableStatsIRTest.cpp -+ DroppedVariableStatsMIRTest.cpp - DwarfStringPoolEntryRefTest.cpp - InstrRefLDVTest.cpp - LowLevelTypeTest.cpp -diff --git a/llvm/unittests/CodeGen/DroppedVariableStatsMIRTest.cpp b/llvm/unittests/CodeGen/DroppedVariableStatsMIRTest.cpp -new file mode 100644 -index 000000000000..b26a89c7adcb ---- /dev/null -+++ b/llvm/unittests/CodeGen/DroppedVariableStatsMIRTest.cpp -@@ -0,0 +1,1067 @@ -+//===- unittests/IR/DroppedVariableStatsTest.cpp - TimePassesHandler tests -+//----------===// -+// -+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -+// See https://llvm.org/LICENSE.txt for license information. -+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -+// -+//===----------------------------------------------------------------------===// -+ -+#include "llvm/AsmParser/Parser.h" -+#include "llvm/CodeGen/MIRParser/MIRParser.h" -+#include "llvm/CodeGen/MachineModuleInfo.h" -+#include "llvm/IR/Function.h" -+#include "llvm/IR/LegacyPassManager.h" -+#include "llvm/IR/Module.h" -+#include "llvm/MC/TargetRegistry.h" -+#include "llvm/Pass.h" -+#include "llvm/Passes/StandardInstrumentations.h" -+#include "llvm/Support/TargetSelect.h" -+#include "llvm/Target/TargetMachine.h" -+#include "gtest/gtest.h" -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+using namespace llvm; -+ -+namespace { -+ -+std::unique_ptr -+createTargetMachine(std::string TT, StringRef CPU, StringRef FS) { -+ std::string Error; -+ const Target *T = TargetRegistry::lookupTarget(TT, Error); -+ if (!T) -+ return nullptr; -+ TargetOptions Options; -+ return std::unique_ptr( -+ static_cast(T->createTargetMachine( -+ TT, CPU, FS, Options, std::nullopt, std::nullopt))); -+} -+ -+std::unique_ptr parseMIR(const TargetMachine &TM, StringRef MIRCode, -+ MachineModuleInfo &MMI, LLVMContext *Context) { -+ SMDiagnostic Diagnostic; -+ std::unique_ptr M; -+ std::unique_ptr MBuffer = MemoryBuffer::getMemBuffer(MIRCode); -+ auto MIR = createMIRParser(std::move(MBuffer), *Context); -+ if (!MIR) -+ return nullptr; -+ -+ std::unique_ptr Mod = MIR->parseIRModule(); -+ if (!Mod) -+ return nullptr; -+ -+ Mod->setDataLayout(TM.createDataLayout()); -+ -+ if (MIR->parseMachineFunctions(*Mod, MMI)) { -+ M.reset(); -+ return nullptr; -+ } -+ return Mod; -+} -+// This test ensures that if a DBG_VALUE and an instruction that exists in the -+// same scope as that DBG_VALUE are both deleted as a result of an optimization -+// pass, debug information is considered not dropped. 
-+TEST(DroppedVariableStatsMIR, BothDeleted) { -+ InitializeAllTargetInfos(); -+ InitializeAllTargets(); -+ InitializeAllTargetMCs(); -+ PassInstrumentationCallbacks PIC; -+ PassInstrumentation PI(&PIC); -+ -+ LLVMContext C; -+ -+ const char *MIR = -+ R"( -+--- | -+ ; ModuleID = '/tmp/test.ll' -+ source_filename = "/tmp/test.ll" -+ target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" -+ -+ define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 { -+ entry: -+ #dbg_value(i32 %x, !10, !DIExpression(), !11) -+ %add = add nsw i32 %x, 1, !dbg !12 -+ ret i32 0 -+ } -+ -+ !llvm.dbg.cu = !{!0} -+ !llvm.module.flags = !{!2} -+ !llvm.ident = !{!3} -+ -+ !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") -+ !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/") -+ !2 = !{i32 2, !"Debug Info Version", i32 3} -+ !3 = !{!"clang"} -+ !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) -+ !5 = !DIFile(filename: "/tmp/code.cpp", directory: "") -+ !6 = !DISubroutineType(types: !7) -+ !7 = !{!8, !8} -+ !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -+ !9 = !{!10} -+ !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8) -+ !11 = !DILocation(line: 0, scope: !4) -+ !12 = !DILocation(line: 2, column: 11, scope: !4) -+ -+... -+--- -+name: _Z3fooi -+alignment: 4 -+exposesReturnsTwice: false -+legalized: false -+regBankSelected: false -+selected: false -+failedISel: false -+tracksRegLiveness: true -+hasWinCFI: false -+noPhis: false -+isSSA: true -+noVRegs: false -+hasFakeUses: false -+callsEHReturn: false -+callsUnwindInit: false -+hasEHCatchret: false -+hasEHScopes: false -+hasEHFunclets: false -+isOutlined: false -+debugInstrRef: false -+failsVerification: false -+tracksDebugUserValues: false -+registers: -+ - { id: 0, class: _, preferred-register: '', flags: [ ] } -+ - { id: 1, class: _, preferred-register: '', flags: [ ] } -+ - { id: 2, class: _, preferred-register: '', flags: [ ] } -+ - { id: 3, class: _, preferred-register: '', flags: [ ] } -+liveins: -+ - { reg: '$w0', virtual-reg: '' } -+frameInfo: -+ isFrameAddressTaken: false -+ isReturnAddressTaken: false -+ hasStackMap: false -+ hasPatchPoint: false -+ stackSize: 0 -+ offsetAdjustment: 0 -+ maxAlignment: 1 -+ adjustsStack: false -+ hasCalls: false -+ stackProtector: '' -+ functionContext: '' -+ maxCallFrameSize: 4294967295 -+ cvBytesOfCalleeSavedRegisters: 0 -+ hasOpaqueSPAdjustment: false -+ hasVAStart: false -+ hasMustTailInVarArgFunc: false -+ hasTailCall: false -+ isCalleeSavedInfoValid: false -+ localFrameSize: 0 -+ savePoint: '' -+ restorePoint: '' -+fixedStack: [] -+stack: [] -+entry_values: [] -+callSites: [] -+debugValueSubstitutions: [] -+constants: [] -+machineFunctionInfo: {} -+body: | -+ bb.1.entry: -+ liveins: $w0 -+ -+ %0:_(s32) = COPY $w0 -+ %1:_(s32) = G_CONSTANT i32 1 -+ %3:_(s32) = G_CONSTANT i32 0 -+ DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 -+ %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 -+ $w0 = COPY %3(s32) -+ RET_ReallyLR implicit $w0 -+ )"; -+ auto TM = createTargetMachine(Triple::normalize("aarch64--"), "", ""); -+ MachineModuleInfo MMI(TM.get()); -+ std::unique_ptr M = 
parseMIR(*TM, MIR, MMI, &C); -+ ASSERT_TRUE(M); -+ -+ DroppedVariableStatsMIR Stats; -+ auto *MF = MMI.getMachineFunction(*M->getFunction("_Z3fooi")); -+ Stats.runBeforePass("Test", MF); -+ -+ // This loop simulates an IR pass that drops debug information. -+ for (auto &MBB : *MF) { -+ for (auto &MI : MBB) { -+ if (MI.isDebugValueLike()) { -+ MI.eraseFromParent(); -+ break; -+ } -+ } -+ for (auto &MI : MBB) { -+ auto *DbgLoc = MI.getDebugLoc().get(); -+ if (DbgLoc) { -+ MI.eraseFromParent(); -+ break; -+ } -+ } -+ break; -+ } -+ -+ Stats.runAfterPass("Test", MF); -+ ASSERT_EQ(Stats.getPassDroppedVariables(), false); -+} -+ -+// This test ensures that if a DBG_VALUE is dropped after an optimization pass, -+// but an instruction that shares the same scope as the DBG_VALUE still exists, -+// debug information is conisdered dropped. -+TEST(DroppedVariableStatsMIR, DbgValLost) { -+ InitializeAllTargetInfos(); -+ InitializeAllTargets(); -+ InitializeAllTargetMCs(); -+ PassInstrumentationCallbacks PIC; -+ PassInstrumentation PI(&PIC); -+ -+ LLVMContext C; -+ -+ const char *MIR = -+ R"( -+--- | -+ ; ModuleID = '/tmp/test.ll' -+ source_filename = "/tmp/test.ll" -+ target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" -+ -+ define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 { -+ entry: -+ #dbg_value(i32 %x, !10, !DIExpression(), !11) -+ %add = add nsw i32 %x, 1, !dbg !12 -+ ret i32 0 -+ } -+ -+ !llvm.dbg.cu = !{!0} -+ !llvm.module.flags = !{!2} -+ !llvm.ident = !{!3} -+ -+ !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") -+ !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/") -+ !2 = !{i32 2, !"Debug Info Version", i32 3} -+ !3 = !{!"clang"} -+ !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) -+ !5 = !DIFile(filename: "/tmp/code.cpp", directory: "") -+ !6 = !DISubroutineType(types: !7) -+ !7 = !{!8, !8} -+ !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -+ !9 = !{!10} -+ !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8) -+ !11 = !DILocation(line: 0, scope: !4) -+ !12 = !DILocation(line: 2, column: 11, scope: !4) -+ -+... 
-+--- -+name: _Z3fooi -+alignment: 4 -+exposesReturnsTwice: false -+legalized: false -+regBankSelected: false -+selected: false -+failedISel: false -+tracksRegLiveness: true -+hasWinCFI: false -+noPhis: false -+isSSA: true -+noVRegs: false -+hasFakeUses: false -+callsEHReturn: false -+callsUnwindInit: false -+hasEHCatchret: false -+hasEHScopes: false -+hasEHFunclets: false -+isOutlined: false -+debugInstrRef: false -+failsVerification: false -+tracksDebugUserValues: false -+registers: -+ - { id: 0, class: _, preferred-register: '', flags: [ ] } -+ - { id: 1, class: _, preferred-register: '', flags: [ ] } -+ - { id: 2, class: _, preferred-register: '', flags: [ ] } -+ - { id: 3, class: _, preferred-register: '', flags: [ ] } -+liveins: -+ - { reg: '$w0', virtual-reg: '' } -+frameInfo: -+ isFrameAddressTaken: false -+ isReturnAddressTaken: false -+ hasStackMap: false -+ hasPatchPoint: false -+ stackSize: 0 -+ offsetAdjustment: 0 -+ maxAlignment: 1 -+ adjustsStack: false -+ hasCalls: false -+ stackProtector: '' -+ functionContext: '' -+ maxCallFrameSize: 4294967295 -+ cvBytesOfCalleeSavedRegisters: 0 -+ hasOpaqueSPAdjustment: false -+ hasVAStart: false -+ hasMustTailInVarArgFunc: false -+ hasTailCall: false -+ isCalleeSavedInfoValid: false -+ localFrameSize: 0 -+ savePoint: '' -+ restorePoint: '' -+fixedStack: [] -+stack: [] -+entry_values: [] -+callSites: [] -+debugValueSubstitutions: [] -+constants: [] -+machineFunctionInfo: {} -+body: | -+ bb.1.entry: -+ liveins: $w0 -+ -+ %0:_(s32) = COPY $w0 -+ %1:_(s32) = G_CONSTANT i32 1 -+ %3:_(s32) = G_CONSTANT i32 0 -+ DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 -+ %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 -+ $w0 = COPY %3(s32) -+ RET_ReallyLR implicit $w0 -+ )"; -+ auto TM = createTargetMachine(Triple::normalize("aarch64--"), "", ""); -+ MachineModuleInfo MMI(TM.get()); -+ std::unique_ptr M = parseMIR(*TM, MIR, MMI, &C); -+ ASSERT_TRUE(M); -+ -+ DroppedVariableStatsMIR Stats; -+ auto *MF = MMI.getMachineFunction(*M->getFunction("_Z3fooi")); -+ Stats.runBeforePass("Test", MF); -+ -+ // This loop simulates an IR pass that drops debug information. -+ for (auto &MBB : *MF) { -+ for (auto &MI : MBB) { -+ if (MI.isDebugValueLike()) { -+ MI.eraseFromParent(); -+ break; -+ } -+ } -+ break; -+ } -+ -+ Stats.runAfterPass("Test", MF); -+ ASSERT_EQ(Stats.getPassDroppedVariables(), true); -+} -+ -+// This test ensures that if a #dbg_value is dropped after an optimization pass, -+// but an instruction that has an unrelated scope as the #dbg_value still -+// exists, debug information is conisdered not dropped. 
-+TEST(DroppedVariableStatsMIR, UnrelatedScopes) { -+ InitializeAllTargetInfos(); -+ InitializeAllTargets(); -+ InitializeAllTargetMCs(); -+ PassInstrumentationCallbacks PIC; -+ PassInstrumentation PI(&PIC); -+ -+ LLVMContext C; -+ -+ const char *MIR = -+ R"( -+--- | -+ ; ModuleID = '/tmp/test.ll' -+ source_filename = "/tmp/test.ll" -+ target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" -+ -+ define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 { -+ entry: -+ #dbg_value(i32 %x, !10, !DIExpression(), !11) -+ %add = add nsw i32 %x, 1, !dbg !12 -+ ret i32 0 -+ } -+ -+ !llvm.dbg.cu = !{!0} -+ !llvm.module.flags = !{!2} -+ !llvm.ident = !{!3} -+ -+ !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") -+ !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/") -+ !2 = !{i32 2, !"Debug Info Version", i32 3} -+ !3 = !{!"clang"} -+ !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) -+ !5 = !DIFile(filename: "/tmp/code.cpp", directory: "") -+ !6 = !DISubroutineType(types: !7) -+ !7 = !{!8, !8} -+ !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -+ !9 = !{!10} -+ !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8) -+ !11 = !DILocation(line: 0, scope: !4) -+ !12 = !DILocation(line: 2, column: 11, scope: !13) -+ !13 = distinct !DISubprogram(name: "bar", linkageName: "_Z3bari", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) -+ -+... 
-+--- -+name: _Z3fooi -+alignment: 4 -+exposesReturnsTwice: false -+legalized: false -+regBankSelected: false -+selected: false -+failedISel: false -+tracksRegLiveness: true -+hasWinCFI: false -+noPhis: false -+isSSA: true -+noVRegs: false -+hasFakeUses: false -+callsEHReturn: false -+callsUnwindInit: false -+hasEHCatchret: false -+hasEHScopes: false -+hasEHFunclets: false -+isOutlined: false -+debugInstrRef: false -+failsVerification: false -+tracksDebugUserValues: false -+registers: -+ - { id: 0, class: _, preferred-register: '', flags: [ ] } -+ - { id: 1, class: _, preferred-register: '', flags: [ ] } -+ - { id: 2, class: _, preferred-register: '', flags: [ ] } -+ - { id: 3, class: _, preferred-register: '', flags: [ ] } -+liveins: -+ - { reg: '$w0', virtual-reg: '' } -+frameInfo: -+ isFrameAddressTaken: false -+ isReturnAddressTaken: false -+ hasStackMap: false -+ hasPatchPoint: false -+ stackSize: 0 -+ offsetAdjustment: 0 -+ maxAlignment: 1 -+ adjustsStack: false -+ hasCalls: false -+ stackProtector: '' -+ functionContext: '' -+ maxCallFrameSize: 4294967295 -+ cvBytesOfCalleeSavedRegisters: 0 -+ hasOpaqueSPAdjustment: false -+ hasVAStart: false -+ hasMustTailInVarArgFunc: false -+ hasTailCall: false -+ isCalleeSavedInfoValid: false -+ localFrameSize: 0 -+ savePoint: '' -+ restorePoint: '' -+fixedStack: [] -+stack: [] -+entry_values: [] -+callSites: [] -+debugValueSubstitutions: [] -+constants: [] -+machineFunctionInfo: {} -+body: | -+ bb.1.entry: -+ liveins: $w0 -+ -+ %0:_(s32) = COPY $w0 -+ %1:_(s32) = G_CONSTANT i32 1 -+ %3:_(s32) = G_CONSTANT i32 0 -+ DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 -+ %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 -+ $w0 = COPY %3(s32) -+ RET_ReallyLR implicit $w0 -+ )"; -+ auto TM = createTargetMachine(Triple::normalize("aarch64--"), "", ""); -+ MachineModuleInfo MMI(TM.get()); -+ std::unique_ptr M = parseMIR(*TM, MIR, MMI, &C); -+ ASSERT_TRUE(M); -+ -+ DroppedVariableStatsMIR Stats; -+ auto *MF = MMI.getMachineFunction(*M->getFunction("_Z3fooi")); -+ Stats.runBeforePass("Test", MF); -+ -+ // This loop simulates an IR pass that drops debug information. -+ for (auto &MBB : *MF) { -+ for (auto &MI : MBB) { -+ if (MI.isDebugValueLike()) { -+ MI.eraseFromParent(); -+ break; -+ } -+ } -+ break; -+ } -+ -+ Stats.runAfterPass("Test", MF); -+ ASSERT_EQ(Stats.getPassDroppedVariables(), false); -+} -+ -+// This test ensures that if a #dbg_value is dropped after an optimization pass, -+// but an instruction that has a scope which is a child of the #dbg_value scope -+// still exists, debug information is conisdered dropped. 
-+TEST(DroppedVariableStatsMIR, ChildScopes) { -+ InitializeAllTargetInfos(); -+ InitializeAllTargets(); -+ InitializeAllTargetMCs(); -+ PassInstrumentationCallbacks PIC; -+ PassInstrumentation PI(&PIC); -+ -+ LLVMContext C; -+ -+ const char *MIR = -+ R"( -+--- | -+ ; ModuleID = '/tmp/test.ll' -+ source_filename = "/tmp/test.ll" -+ target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" -+ -+ define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 { -+ entry: -+ #dbg_value(i32 %x, !10, !DIExpression(), !11) -+ %add = add nsw i32 %x, 1, !dbg !12 -+ ret i32 0 -+ } -+ -+ !llvm.dbg.cu = !{!0} -+ !llvm.module.flags = !{!2} -+ !llvm.ident = !{!3} -+ -+ !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") -+ !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/") -+ !2 = !{i32 2, !"Debug Info Version", i32 3} -+ !3 = !{!"clang"} -+ !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) -+ !5 = !DIFile(filename: "/tmp/code.cpp", directory: "") -+ !6 = !DISubroutineType(types: !7) -+ !7 = !{!8, !8} -+ !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -+ !9 = !{!10} -+ !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8) -+ !11 = !DILocation(line: 0, scope: !4) -+ !12 = !DILocation(line: 2, column: 11, scope: !13) -+ !13 = distinct !DILexicalBlock(scope: !4, file: !5, line: 10, column: 28) -+ -+... -+--- -+name: _Z3fooi -+alignment: 4 -+exposesReturnsTwice: false -+legalized: false -+regBankSelected: false -+selected: false -+failedISel: false -+tracksRegLiveness: true -+hasWinCFI: false -+noPhis: false -+isSSA: true -+noVRegs: false -+hasFakeUses: false -+callsEHReturn: false -+callsUnwindInit: false -+hasEHCatchret: false -+hasEHScopes: false -+hasEHFunclets: false -+isOutlined: false -+debugInstrRef: false -+failsVerification: false -+tracksDebugUserValues: false -+registers: -+ - { id: 0, class: _, preferred-register: '', flags: [ ] } -+ - { id: 1, class: _, preferred-register: '', flags: [ ] } -+ - { id: 2, class: _, preferred-register: '', flags: [ ] } -+ - { id: 3, class: _, preferred-register: '', flags: [ ] } -+liveins: -+ - { reg: '$w0', virtual-reg: '' } -+frameInfo: -+ isFrameAddressTaken: false -+ isReturnAddressTaken: false -+ hasStackMap: false -+ hasPatchPoint: false -+ stackSize: 0 -+ offsetAdjustment: 0 -+ maxAlignment: 1 -+ adjustsStack: false -+ hasCalls: false -+ stackProtector: '' -+ functionContext: '' -+ maxCallFrameSize: 4294967295 -+ cvBytesOfCalleeSavedRegisters: 0 -+ hasOpaqueSPAdjustment: false -+ hasVAStart: false -+ hasMustTailInVarArgFunc: false -+ hasTailCall: false -+ isCalleeSavedInfoValid: false -+ localFrameSize: 0 -+ savePoint: '' -+ restorePoint: '' -+fixedStack: [] -+stack: [] -+entry_values: [] -+callSites: [] -+debugValueSubstitutions: [] -+constants: [] -+machineFunctionInfo: {} -+body: | -+ bb.1.entry: -+ liveins: $w0 -+ -+ %0:_(s32) = COPY $w0 -+ %1:_(s32) = G_CONSTANT i32 1 -+ %3:_(s32) = G_CONSTANT i32 0 -+ DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 -+ %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 -+ $w0 = COPY %3(s32) -+ RET_ReallyLR implicit $w0 -+ )"; -+ auto TM = 
createTargetMachine(Triple::normalize("aarch64--"), "", ""); -+ MachineModuleInfo MMI(TM.get()); -+ std::unique_ptr M = parseMIR(*TM, MIR, MMI, &C); -+ ASSERT_TRUE(M); -+ -+ DroppedVariableStatsMIR Stats; -+ auto *MF = MMI.getMachineFunction(*M->getFunction("_Z3fooi")); -+ Stats.runBeforePass("Test", MF); -+ -+ // This loop simulates an IR pass that drops debug information. -+ for (auto &MBB : *MF) { -+ for (auto &MI : MBB) { -+ if (MI.isDebugValueLike()) { -+ MI.eraseFromParent(); -+ break; -+ } -+ } -+ break; -+ } -+ -+ Stats.runAfterPass("Test", MF); -+ ASSERT_EQ(Stats.getPassDroppedVariables(), true); -+} -+ -+// This test ensures that if a DBG_VALUE is dropped after an optimization pass, -+// but an instruction that has a scope which is a child of the DBG_VALUE scope -+// still exists, and the DBG_VALUE is inlined at another location, debug -+// information is conisdered not dropped. -+TEST(DroppedVariableStatsMIR, InlinedAt) { -+ InitializeAllTargetInfos(); -+ InitializeAllTargets(); -+ InitializeAllTargetMCs(); -+ PassInstrumentationCallbacks PIC; -+ PassInstrumentation PI(&PIC); -+ -+ LLVMContext C; -+ -+ const char *MIR = -+ R"( -+--- | -+ ; ModuleID = '/tmp/test.ll' -+ source_filename = "/tmp/test.ll" -+ target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" -+ -+ define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 { -+ entry: -+ #dbg_value(i32 %x, !10, !DIExpression(), !11) -+ %add = add nsw i32 %x, 1, !dbg !12 -+ ret i32 0 -+ } -+ -+ !llvm.dbg.cu = !{!0} -+ !llvm.module.flags = !{!2} -+ !llvm.ident = !{!3} -+ -+ !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") -+ !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/") -+ !2 = !{i32 2, !"Debug Info Version", i32 3} -+ !3 = !{!"clang"} -+ !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) -+ !5 = !DIFile(filename: "/tmp/code.cpp", directory: "") -+ !6 = !DISubroutineType(types: !7) -+ !7 = !{!8, !8} -+ !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -+ !9 = !{!10} -+ !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8) -+ !11 = !DILocation(line: 0, scope: !4, inlinedAt: !14) -+ !12 = !DILocation(line: 2, column: 11, scope: !13) -+ !13 = distinct !DILexicalBlock(scope: !4, file: !5, line: 10, column: 28) -+ !14 = !DILocation(line: 3, column: 2, scope: !4) -+ -+... 
-+--- -+name: _Z3fooi -+alignment: 4 -+exposesReturnsTwice: false -+legalized: false -+regBankSelected: false -+selected: false -+failedISel: false -+tracksRegLiveness: true -+hasWinCFI: false -+noPhis: false -+isSSA: true -+noVRegs: false -+hasFakeUses: false -+callsEHReturn: false -+callsUnwindInit: false -+hasEHCatchret: false -+hasEHScopes: false -+hasEHFunclets: false -+isOutlined: false -+debugInstrRef: false -+failsVerification: false -+tracksDebugUserValues: false -+registers: -+ - { id: 0, class: _, preferred-register: '', flags: [ ] } -+ - { id: 1, class: _, preferred-register: '', flags: [ ] } -+ - { id: 2, class: _, preferred-register: '', flags: [ ] } -+ - { id: 3, class: _, preferred-register: '', flags: [ ] } -+liveins: -+ - { reg: '$w0', virtual-reg: '' } -+frameInfo: -+ isFrameAddressTaken: false -+ isReturnAddressTaken: false -+ hasStackMap: false -+ hasPatchPoint: false -+ stackSize: 0 -+ offsetAdjustment: 0 -+ maxAlignment: 1 -+ adjustsStack: false -+ hasCalls: false -+ stackProtector: '' -+ functionContext: '' -+ maxCallFrameSize: 4294967295 -+ cvBytesOfCalleeSavedRegisters: 0 -+ hasOpaqueSPAdjustment: false -+ hasVAStart: false -+ hasMustTailInVarArgFunc: false -+ hasTailCall: false -+ isCalleeSavedInfoValid: false -+ localFrameSize: 0 -+ savePoint: '' -+ restorePoint: '' -+fixedStack: [] -+stack: [] -+entry_values: [] -+callSites: [] -+debugValueSubstitutions: [] -+constants: [] -+machineFunctionInfo: {} -+body: | -+ bb.1.entry: -+ liveins: $w0 -+ -+ %0:_(s32) = COPY $w0 -+ %1:_(s32) = G_CONSTANT i32 1 -+ %3:_(s32) = G_CONSTANT i32 0 -+ DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 -+ %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 -+ $w0 = COPY %3(s32) -+ RET_ReallyLR implicit $w0 -+ )"; -+ auto TM = createTargetMachine(Triple::normalize("aarch64--"), "", ""); -+ MachineModuleInfo MMI(TM.get()); -+ std::unique_ptr M = parseMIR(*TM, MIR, MMI, &C); -+ ASSERT_TRUE(M); -+ -+ DroppedVariableStatsMIR Stats; -+ auto *MF = MMI.getMachineFunction(*M->getFunction("_Z3fooi")); -+ Stats.runBeforePass("Test", MF); -+ -+ // This loop simulates an IR pass that drops debug information. -+ for (auto &MBB : *MF) { -+ for (auto &MI : MBB) { -+ if (MI.isDebugValueLike()) { -+ MI.eraseFromParent(); -+ break; -+ } -+ } -+ break; -+ } -+ -+ Stats.runAfterPass("Test", MF); -+ ASSERT_EQ(Stats.getPassDroppedVariables(), false); -+} -+ -+// This test ensures that if a DBG_VALUE is dropped after an optimization pass, -+// but an instruction that has a scope which is a child of the DBG_VALUE scope -+// still exists, and the DBG_VALUE and the instruction are inlined at another -+// location, debug information is conisdered dropped. 
-+TEST(DroppedVariableStatsMIR, InlinedAtShared) { -+ InitializeAllTargetInfos(); -+ InitializeAllTargets(); -+ InitializeAllTargetMCs(); -+ PassInstrumentationCallbacks PIC; -+ PassInstrumentation PI(&PIC); -+ -+ LLVMContext C; -+ -+ const char *MIR = -+ R"( -+--- | -+ ; ModuleID = '/tmp/test.ll' -+ source_filename = "/tmp/test.ll" -+ target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" -+ -+ define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 { -+ entry: -+ #dbg_value(i32 %x, !10, !DIExpression(), !11) -+ %add = add nsw i32 %x, 1, !dbg !12 -+ ret i32 0 -+ } -+ -+ !llvm.dbg.cu = !{!0} -+ !llvm.module.flags = !{!2} -+ !llvm.ident = !{!3} -+ -+ !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") -+ !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/") -+ !2 = !{i32 2, !"Debug Info Version", i32 3} -+ !3 = !{!"clang"} -+ !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) -+ !5 = !DIFile(filename: "/tmp/code.cpp", directory: "") -+ !6 = !DISubroutineType(types: !7) -+ !7 = !{!8, !8} -+ !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -+ !9 = !{!10} -+ !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8) -+ !11 = !DILocation(line: 0, scope: !4, inlinedAt: !14) -+ !12 = !DILocation(line: 2, column: 11, scope: !13, inlinedAt: !14) -+ !13 = distinct !DILexicalBlock(scope: !4, file: !5, line: 10, column: 28) -+ !14 = !DILocation(line: 3, column: 2, scope: !4) -+ -+... 
-+--- -+name: _Z3fooi -+alignment: 4 -+exposesReturnsTwice: false -+legalized: false -+regBankSelected: false -+selected: false -+failedISel: false -+tracksRegLiveness: true -+hasWinCFI: false -+noPhis: false -+isSSA: true -+noVRegs: false -+hasFakeUses: false -+callsEHReturn: false -+callsUnwindInit: false -+hasEHCatchret: false -+hasEHScopes: false -+hasEHFunclets: false -+isOutlined: false -+debugInstrRef: false -+failsVerification: false -+tracksDebugUserValues: false -+registers: -+ - { id: 0, class: _, preferred-register: '', flags: [ ] } -+ - { id: 1, class: _, preferred-register: '', flags: [ ] } -+ - { id: 2, class: _, preferred-register: '', flags: [ ] } -+ - { id: 3, class: _, preferred-register: '', flags: [ ] } -+liveins: -+ - { reg: '$w0', virtual-reg: '' } -+frameInfo: -+ isFrameAddressTaken: false -+ isReturnAddressTaken: false -+ hasStackMap: false -+ hasPatchPoint: false -+ stackSize: 0 -+ offsetAdjustment: 0 -+ maxAlignment: 1 -+ adjustsStack: false -+ hasCalls: false -+ stackProtector: '' -+ functionContext: '' -+ maxCallFrameSize: 4294967295 -+ cvBytesOfCalleeSavedRegisters: 0 -+ hasOpaqueSPAdjustment: false -+ hasVAStart: false -+ hasMustTailInVarArgFunc: false -+ hasTailCall: false -+ isCalleeSavedInfoValid: false -+ localFrameSize: 0 -+ savePoint: '' -+ restorePoint: '' -+fixedStack: [] -+stack: [] -+entry_values: [] -+callSites: [] -+debugValueSubstitutions: [] -+constants: [] -+machineFunctionInfo: {} -+body: | -+ bb.1.entry: -+ liveins: $w0 -+ -+ %0:_(s32) = COPY $w0 -+ %1:_(s32) = G_CONSTANT i32 1 -+ %3:_(s32) = G_CONSTANT i32 0 -+ DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 -+ %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 -+ $w0 = COPY %3(s32) -+ RET_ReallyLR implicit $w0 -+ )"; -+ auto TM = createTargetMachine(Triple::normalize("aarch64--"), "", ""); -+ MachineModuleInfo MMI(TM.get()); -+ std::unique_ptr M = parseMIR(*TM, MIR, MMI, &C); -+ ASSERT_TRUE(M); -+ -+ DroppedVariableStatsMIR Stats; -+ auto *MF = MMI.getMachineFunction(*M->getFunction("_Z3fooi")); -+ Stats.runBeforePass("Test", MF); -+ -+ // This loop simulates an IR pass that drops debug information. -+ for (auto &MBB : *MF) { -+ for (auto &MI : MBB) { -+ if (MI.isDebugValueLike()) { -+ MI.eraseFromParent(); -+ break; -+ } -+ } -+ break; -+ } -+ -+ Stats.runAfterPass("Test", MF); -+ ASSERT_EQ(Stats.getPassDroppedVariables(), true); -+} -+ -+// This test ensures that if a DBG_VALUE is dropped after an optimization pass, -+// but an instruction that has a scope which is a child of the DBG_VALUE scope -+// still exists, and the instruction is inlined at a location that is the -+// DBG_VALUE's inlined at location, debug information is conisdered dropped. 
-+TEST(DroppedVariableStatsMIR, InlinedAtChild) { -+ InitializeAllTargetInfos(); -+ InitializeAllTargets(); -+ InitializeAllTargetMCs(); -+ PassInstrumentationCallbacks PIC; -+ PassInstrumentation PI(&PIC); -+ -+ LLVMContext C; -+ -+ const char *MIR = -+ R"( -+--- | -+ ; ModuleID = '/tmp/test.ll' -+ source_filename = "/tmp/test.ll" -+ target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" -+ -+ define noundef range(i32 -2147483647, -2147483648) i32 @_Z3fooi(i32 noundef %x) local_unnamed_addr !dbg !4 { -+ entry: -+ #dbg_value(i32 %x, !10, !DIExpression(), !11) -+ %add = add nsw i32 %x, 1, !dbg !12 -+ ret i32 0 -+ } -+ -+ !llvm.dbg.cu = !{!0} -+ !llvm.module.flags = !{!2} -+ !llvm.ident = !{!3} -+ -+ !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") -+ !1 = !DIFile(filename: "/tmp/code.cpp", directory: "/") -+ !2 = !{i32 2, !"Debug Info Version", i32 3} -+ !3 = !{!"clang"} -+ !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !5, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) -+ !5 = !DIFile(filename: "/tmp/code.cpp", directory: "") -+ !6 = !DISubroutineType(types: !7) -+ !7 = !{!8, !8} -+ !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -+ !9 = !{!10} -+ !10 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 1, type: !8) -+ !11 = !DILocation(line: 0, scope: !4, inlinedAt: !14) -+ !12 = !DILocation(line: 2, column: 11, scope: !13, inlinedAt: !15) -+ !13 = distinct !DILexicalBlock(scope: !4, file: !5, line: 10, column: 28) -+ !14 = !DILocation(line: 3, column: 2, scope: !4) -+ !15 = !DILocation(line: 4, column: 5, scope: !13, inlinedAt: !14) -+ -+... 
-+--- -+name: _Z3fooi -+alignment: 4 -+exposesReturnsTwice: false -+legalized: false -+regBankSelected: false -+selected: false -+failedISel: false -+tracksRegLiveness: true -+hasWinCFI: false -+noPhis: false -+isSSA: true -+noVRegs: false -+hasFakeUses: false -+callsEHReturn: false -+callsUnwindInit: false -+hasEHCatchret: false -+hasEHScopes: false -+hasEHFunclets: false -+isOutlined: false -+debugInstrRef: false -+failsVerification: false -+tracksDebugUserValues: false -+registers: -+ - { id: 0, class: _, preferred-register: '', flags: [ ] } -+ - { id: 1, class: _, preferred-register: '', flags: [ ] } -+ - { id: 2, class: _, preferred-register: '', flags: [ ] } -+ - { id: 3, class: _, preferred-register: '', flags: [ ] } -+liveins: -+ - { reg: '$w0', virtual-reg: '' } -+frameInfo: -+ isFrameAddressTaken: false -+ isReturnAddressTaken: false -+ hasStackMap: false -+ hasPatchPoint: false -+ stackSize: 0 -+ offsetAdjustment: 0 -+ maxAlignment: 1 -+ adjustsStack: false -+ hasCalls: false -+ stackProtector: '' -+ functionContext: '' -+ maxCallFrameSize: 4294967295 -+ cvBytesOfCalleeSavedRegisters: 0 -+ hasOpaqueSPAdjustment: false -+ hasVAStart: false -+ hasMustTailInVarArgFunc: false -+ hasTailCall: false -+ isCalleeSavedInfoValid: false -+ localFrameSize: 0 -+ savePoint: '' -+ restorePoint: '' -+fixedStack: [] -+stack: [] -+entry_values: [] -+callSites: [] -+debugValueSubstitutions: [] -+constants: [] -+machineFunctionInfo: {} -+body: | -+ bb.1.entry: -+ liveins: $w0 -+ -+ %0:_(s32) = COPY $w0 -+ %1:_(s32) = G_CONSTANT i32 1 -+ %3:_(s32) = G_CONSTANT i32 0 -+ DBG_VALUE %0(s32), $noreg, !10, !DIExpression(), debug-location !11 -+ %2:_(s32) = nsw G_ADD %0, %1, debug-location !12 -+ $w0 = COPY %3(s32) -+ RET_ReallyLR implicit $w0 -+ )"; -+ auto TM = createTargetMachine(Triple::normalize("aarch64--"), "", ""); -+ MachineModuleInfo MMI(TM.get()); -+ std::unique_ptr M = parseMIR(*TM, MIR, MMI, &C); -+ ASSERT_TRUE(M); -+ -+ DroppedVariableStatsMIR Stats; -+ auto *MF = MMI.getMachineFunction(*M->getFunction("_Z3fooi")); -+ Stats.runBeforePass("Test", MF); -+ -+ // This loop simulates an IR pass that drops debug information. -+ for (auto &MBB : *MF) { -+ for (auto &MI : MBB) { -+ if (MI.isDebugValueLike()) { -+ MI.eraseFromParent(); -+ break; -+ } -+ } -+ break; -+ } -+ -+ Stats.runAfterPass("Test", MF); -+ ASSERT_EQ(Stats.getPassDroppedVariables(), true); -+} -+ -+} // end anonymous namespace --- -2.46.2 - diff --git a/0001-Reland-NFC-Move-DroppedVariableStats-to-its-own-file.patch b/0001-Reland-NFC-Move-DroppedVariableStats-to-its-own-file.patch deleted file mode 100644 index e68aa98b82b09..0000000000000 --- a/0001-Reland-NFC-Move-DroppedVariableStats-to-its-own-file.patch +++ /dev/null @@ -1,1045 +0,0 @@ -From 1f4f368b9c3b92787018a6ee410c5ab4e79b072d Mon Sep 17 00:00:00 2001 -From: Shubham Sandeep Rastogi -Date: Mon, 18 Nov 2024 16:06:26 -0800 -Subject: [PATCH] Reland [NFC] Move DroppedVariableStats to its own file and - redesign it to be extensible. 
- -Moved the IR unit test to the CodeGen folder to resolve linker errors: - -error: undefined reference to 'vtable for llvm::DroppedVariableStatsIR' ---- - .../llvm/CodeGen/DroppedVariableStats.h | 226 ++++++++++++++++++ - .../llvm/Passes/StandardInstrumentations.h | 80 +------ - llvm/lib/CodeGen/CMakeLists.txt | 1 + - llvm/lib/CodeGen/DroppedVariableStats.cpp | 194 +++++++++++++++ - llvm/lib/Passes/StandardInstrumentations.cpp | 178 +------------- - llvm/unittests/CodeGen/CMakeLists.txt | 1 + - .../DroppedVariableStatsIRTest.cpp} | 74 +++--- - llvm/unittests/IR/CMakeLists.txt | 1 - - 8 files changed, 456 insertions(+), 299 deletions(-) - create mode 100644 llvm/include/llvm/CodeGen/DroppedVariableStats.h - create mode 100644 llvm/lib/CodeGen/DroppedVariableStats.cpp - rename llvm/unittests/{IR/DroppedVariableStatsTest.cpp => CodeGen/DroppedVariableStatsIRTest.cpp} (91%) - -diff --git a/llvm/include/llvm/CodeGen/DroppedVariableStats.h b/llvm/include/llvm/CodeGen/DroppedVariableStats.h -new file mode 100644 -index 000000000000..371d775b02e8 ---- /dev/null -+++ b/llvm/include/llvm/CodeGen/DroppedVariableStats.h -@@ -0,0 +1,226 @@ -+///===- DroppedVariableStats.h - Opt Diagnostics -*- C++ -*----------------===// -+/// -+/// Part of the LLVM Project, under the Apache License v2.0 with LLVM -+/// Exceptions. See https://llvm.org/LICENSE.txt for license information. -+/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -+/// -+///===---------------------------------------------------------------------===// -+/// \file -+/// Dropped Variable Statistics for Debug Information. Reports any number -+/// of #dbg_value that get dropped due to an optimization pass. -+/// -+///===---------------------------------------------------------------------===// -+ -+#ifndef LLVM_CODEGEN_DROPPEDVARIABLESTATS_H -+#define LLVM_CODEGEN_DROPPEDVARIABLESTATS_H -+ -+#include "llvm/CodeGen/MachinePassManager.h" -+#include "llvm/IR/DebugInfoMetadata.h" -+#include "llvm/IR/DiagnosticInfo.h" -+#include "llvm/IR/Function.h" -+#include "llvm/IR/Module.h" -+#include "llvm/IR/PassInstrumentation.h" -+ -+namespace llvm { -+ -+/// A unique key that represents a debug variable. -+/// First const DIScope *: Represents the scope of the debug variable. -+/// Second const DIScope *: Represents the InlinedAt scope of the debug -+/// variable. const DILocalVariable *: It is a pointer to the debug variable -+/// itself. -+using VarID = -+ std::tuple; -+ -+/// A base class to collect and print dropped debug information variable -+/// statistics. -+class DroppedVariableStats { -+public: -+ DroppedVariableStats(bool DroppedVarStatsEnabled) -+ : DroppedVariableStatsEnabled(DroppedVarStatsEnabled) { -+ if (DroppedVarStatsEnabled) -+ llvm::outs() -+ << "Pass Level, Pass Name, Num of Dropped Variables, Func or " -+ "Module Name\n"; -+ }; -+ -+ virtual ~DroppedVariableStats() = default; -+ -+ // We intend this to be unique per-compilation, thus no copies. 
-+ DroppedVariableStats(const DroppedVariableStats &) = delete; -+ void operator=(const DroppedVariableStats &) = delete; -+ -+ bool getPassDroppedVariables() { return PassDroppedVariables; } -+ -+protected: -+ void setup() { -+ DebugVariablesStack.push_back( -+ {DenseMap()}); -+ InlinedAts.push_back( -+ {DenseMap>()}); -+ } -+ -+ void cleanup() { -+ assert(!DebugVariablesStack.empty() && -+ "DebugVariablesStack shouldn't be empty!"); -+ assert(!InlinedAts.empty() && "InlinedAts shouldn't be empty!"); -+ DebugVariablesStack.pop_back(); -+ InlinedAts.pop_back(); -+ } -+ -+ bool DroppedVariableStatsEnabled = false; -+ struct DebugVariables { -+ /// DenseSet of VarIDs before an optimization pass has run. -+ DenseSet DebugVariablesBefore; -+ /// DenseSet of VarIDs after an optimization pass has run. -+ DenseSet DebugVariablesAfter; -+ }; -+ -+protected: -+ /// A stack of a DenseMap, that maps DebugVariables for every pass to an -+ /// llvm::Function. A stack is used because an optimization pass can call -+ /// other passes. -+ SmallVector> DebugVariablesStack; -+ -+ /// A DenseSet tracking whether a scope was visited before. -+ DenseSet VisitedScope; -+ /// A stack of DenseMaps, which map the name of an llvm::Function to a -+ /// DenseMap of VarIDs and their inlinedAt locations before an optimization -+ /// pass has run. -+ SmallVector>> InlinedAts; -+ /// Calculate the number of dropped variables in an llvm::Function or -+ /// llvm::MachineFunction and print the relevant information to stdout. -+ void calculateDroppedStatsAndPrint(DebugVariables &DbgVariables, -+ StringRef FuncName, StringRef PassID, -+ StringRef FuncOrModName, -+ StringRef PassLevel, const Function *Func); -+ -+ /// Check if a \p Var has been dropped or is a false positive. Also update the -+ /// \p DroppedCount if a debug variable is dropped. -+ bool updateDroppedCount(DILocation *DbgLoc, const DIScope *Scope, -+ const DIScope *DbgValScope, -+ DenseMap &InlinedAtsMap, -+ VarID Var, unsigned &DroppedCount); -+ /// Run code to populate relevant data structures over an llvm::Function or -+ /// llvm::MachineFunction. -+ void run(DebugVariables &DbgVariables, StringRef FuncName, bool Before); -+ /// Populate the VarIDSet and InlinedAtMap with the relevant information -+ /// needed for before and after pass analysis to determine dropped variable -+ /// status. -+ void populateVarIDSetAndInlinedMap( -+ const DILocalVariable *DbgVar, DebugLoc DbgLoc, DenseSet &VarIDSet, -+ DenseMap> &InlinedAtsMap, -+ StringRef FuncName, bool Before); -+ /// Visit every llvm::Instruction or llvm::MachineInstruction and check if the -+ /// debug variable denoted by its ID \p Var may have been dropped by an -+ /// optimization pass. -+ virtual void -+ visitEveryInstruction(unsigned &DroppedCount, -+ DenseMap &InlinedAtsMap, -+ VarID Var) = 0; -+ /// Visit every debug record in an llvm::Function or llvm::MachineFunction -+ /// and call populateVarIDSetAndInlinedMap on it. -+ virtual void visitEveryDebugRecord( -+ DenseSet &VarIDSet, -+ DenseMap> &InlinedAtsMap, -+ StringRef FuncName, bool Before) = 0; -+ -+private: -+ /// Remove a dropped debug variable's VarID from all Sets in the -+ /// DroppedVariablesBefore stack. -+ void removeVarFromAllSets(VarID Var, const Function *F) { -+ // Do not remove Var from the last element, it will be popped from the -+ // stack. 
-+ for (auto &DebugVariablesMap : llvm::drop_end(DebugVariablesStack)) -+ DebugVariablesMap[F].DebugVariablesBefore.erase(Var); -+ } -+ /// Return true if \p Scope is the same as \p DbgValScope or a child scope of -+ /// \p DbgValScope, return false otherwise. -+ bool isScopeChildOfOrEqualTo(const DIScope *Scope, -+ const DIScope *DbgValScope); -+ /// Return true if \p InlinedAt is the same as \p DbgValInlinedAt or part of -+ /// the InlinedAt chain, return false otherwise. -+ bool isInlinedAtChildOfOrEqualTo(const DILocation *InlinedAt, -+ const DILocation *DbgValInlinedAt); -+ bool PassDroppedVariables = false; -+}; -+ -+/// A class to collect and print dropped debug information due to LLVM IR -+/// optimization passes. After every LLVM IR pass is run, it will print how many -+/// #dbg_values were dropped due to that pass. -+class DroppedVariableStatsIR : public DroppedVariableStats { -+public: -+ DroppedVariableStatsIR(bool DroppedVarStatsEnabled) -+ : llvm::DroppedVariableStats(DroppedVarStatsEnabled) {} -+ -+ virtual ~DroppedVariableStatsIR() = default; -+ -+ void runBeforePass(Any IR) { -+ setup(); -+ if (const auto *M = unwrapIR(IR)) -+ return this->runOnModule(M, true); -+ if (const auto *F = unwrapIR(IR)) -+ return this->runOnFunction(F, true); -+ } -+ -+ void runAfterPass(StringRef P, Any IR) { -+ if (const auto *M = unwrapIR(IR)) -+ runAfterPassModule(P, M); -+ else if (const auto *F = unwrapIR(IR)) -+ runAfterPassFunction(P, F); -+ cleanup(); -+ } -+ -+ void registerCallbacks(PassInstrumentationCallbacks &PIC); -+ -+private: -+ const Function *Func; -+ -+ void runAfterPassFunction(StringRef PassID, const Function *F) { -+ runOnFunction(F, false); -+ calculateDroppedVarStatsOnFunction(F, PassID, F->getName().str(), -+ "Function"); -+ } -+ -+ void runAfterPassModule(StringRef PassID, const Module *M) { -+ runOnModule(M, false); -+ calculateDroppedVarStatsOnModule(M, PassID, M->getName().str(), "Module"); -+ } -+ /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or -+ /// after a pass has run to facilitate dropped variable calculation for an -+ /// llvm::Function. -+ void runOnFunction(const Function *F, bool Before); -+ /// Iterate over all Instructions in a Function and report any dropped debug -+ /// information. -+ void calculateDroppedVarStatsOnFunction(const Function *F, StringRef PassID, -+ StringRef FuncOrModName, -+ StringRef PassLevel); -+ /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or -+ /// after a pass has run to facilitate dropped variable calculation for an -+ /// llvm::Module. Calls runOnFunction on every Function in the Module. -+ void runOnModule(const Module *M, bool Before); -+ /// Iterate over all Functions in a Module and report any dropped debug -+ /// information. Will call calculateDroppedVarStatsOnFunction on every -+ /// Function. -+ void calculateDroppedVarStatsOnModule(const Module *M, StringRef PassID, -+ StringRef FuncOrModName, -+ StringRef PassLevel); -+ /// Override base class method to run on an llvm::Function specifically. -+ virtual void -+ visitEveryInstruction(unsigned &DroppedCount, -+ DenseMap &InlinedAtsMap, -+ VarID Var) override; -+ /// Override base class method to run on #dbg_values specifically. -+ virtual void visitEveryDebugRecord( -+ DenseSet &VarIDSet, -+ DenseMap> &InlinedAtsMap, -+ StringRef FuncName, bool Before) override; -+ -+ template static const IRUnitT *unwrapIR(Any IR) { -+ const IRUnitT **IRPtr = llvm::any_cast(&IR); -+ return IRPtr ? 
*IRPtr : nullptr; -+ } -+}; -+ -+} // namespace llvm -+ -+#endif -diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h -index 9301a12c740e..12a34c099eaf 100644 ---- a/llvm/include/llvm/Passes/StandardInstrumentations.h -+++ b/llvm/include/llvm/Passes/StandardInstrumentations.h -@@ -19,6 +19,7 @@ - #include "llvm/ADT/SmallVector.h" - #include "llvm/ADT/StringRef.h" - #include "llvm/ADT/StringSet.h" -+#include "llvm/CodeGen/DroppedVariableStats.h" - #include "llvm/CodeGen/MachineBasicBlock.h" - #include "llvm/IR/BasicBlock.h" - #include "llvm/IR/DebugInfoMetadata.h" -@@ -579,83 +580,6 @@ private: - static void SignalHandler(void *); - }; - --/// A class to collect and print dropped debug information variable statistics. --/// After every LLVM IR pass is run, it will print how many #dbg_values were --/// dropped due to that pass. --class DroppedVariableStats { --public: -- DroppedVariableStats(bool DroppedVarStatsEnabled) { -- if (DroppedVarStatsEnabled) -- llvm::outs() -- << "Pass Level, Pass Name, Num of Dropped Variables, Func or " -- "Module Name\n"; -- }; -- // We intend this to be unique per-compilation, thus no copies. -- DroppedVariableStats(const DroppedVariableStats &) = delete; -- void operator=(const DroppedVariableStats &) = delete; -- -- void registerCallbacks(PassInstrumentationCallbacks &PIC); -- void runBeforePass(StringRef PassID, Any IR); -- void runAfterPass(StringRef PassID, Any IR, const PreservedAnalyses &PA); -- void runAfterPassInvalidated(StringRef PassID, const PreservedAnalyses &PA); -- bool getPassDroppedVariables() { return PassDroppedVariables; } -- --private: -- bool PassDroppedVariables = false; -- /// A unique key that represents a #dbg_value. -- using VarID = -- std::tuple; -- -- struct DebugVariables { -- /// DenseSet of VarIDs before an optimization pass has run. -- DenseSet DebugVariablesBefore; -- /// DenseSet of VarIDs after an optimization pass has run. -- DenseSet DebugVariablesAfter; -- }; -- -- /// A stack of a DenseMap, that maps DebugVariables for every pass to an -- /// llvm::Function. A stack is used because an optimization pass can call -- /// other passes. -- SmallVector> DebugVariablesStack; -- -- /// A DenseSet tracking whether a scope was visited before. -- DenseSet VisitedScope; -- /// A stack of DenseMaps, which map the name of an llvm::Function to a -- /// DenseMap of VarIDs and their inlinedAt locations before an optimization -- /// pass has run. -- SmallVector>> InlinedAts; -- -- /// Iterate over all Functions in a Module and report any dropped debug -- /// information. Will call calculateDroppedVarStatsOnFunction on every -- /// Function. -- void calculateDroppedVarStatsOnModule(const Module *M, StringRef PassID, -- std::string FuncOrModName, -- std::string PassLevel); -- /// Iterate over all Instructions in a Function and report any dropped debug -- /// information. -- void calculateDroppedVarStatsOnFunction(const Function *F, StringRef PassID, -- std::string FuncOrModName, -- std::string PassLevel); -- /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or -- /// after a pass has run to facilitate dropped variable calculation for an -- /// llvm::Function. -- void runOnFunction(const Function *F, bool Before); -- /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or -- /// after a pass has run to facilitate dropped variable calculation for an -- /// llvm::Module. Calls runOnFunction on every Function in the Module. 
-- void runOnModule(const Module *M, bool Before); -- /// Remove a dropped #dbg_value VarID from all Sets in the -- /// DroppedVariablesBefore stack. -- void removeVarFromAllSets(VarID Var, const Function *F); -- /// Return true if \p Scope is the same as \p DbgValScope or a child scope of -- /// \p DbgValScope, return false otherwise. -- bool isScopeChildOfOrEqualTo(DIScope *Scope, const DIScope *DbgValScope); -- /// Return true if \p InlinedAt is the same as \p DbgValInlinedAt or part of -- /// the InlinedAt chain, return false otherwise. -- bool isInlinedAtChildOfOrEqualTo(const DILocation *InlinedAt, -- const DILocation *DbgValInlinedAt); --}; -- - /// This class provides an interface to register all the standard pass - /// instrumentations and manages their state (if any). - class StandardInstrumentations { -@@ -673,7 +597,7 @@ class StandardInstrumentations { - PrintCrashIRInstrumentation PrintCrashIR; - IRChangedTester ChangeTester; - VerifyInstrumentation Verify; -- DroppedVariableStats DroppedStats; -+ DroppedVariableStatsIR DroppedStatsIR; - - bool VerifyEach; - -diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt -index 7b47c0e6f75d..263d4a9ee94d 100644 ---- a/llvm/lib/CodeGen/CMakeLists.txt -+++ b/llvm/lib/CodeGen/CMakeLists.txt -@@ -50,6 +50,7 @@ add_llvm_component_library(LLVMCodeGen - DeadMachineInstructionElim.cpp - DetectDeadLanes.cpp - DFAPacketizer.cpp -+ DroppedVariableStats.cpp - DwarfEHPrepare.cpp - EarlyIfConversion.cpp - EdgeBundles.cpp -diff --git a/llvm/lib/CodeGen/DroppedVariableStats.cpp b/llvm/lib/CodeGen/DroppedVariableStats.cpp -new file mode 100644 -index 000000000000..122fcad1293f ---- /dev/null -+++ b/llvm/lib/CodeGen/DroppedVariableStats.cpp -@@ -0,0 +1,194 @@ -+///===- DroppedVariableStats.cpp ------------------------------------------===// -+/// -+/// Part of the LLVM Project, under the Apache License v2.0 with LLVM -+/// Exceptions. See https://llvm.org/LICENSE.txt for license information. -+/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -+/// -+///===---------------------------------------------------------------------===// -+/// \file -+/// Dropped Variable Statistics for Debug Information. Reports any number -+/// of #dbg_value that get dropped due to an optimization pass. 
-+/// -+///===---------------------------------------------------------------------===// -+ -+#include "llvm/CodeGen/DroppedVariableStats.h" -+#include "llvm/IR/DebugInfoMetadata.h" -+#include "llvm/IR/InstIterator.h" -+#include "llvm/IR/Module.h" -+ -+using namespace llvm; -+ -+bool DroppedVariableStats::isScopeChildOfOrEqualTo(const DIScope *Scope, -+ const DIScope *DbgValScope) { -+ while (Scope != nullptr) { -+ if (VisitedScope.find(Scope) == VisitedScope.end()) { -+ VisitedScope.insert(Scope); -+ if (Scope == DbgValScope) { -+ VisitedScope.clear(); -+ return true; -+ } -+ Scope = Scope->getScope(); -+ } else { -+ VisitedScope.clear(); -+ return false; -+ } -+ } -+ return false; -+} -+ -+bool DroppedVariableStats::isInlinedAtChildOfOrEqualTo( -+ const DILocation *InlinedAt, const DILocation *DbgValInlinedAt) { -+ if (DbgValInlinedAt == InlinedAt) -+ return true; -+ if (!DbgValInlinedAt) -+ return false; -+ auto *IA = InlinedAt; -+ while (IA) { -+ if (IA == DbgValInlinedAt) -+ return true; -+ IA = IA->getInlinedAt(); -+ } -+ return false; -+} -+ -+void DroppedVariableStats::calculateDroppedStatsAndPrint( -+ DebugVariables &DbgVariables, StringRef FuncName, StringRef PassID, -+ StringRef FuncOrModName, StringRef PassLevel, const Function *Func) { -+ unsigned DroppedCount = 0; -+ DenseSet &DebugVariablesBeforeSet = DbgVariables.DebugVariablesBefore; -+ DenseSet &DebugVariablesAfterSet = DbgVariables.DebugVariablesAfter; -+ DenseMap &InlinedAtsMap = InlinedAts.back()[FuncName]; -+ // Find an Instruction that shares the same scope as the dropped #dbg_value or -+ // has a scope that is the child of the scope of the #dbg_value, and has an -+ // inlinedAt equal to the inlinedAt of the #dbg_value or it's inlinedAt chain -+ // contains the inlinedAt of the #dbg_value, if such an Instruction is found, -+ // debug information is dropped. -+ for (VarID Var : DebugVariablesBeforeSet) { -+ if (DebugVariablesAfterSet.contains(Var)) -+ continue; -+ visitEveryInstruction(DroppedCount, InlinedAtsMap, Var); -+ removeVarFromAllSets(Var, Func); -+ } -+ if (DroppedCount > 0) { -+ llvm::outs() << PassLevel << ", " << PassID << ", " << DroppedCount << ", " -+ << FuncOrModName << "\n"; -+ PassDroppedVariables = true; -+ } else -+ PassDroppedVariables = false; -+} -+ -+bool DroppedVariableStats::updateDroppedCount( -+ DILocation *DbgLoc, const DIScope *Scope, const DIScope *DbgValScope, -+ DenseMap &InlinedAtsMap, VarID Var, -+ unsigned &DroppedCount) { -+ -+ // If the Scope is a child of, or equal to the DbgValScope and is inlined at -+ // the Var's InlinedAt location, return true to signify that the Var has been -+ // dropped. -+ if (isScopeChildOfOrEqualTo(Scope, DbgValScope)) -+ if (isInlinedAtChildOfOrEqualTo(DbgLoc->getInlinedAt(), -+ InlinedAtsMap[Var])) { -+ // Found another instruction in the variable's scope, so there exists a -+ // break point at which the variable could be observed. Count it as -+ // dropped. -+ DroppedCount++; -+ return true; -+ } -+ return false; -+} -+ -+void DroppedVariableStats::run(DebugVariables &DbgVariables, StringRef FuncName, -+ bool Before) { -+ auto &VarIDSet = (Before ? 
DbgVariables.DebugVariablesBefore -+ : DbgVariables.DebugVariablesAfter); -+ auto &InlinedAtsMap = InlinedAts.back(); -+ if (Before) -+ InlinedAtsMap.try_emplace(FuncName, DenseMap()); -+ VarIDSet = DenseSet(); -+ visitEveryDebugRecord(VarIDSet, InlinedAtsMap, FuncName, Before); -+} -+ -+void DroppedVariableStats::populateVarIDSetAndInlinedMap( -+ const DILocalVariable *DbgVar, DebugLoc DbgLoc, DenseSet &VarIDSet, -+ DenseMap> &InlinedAtsMap, -+ StringRef FuncName, bool Before) { -+ VarID Key{DbgVar->getScope(), DbgLoc->getInlinedAtScope(), DbgVar}; -+ VarIDSet.insert(Key); -+ if (Before) -+ InlinedAtsMap[FuncName].try_emplace(Key, DbgLoc.getInlinedAt()); -+} -+ -+void DroppedVariableStatsIR::runOnFunction(const Function *F, bool Before) { -+ auto &DebugVariables = DebugVariablesStack.back()[F]; -+ auto FuncName = F->getName(); -+ Func = F; -+ run(DebugVariables, FuncName, Before); -+} -+ -+void DroppedVariableStatsIR::calculateDroppedVarStatsOnFunction( -+ const Function *F, StringRef PassID, StringRef FuncOrModName, -+ StringRef PassLevel) { -+ Func = F; -+ StringRef FuncName = F->getName(); -+ DebugVariables &DbgVariables = DebugVariablesStack.back()[F]; -+ calculateDroppedStatsAndPrint(DbgVariables, FuncName, PassID, FuncOrModName, -+ PassLevel, Func); -+} -+ -+void DroppedVariableStatsIR::runOnModule(const Module *M, bool Before) { -+ for (auto &F : *M) -+ runOnFunction(&F, Before); -+} -+ -+void DroppedVariableStatsIR::calculateDroppedVarStatsOnModule( -+ const Module *M, StringRef PassID, StringRef FuncOrModName, -+ StringRef PassLevel) { -+ for (auto &F : *M) { -+ calculateDroppedVarStatsOnFunction(&F, PassID, FuncOrModName, PassLevel); -+ } -+} -+ -+void DroppedVariableStatsIR::registerCallbacks( -+ PassInstrumentationCallbacks &PIC) { -+ if (!DroppedVariableStatsEnabled) -+ return; -+ -+ PIC.registerBeforeNonSkippedPassCallback( -+ [this](StringRef P, Any IR) { return runBeforePass(IR); }); -+ PIC.registerAfterPassCallback( -+ [this](StringRef P, Any IR, const PreservedAnalyses &PA) { -+ return runAfterPass(P, IR); -+ }); -+ PIC.registerAfterPassInvalidatedCallback( -+ [this](StringRef P, const PreservedAnalyses &PA) { return cleanup(); }); -+} -+ -+void DroppedVariableStatsIR::visitEveryInstruction( -+ unsigned &DroppedCount, DenseMap &InlinedAtsMap, -+ VarID Var) { -+ const DIScope *DbgValScope = std::get<0>(Var); -+ for (const auto &I : instructions(Func)) { -+ auto *DbgLoc = I.getDebugLoc().get(); -+ if (!DbgLoc) -+ continue; -+ if (updateDroppedCount(DbgLoc, DbgLoc->getScope(), DbgValScope, -+ InlinedAtsMap, Var, DroppedCount)) -+ break; -+ } -+} -+ -+void DroppedVariableStatsIR::visitEveryDebugRecord( -+ DenseSet &VarIDSet, -+ DenseMap> &InlinedAtsMap, -+ StringRef FuncName, bool Before) { -+ for (const auto &I : instructions(Func)) { -+ for (DbgRecord &DR : I.getDbgRecordRange()) { -+ if (auto *Dbg = dyn_cast(&DR)) { -+ auto *DbgVar = Dbg->getVariable(); -+ auto DbgLoc = DR.getDebugLoc(); -+ populateVarIDSetAndInlinedMap(DbgVar, DbgLoc, VarIDSet, InlinedAtsMap, -+ FuncName, Before); -+ } -+ } -+ } -+} -diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp -index 6259f8f736c8..b766517e68eb 100644 ---- a/llvm/lib/Passes/StandardInstrumentations.cpp -+++ b/llvm/lib/Passes/StandardInstrumentations.cpp -@@ -2462,7 +2462,7 @@ StandardInstrumentations::StandardInstrumentations( - PrintChanged == ChangePrinter::ColourDiffVerbose || - PrintChanged == ChangePrinter::ColourDiffQuiet), - WebsiteChangeReporter(PrintChanged == 
ChangePrinter::DotCfgVerbose), -- Verify(DebugLogging), DroppedStats(DroppedVarStats), -+ Verify(DebugLogging), DroppedStatsIR(DroppedVarStats), - VerifyEach(VerifyEach) {} - - PrintCrashIRInstrumentation *PrintCrashIRInstrumentation::CrashReporter = -@@ -2523,180 +2523,6 @@ void PrintCrashIRInstrumentation::registerCallbacks( - }); - } - --void DroppedVariableStats::registerCallbacks( -- PassInstrumentationCallbacks &PIC) { -- if (!DroppedVarStats) -- return; -- -- PIC.registerBeforeNonSkippedPassCallback( -- [this](StringRef P, Any IR) { return this->runBeforePass(P, IR); }); -- PIC.registerAfterPassCallback( -- [this](StringRef P, Any IR, const PreservedAnalyses &PA) { -- return this->runAfterPass(P, IR, PA); -- }); -- PIC.registerAfterPassInvalidatedCallback( -- [this](StringRef P, const PreservedAnalyses &PA) { -- return this->runAfterPassInvalidated(P, PA); -- }); --} -- --void DroppedVariableStats::runBeforePass(StringRef PassID, Any IR) { -- DebugVariablesStack.push_back({DenseMap()}); -- InlinedAts.push_back({DenseMap>()}); -- if (auto *M = unwrapIR(IR)) -- return this->runOnModule(M, true); -- if (auto *F = unwrapIR(IR)) -- return this->runOnFunction(F, true); --} -- --void DroppedVariableStats::runOnFunction(const Function *F, bool Before) { -- auto &DebugVariables = DebugVariablesStack.back()[F]; -- auto &VarIDSet = (Before ? DebugVariables.DebugVariablesBefore -- : DebugVariables.DebugVariablesAfter); -- auto &InlinedAtsMap = InlinedAts.back(); -- auto FuncName = F->getName(); -- if (Before) -- InlinedAtsMap.try_emplace(FuncName, DenseMap()); -- VarIDSet = DenseSet(); -- for (const auto &I : instructions(F)) { -- for (DbgRecord &DR : I.getDbgRecordRange()) { -- if (auto *Dbg = dyn_cast(&DR)) { -- auto *DbgVar = Dbg->getVariable(); -- auto DbgLoc = DR.getDebugLoc(); -- VarID Key{DbgVar->getScope(), DbgLoc->getInlinedAtScope(), DbgVar}; -- VarIDSet.insert(Key); -- if (Before) -- InlinedAtsMap[FuncName].try_emplace(Key, DbgLoc.getInlinedAt()); -- } -- } -- } --} -- --void DroppedVariableStats::runOnModule(const Module *M, bool Before) { -- for (auto &F : *M) -- runOnFunction(&F, Before); --} -- --void DroppedVariableStats::removeVarFromAllSets(VarID Var, const Function *F) { -- // Do not remove Var from the last element, it will be popped from the stack. 
-- for (auto &DebugVariablesMap : llvm::drop_end(DebugVariablesStack)) -- DebugVariablesMap[F].DebugVariablesBefore.erase(Var); --} -- --void DroppedVariableStats::calculateDroppedVarStatsOnModule( -- const Module *M, StringRef PassID, std::string FuncOrModName, -- std::string PassLevel) { -- for (auto &F : *M) { -- calculateDroppedVarStatsOnFunction(&F, PassID, FuncOrModName, PassLevel); -- } --} -- --void DroppedVariableStats::calculateDroppedVarStatsOnFunction( -- const Function *F, StringRef PassID, std::string FuncOrModName, -- std::string PassLevel) { -- unsigned DroppedCount = 0; -- StringRef FuncName = F->getName(); -- DebugVariables &DbgVariables = DebugVariablesStack.back()[F]; -- DenseSet &DebugVariablesBeforeSet = DbgVariables.DebugVariablesBefore; -- DenseSet &DebugVariablesAfterSet = DbgVariables.DebugVariablesAfter; -- DenseMap &InlinedAtsMap = InlinedAts.back()[FuncName]; -- // Find an Instruction that shares the same scope as the dropped #dbg_value or -- // has a scope that is the child of the scope of the #dbg_value, and has an -- // inlinedAt equal to the inlinedAt of the #dbg_value or it's inlinedAt chain -- // contains the inlinedAt of the #dbg_value, if such an Instruction is found, -- // debug information is dropped. -- for (VarID Var : DebugVariablesBeforeSet) { -- if (DebugVariablesAfterSet.contains(Var)) -- continue; -- const DIScope *DbgValScope = std::get<0>(Var); -- for (const auto &I : instructions(F)) { -- auto *DbgLoc = I.getDebugLoc().get(); -- if (!DbgLoc) -- continue; -- -- auto *Scope = DbgLoc->getScope(); -- if (isScopeChildOfOrEqualTo(Scope, DbgValScope)) { -- if (isInlinedAtChildOfOrEqualTo(DbgLoc->getInlinedAt(), -- InlinedAtsMap[Var])) { -- // Found another instruction in the variable's scope, so there exists -- // a break point at which the variable could be observed. Count it as -- // dropped. 
-- DroppedCount++; -- break; -- } -- } -- } -- removeVarFromAllSets(Var, F); -- } -- if (DroppedCount > 0) { -- llvm::outs() << PassLevel << ", " << PassID << ", " << DroppedCount << ", " -- << FuncOrModName << "\n"; -- PassDroppedVariables = true; -- } else -- PassDroppedVariables = false; --} -- --void DroppedVariableStats::runAfterPassInvalidated( -- StringRef PassID, const PreservedAnalyses &PA) { -- DebugVariablesStack.pop_back(); -- InlinedAts.pop_back(); --} -- --void DroppedVariableStats::runAfterPass(StringRef PassID, Any IR, -- const PreservedAnalyses &PA) { -- std::string PassLevel; -- std::string FuncOrModName; -- if (auto *M = unwrapIR(IR)) { -- this->runOnModule(M, false); -- PassLevel = "Module"; -- FuncOrModName = M->getName(); -- calculateDroppedVarStatsOnModule(M, PassID, FuncOrModName, PassLevel); -- } else if (auto *F = unwrapIR(IR)) { -- this->runOnFunction(F, false); -- PassLevel = "Function"; -- FuncOrModName = F->getName(); -- calculateDroppedVarStatsOnFunction(F, PassID, FuncOrModName, PassLevel); -- } -- -- DebugVariablesStack.pop_back(); -- InlinedAts.pop_back(); --} -- --bool DroppedVariableStats::isScopeChildOfOrEqualTo(DIScope *Scope, -- const DIScope *DbgValScope) { -- while (Scope != nullptr) { -- if (VisitedScope.find(Scope) == VisitedScope.end()) { -- VisitedScope.insert(Scope); -- if (Scope == DbgValScope) { -- VisitedScope.clear(); -- return true; -- } -- Scope = Scope->getScope(); -- } else { -- VisitedScope.clear(); -- return false; -- } -- } -- return false; --} -- --bool DroppedVariableStats::isInlinedAtChildOfOrEqualTo( -- const DILocation *InlinedAt, const DILocation *DbgValInlinedAt) { -- if (DbgValInlinedAt == InlinedAt) -- return true; -- if (!DbgValInlinedAt) -- return false; -- if (!InlinedAt) -- return false; -- auto *IA = InlinedAt; -- while (IA) { -- if (IA == DbgValInlinedAt) -- return true; -- IA = IA->getInlinedAt(); -- } -- return false; --} -- - void StandardInstrumentations::registerCallbacks( - PassInstrumentationCallbacks &PIC, ModuleAnalysisManager *MAM) { - PrintIR.registerCallbacks(PIC); -@@ -2712,7 +2538,7 @@ void StandardInstrumentations::registerCallbacks( - WebsiteChangeReporter.registerCallbacks(PIC); - ChangeTester.registerCallbacks(PIC); - PrintCrashIR.registerCallbacks(PIC); -- DroppedStats.registerCallbacks(PIC); -+ DroppedStatsIR.registerCallbacks(PIC); - if (MAM) - PreservedCFGChecker.registerCallbacks(PIC, *MAM); - -diff --git a/llvm/unittests/CodeGen/CMakeLists.txt b/llvm/unittests/CodeGen/CMakeLists.txt -index 963cdcc0275e..807fd1a9b7b5 100644 ---- a/llvm/unittests/CodeGen/CMakeLists.txt -+++ b/llvm/unittests/CodeGen/CMakeLists.txt -@@ -27,6 +27,7 @@ add_llvm_unittest(CodeGenTests - CCStateTest.cpp - DIEHashTest.cpp - DIETest.cpp -+ DroppedVariableStatsIRTest.cpp - DwarfStringPoolEntryRefTest.cpp - InstrRefLDVTest.cpp - LowLevelTypeTest.cpp -diff --git a/llvm/unittests/IR/DroppedVariableStatsTest.cpp b/llvm/unittests/CodeGen/DroppedVariableStatsIRTest.cpp -similarity index 91% -rename from llvm/unittests/IR/DroppedVariableStatsTest.cpp -rename to llvm/unittests/CodeGen/DroppedVariableStatsIRTest.cpp -index 61f3a87bb355..094ec7b65763 100644 ---- a/llvm/unittests/IR/DroppedVariableStatsTest.cpp -+++ b/llvm/unittests/CodeGen/DroppedVariableStatsIRTest.cpp -@@ -1,5 +1,4 @@ --//===- unittests/IR/DroppedVariableStatsTest.cpp - TimePassesHandler tests --//----------===// -+//===- unittests/IR/DroppedVariableStatsIRTest.cpp ------------------------===// - // - // Part of the LLVM Project, under the Apache License v2.0 with 
LLVM Exceptions. - // See https://llvm.org/LICENSE.txt for license information. -@@ -8,6 +7,7 @@ - //===----------------------------------------------------------------------===// - - #include "llvm/AsmParser/Parser.h" -+#include "llvm/CodeGen/DroppedVariableStats.h" - #include "llvm/IR/Function.h" - #include "llvm/IR/InstIterator.h" - #include "llvm/IR/LegacyPassManager.h" -@@ -44,7 +44,7 @@ namespace { - // This test ensures that if a #dbg_value and an instruction that exists in the - // same scope as that #dbg_value are both deleted as a result of an optimization - // pass, debug information is considered not dropped. --TEST(DroppedVariableStats, BothDeleted) { -+TEST(DroppedVariableStatsIR, BothDeleted) { - PassInstrumentationCallbacks PIC; - PassInstrumentation PI(&PIC); - -@@ -79,9 +79,8 @@ TEST(DroppedVariableStats, BothDeleted) { - std::unique_ptr M = parseIR(C, IR); - ASSERT_TRUE(M); - -- DroppedVariableStats Stats(true); -- Stats.runBeforePass("Test", -- llvm::Any(const_cast(M.get()))); -+ DroppedVariableStatsIR Stats(true); -+ Stats.runBeforePass(llvm::Any(const_cast(M.get()))); - - // This loop simulates an IR pass that drops debug information. - for (auto &F : *M) { -@@ -92,16 +91,15 @@ TEST(DroppedVariableStats, BothDeleted) { - } - break; - } -- PreservedAnalyses PA; - Stats.runAfterPass("Test", -- llvm::Any(const_cast(M.get())), PA); -+ llvm::Any(const_cast(M.get()))); - ASSERT_EQ(Stats.getPassDroppedVariables(), false); - } - - // This test ensures that if a #dbg_value is dropped after an optimization pass, - // but an instruction that shares the same scope as the #dbg_value still exists, - // debug information is conisdered dropped. --TEST(DroppedVariableStats, DbgValLost) { -+TEST(DroppedVariableStatsIR, DbgValLost) { - PassInstrumentationCallbacks PIC; - PassInstrumentation PI(&PIC); - -@@ -136,9 +134,8 @@ TEST(DroppedVariableStats, DbgValLost) { - std::unique_ptr M = parseIR(C, IR); - ASSERT_TRUE(M); - -- DroppedVariableStats Stats(true); -- Stats.runBeforePass("Test", -- llvm::Any(const_cast(M.get()))); -+ DroppedVariableStatsIR Stats(true); -+ Stats.runBeforePass(llvm::Any(const_cast(M.get()))); - - // This loop simulates an IR pass that drops debug information. - for (auto &F : *M) { -@@ -148,16 +145,15 @@ TEST(DroppedVariableStats, DbgValLost) { - } - break; - } -- PreservedAnalyses PA; - Stats.runAfterPass("Test", -- llvm::Any(const_cast(M.get())), PA); -+ llvm::Any(const_cast(M.get()))); - ASSERT_EQ(Stats.getPassDroppedVariables(), true); - } - - // This test ensures that if a #dbg_value is dropped after an optimization pass, - // but an instruction that has an unrelated scope as the #dbg_value still - // exists, debug information is conisdered not dropped. --TEST(DroppedVariableStats, UnrelatedScopes) { -+TEST(DroppedVariableStatsIR, UnrelatedScopes) { - PassInstrumentationCallbacks PIC; - PassInstrumentation PI(&PIC); - -@@ -193,9 +189,8 @@ TEST(DroppedVariableStats, UnrelatedScopes) { - std::unique_ptr M = parseIR(C, IR); - ASSERT_TRUE(M); - -- DroppedVariableStats Stats(true); -- Stats.runBeforePass("Test", -- llvm::Any(const_cast(M.get()))); -+ DroppedVariableStatsIR Stats(true); -+ Stats.runBeforePass(llvm::Any(const_cast(M.get()))); - - // This loop simulates an IR pass that drops debug information. 
- for (auto &F : *M) { -@@ -205,16 +200,15 @@ TEST(DroppedVariableStats, UnrelatedScopes) { - } - break; - } -- PreservedAnalyses PA; - Stats.runAfterPass("Test", -- llvm::Any(const_cast(M.get())), PA); -+ llvm::Any(const_cast(M.get()))); - ASSERT_EQ(Stats.getPassDroppedVariables(), false); - } - - // This test ensures that if a #dbg_value is dropped after an optimization pass, - // but an instruction that has a scope which is a child of the #dbg_value scope - // still exists, debug information is conisdered dropped. --TEST(DroppedVariableStats, ChildScopes) { -+TEST(DroppedVariableStatsIR, ChildScopes) { - PassInstrumentationCallbacks PIC; - PassInstrumentation PI(&PIC); - -@@ -250,9 +244,8 @@ TEST(DroppedVariableStats, ChildScopes) { - std::unique_ptr M = parseIR(C, IR); - ASSERT_TRUE(M); - -- DroppedVariableStats Stats(true); -- Stats.runBeforePass("Test", -- llvm::Any(const_cast(M.get()))); -+ DroppedVariableStatsIR Stats(true); -+ Stats.runBeforePass(llvm::Any(const_cast(M.get()))); - - // This loop simulates an IR pass that drops debug information. - for (auto &F : *M) { -@@ -262,9 +255,8 @@ TEST(DroppedVariableStats, ChildScopes) { - } - break; - } -- PreservedAnalyses PA; - Stats.runAfterPass("Test", -- llvm::Any(const_cast(M.get())), PA); -+ llvm::Any(const_cast(M.get()))); - ASSERT_EQ(Stats.getPassDroppedVariables(), true); - } - -@@ -272,7 +264,7 @@ TEST(DroppedVariableStats, ChildScopes) { - // but an instruction that has a scope which is a child of the #dbg_value scope - // still exists, and the #dbg_value is inlined at another location, debug - // information is conisdered not dropped. --TEST(DroppedVariableStats, InlinedAt) { -+TEST(DroppedVariableStatsIR, InlinedAt) { - PassInstrumentationCallbacks PIC; - PassInstrumentation PI(&PIC); - -@@ -308,9 +300,8 @@ TEST(DroppedVariableStats, InlinedAt) { - std::unique_ptr M = parseIR(C, IR); - ASSERT_TRUE(M); - -- DroppedVariableStats Stats(true); -- Stats.runBeforePass("Test", -- llvm::Any(const_cast(M.get()))); -+ DroppedVariableStatsIR Stats(true); -+ Stats.runBeforePass(llvm::Any(const_cast(M.get()))); - - // This loop simulates an IR pass that drops debug information. - for (auto &F : *M) { -@@ -320,9 +311,8 @@ TEST(DroppedVariableStats, InlinedAt) { - } - break; - } -- PreservedAnalyses PA; - Stats.runAfterPass("Test", -- llvm::Any(const_cast(M.get())), PA); -+ llvm::Any(const_cast(M.get()))); - ASSERT_EQ(Stats.getPassDroppedVariables(), false); - } - -@@ -330,7 +320,7 @@ TEST(DroppedVariableStats, InlinedAt) { - // but an instruction that has a scope which is a child of the #dbg_value scope - // still exists, and the #dbg_value and the instruction are inlined at another - // location, debug information is conisdered dropped. --TEST(DroppedVariableStats, InlinedAtShared) { -+TEST(DroppedVariableStatsIR, InlinedAtShared) { - PassInstrumentationCallbacks PIC; - PassInstrumentation PI(&PIC); - -@@ -366,9 +356,8 @@ TEST(DroppedVariableStats, InlinedAtShared) { - std::unique_ptr M = parseIR(C, IR); - ASSERT_TRUE(M); - -- DroppedVariableStats Stats(true); -- Stats.runBeforePass("Test", -- llvm::Any(const_cast(M.get()))); -+ DroppedVariableStatsIR Stats(true); -+ Stats.runBeforePass(llvm::Any(const_cast(M.get()))); - - // This loop simulates an IR pass that drops debug information. 
- for (auto &F : *M) { -@@ -378,9 +367,8 @@ TEST(DroppedVariableStats, InlinedAtShared) { - } - break; - } -- PreservedAnalyses PA; - Stats.runAfterPass("Test", -- llvm::Any(const_cast(M.get())), PA); -+ llvm::Any(const_cast(M.get()))); - ASSERT_EQ(Stats.getPassDroppedVariables(), true); - } - -@@ -388,7 +376,7 @@ TEST(DroppedVariableStats, InlinedAtShared) { - // but an instruction that has a scope which is a child of the #dbg_value scope - // still exists, and the instruction is inlined at a location that is the - // #dbg_value's inlined at location, debug information is conisdered dropped. --TEST(DroppedVariableStats, InlinedAtChild) { -+TEST(DroppedVariableStatsIR, InlinedAtChild) { - PassInstrumentationCallbacks PIC; - PassInstrumentation PI(&PIC); - -@@ -425,9 +413,8 @@ TEST(DroppedVariableStats, InlinedAtChild) { - std::unique_ptr M = parseIR(C, IR); - ASSERT_TRUE(M); - -- DroppedVariableStats Stats(true); -- Stats.runBeforePass("Test", -- llvm::Any(const_cast(M.get()))); -+ DroppedVariableStatsIR Stats(true); -+ Stats.runBeforePass(llvm::Any(const_cast(M.get()))); - - // This loop simulates an IR pass that drops debug information. - for (auto &F : *M) { -@@ -437,9 +424,8 @@ TEST(DroppedVariableStats, InlinedAtChild) { - } - break; - } -- PreservedAnalyses PA; - Stats.runAfterPass("Test", -- llvm::Any(const_cast(M.get())), PA); -+ llvm::Any(const_cast(M.get()))); - ASSERT_EQ(Stats.getPassDroppedVariables(), true); - } - -diff --git a/llvm/unittests/IR/CMakeLists.txt b/llvm/unittests/IR/CMakeLists.txt -index ed93ee547d22..e5c8630f3eed 100644 ---- a/llvm/unittests/IR/CMakeLists.txt -+++ b/llvm/unittests/IR/CMakeLists.txt -@@ -43,7 +43,6 @@ add_llvm_unittest(IRTests - ShuffleVectorInstTest.cpp - StructuralHashTest.cpp - TimePassesTest.cpp -- DroppedVariableStatsTest.cpp - TypesTest.cpp - UseTest.cpp - UserTest.cpp --- -2.46.2 - diff --git a/llvm/include/llvm/CodeGen/DroppedVariableStats.h b/llvm/include/llvm/CodeGen/DroppedVariableStats.h deleted file mode 100644 index c7b654ea58557..0000000000000 --- a/llvm/include/llvm/CodeGen/DroppedVariableStats.h +++ /dev/null @@ -1,224 +0,0 @@ -///===- DroppedVariableStats.h - Opt Diagnostics -*- C++ -*----------------===// -/// -/// Part of the LLVM Project, under the Apache License v2.0 with LLVM -/// Exceptions. See https://llvm.org/LICENSE.txt for license information. -/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -/// -///===---------------------------------------------------------------------===// -/// \file -/// Dropped Variable Statistics for Debug Information. Reports any number -/// of #dbg_value that get dropped due to an optimization pass. -/// -///===---------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_DROPPEDVARIABLESTATS_H -#define LLVM_CODEGEN_DROPPEDVARIABLESTATS_H - -#include "llvm/CodeGen/MachinePassManager.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/DiagnosticInfo.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/PassInstrumentation.h" - -namespace llvm { - -/// A unique key that represents a debug variable. -/// First const DIScope *: Represents the scope of the debug variable. -/// Second const DIScope *: Represents the InlinedAt scope of the debug -/// variable. const DILocalVariable *: It is a pointer to the debug variable -/// itself. -using VarID = - std::tuple; - -/// A base class to collect and print dropped debug information variable -/// statistics. 
-class DroppedVariableStats { -public: - DroppedVariableStats(bool DroppedVarStatsEnabled) - : DroppedVariableStatsEnabled(DroppedVarStatsEnabled) { - if (DroppedVarStatsEnabled) - llvm::outs() - << "Pass Level, Pass Name, Num of Dropped Variables, Func or " - "Module Name\n"; - }; - - virtual ~DroppedVariableStats() = default; - - // We intend this to be unique per-compilation, thus no copies. - DroppedVariableStats(const DroppedVariableStats &) = delete; - void operator=(const DroppedVariableStats &) = delete; - - bool getPassDroppedVariables() { return PassDroppedVariables; } - -protected: - void setup() { - DebugVariablesStack.push_back( - {DenseMap()}); - InlinedAts.push_back( - {DenseMap>()}); - } - - void cleanup() { - assert(!DebugVariablesStack.empty() && - "DebugVariablesStack shouldn't be empty!"); - assert(!InlinedAts.empty() && "InlinedAts shouldn't be empty!"); - DebugVariablesStack.pop_back(); - InlinedAts.pop_back(); - } - - bool DroppedVariableStatsEnabled = false; - struct DebugVariables { - /// DenseSet of VarIDs before an optimization pass has run. - DenseSet DebugVariablesBefore; - /// DenseSet of VarIDs after an optimization pass has run. - DenseSet DebugVariablesAfter; - }; - -protected: - /// A stack of a DenseMap, that maps DebugVariables for every pass to an - /// llvm::Function. A stack is used because an optimization pass can call - /// other passes. - SmallVector> DebugVariablesStack; - - /// A DenseSet tracking whether a scope was visited before. - DenseSet VisitedScope; - /// A stack of DenseMaps, which map the name of an llvm::Function to a - /// DenseMap of VarIDs and their inlinedAt locations before an optimization - /// pass has run. - SmallVector>> InlinedAts; - /// Calculate the number of dropped variables in an llvm::Function or - /// llvm::MachineFunction and print the relevant information to stdout. - void calculateDroppedStatsAndPrint(DebugVariables &DbgVariables, - StringRef FuncName, StringRef PassID, - StringRef FuncOrModName, - StringRef PassLevel, const Function *Func); - - /// Check if a \p Var has been dropped or is a false positive. Also update the - /// \p DroppedCount if a debug variable is dropped. - bool updateDroppedCount(DILocation *DbgLoc, const DIScope *Scope, - const DIScope *DbgValScope, - DenseMap &InlinedAtsMap, - VarID Var, unsigned &DroppedCount); - /// Run code to populate relevant data structures over an llvm::Function or - /// llvm::MachineFunction. - void run(DebugVariables &DbgVariables, StringRef FuncName, bool Before); - /// Populate the VarIDSet and InlinedAtMap with the relevant information - /// needed for before and after pass analysis to determine dropped variable - /// status. - void populateVarIDSetAndInlinedMap( - const DILocalVariable *DbgVar, DebugLoc DbgLoc, DenseSet &VarIDSet, - DenseMap> &InlinedAtsMap, - StringRef FuncName, bool Before); - /// Visit every llvm::Instruction or llvm::MachineInstruction and check if the - /// debug variable denoted by its ID \p Var may have been dropped by an - /// optimization pass. - virtual void - visitEveryInstruction(unsigned &DroppedCount, - DenseMap &InlinedAtsMap, - VarID Var) = 0; - /// Visit every debug record in an llvm::Function or llvm::MachineFunction - /// and call populateVarIDSetAndInlinedMap on it. - virtual void visitEveryDebugRecord( - DenseSet &VarIDSet, - DenseMap> &InlinedAtsMap, - StringRef FuncName, bool Before) = 0; - -private: - /// Remove a dropped debug variable's VarID from all Sets in the - /// DroppedVariablesBefore stack. 
- void removeVarFromAllSets(VarID Var, const Function *F) { - // Do not remove Var from the last element, it will be popped from the - // stack. - for (auto &DebugVariablesMap : llvm::drop_end(DebugVariablesStack)) - DebugVariablesMap[F].DebugVariablesBefore.erase(Var); - } - /// Return true if \p Scope is the same as \p DbgValScope or a child scope of - /// \p DbgValScope, return false otherwise. - bool isScopeChildOfOrEqualTo(const DIScope *Scope, - const DIScope *DbgValScope); - /// Return true if \p InlinedAt is the same as \p DbgValInlinedAt or part of - /// the InlinedAt chain, return false otherwise. - bool isInlinedAtChildOfOrEqualTo(const DILocation *InlinedAt, - const DILocation *DbgValInlinedAt); - bool PassDroppedVariables = false; -}; - -/// A class to collect and print dropped debug information due to LLVM IR -/// optimization passes. After every LLVM IR pass is run, it will print how many -/// #dbg_values were dropped due to that pass. -class DroppedVariableStatsIR : public DroppedVariableStats { -public: - DroppedVariableStatsIR(bool DroppedVarStatsEnabled) - : llvm::DroppedVariableStats(DroppedVarStatsEnabled) {} - - void runBeforePass(Any IR) { - setup(); - if (const auto *M = unwrapIR(IR)) - return this->runOnModule(M, true); - if (const auto *F = unwrapIR(IR)) - return this->runOnFunction(F, true); - } - - void runAfterPass(StringRef P, Any IR) { - if (const auto *M = unwrapIR(IR)) - runAfterPassModule(P, M); - else if (const auto *F = unwrapIR(IR)) - runAfterPassFunction(P, F); - cleanup(); - } - - void registerCallbacks(PassInstrumentationCallbacks &PIC); - -private: - const Function *Func; - - void runAfterPassFunction(StringRef PassID, const Function *F) { - runOnFunction(F, false); - calculateDroppedVarStatsOnFunction(F, PassID, F->getName().str(), - "Function"); - } - - void runAfterPassModule(StringRef PassID, const Module *M) { - runOnModule(M, false); - calculateDroppedVarStatsOnModule(M, PassID, M->getName().str(), "Module"); - } - /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or - /// after a pass has run to facilitate dropped variable calculation for an - /// llvm::Function. - void runOnFunction(const Function *F, bool Before); - /// Iterate over all Instructions in a Function and report any dropped debug - /// information. - void calculateDroppedVarStatsOnFunction(const Function *F, StringRef PassID, - StringRef FuncOrModName, - StringRef PassLevel); - /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or - /// after a pass has run to facilitate dropped variable calculation for an - /// llvm::Module. Calls runOnFunction on every Function in the Module. - void runOnModule(const Module *M, bool Before); - /// Iterate over all Functions in a Module and report any dropped debug - /// information. Will call calculateDroppedVarStatsOnFunction on every - /// Function. - void calculateDroppedVarStatsOnModule(const Module *M, StringRef PassID, - StringRef FuncOrModName, - StringRef PassLevel); - /// Override base class method to run on an llvm::Function specifically. - virtual void - visitEveryInstruction(unsigned &DroppedCount, - DenseMap &InlinedAtsMap, - VarID Var) override; - /// Override base class method to run on #dbg_values specifically. - virtual void visitEveryDebugRecord( - DenseSet &VarIDSet, - DenseMap> &InlinedAtsMap, - StringRef FuncName, bool Before) override; - - template static const IRUnitT *unwrapIR(Any IR) { - const IRUnitT **IRPtr = llvm::any_cast(&IR); - return IRPtr ? 
*IRPtr : nullptr; - } -}; - -} // namespace llvm - -#endif diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h index 12a34c099eaff..9301a12c740ee 100644 --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -19,7 +19,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" -#include "llvm/CodeGen/DroppedVariableStats.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -580,6 +579,83 @@ class PrintCrashIRInstrumentation { static void SignalHandler(void *); }; +/// A class to collect and print dropped debug information variable statistics. +/// After every LLVM IR pass is run, it will print how many #dbg_values were +/// dropped due to that pass. +class DroppedVariableStats { +public: + DroppedVariableStats(bool DroppedVarStatsEnabled) { + if (DroppedVarStatsEnabled) + llvm::outs() + << "Pass Level, Pass Name, Num of Dropped Variables, Func or " + "Module Name\n"; + }; + // We intend this to be unique per-compilation, thus no copies. + DroppedVariableStats(const DroppedVariableStats &) = delete; + void operator=(const DroppedVariableStats &) = delete; + + void registerCallbacks(PassInstrumentationCallbacks &PIC); + void runBeforePass(StringRef PassID, Any IR); + void runAfterPass(StringRef PassID, Any IR, const PreservedAnalyses &PA); + void runAfterPassInvalidated(StringRef PassID, const PreservedAnalyses &PA); + bool getPassDroppedVariables() { return PassDroppedVariables; } + +private: + bool PassDroppedVariables = false; + /// A unique key that represents a #dbg_value. + using VarID = + std::tuple; + + struct DebugVariables { + /// DenseSet of VarIDs before an optimization pass has run. + DenseSet DebugVariablesBefore; + /// DenseSet of VarIDs after an optimization pass has run. + DenseSet DebugVariablesAfter; + }; + + /// A stack of a DenseMap, that maps DebugVariables for every pass to an + /// llvm::Function. A stack is used because an optimization pass can call + /// other passes. + SmallVector> DebugVariablesStack; + + /// A DenseSet tracking whether a scope was visited before. + DenseSet VisitedScope; + /// A stack of DenseMaps, which map the name of an llvm::Function to a + /// DenseMap of VarIDs and their inlinedAt locations before an optimization + /// pass has run. + SmallVector>> InlinedAts; + + /// Iterate over all Functions in a Module and report any dropped debug + /// information. Will call calculateDroppedVarStatsOnFunction on every + /// Function. + void calculateDroppedVarStatsOnModule(const Module *M, StringRef PassID, + std::string FuncOrModName, + std::string PassLevel); + /// Iterate over all Instructions in a Function and report any dropped debug + /// information. + void calculateDroppedVarStatsOnFunction(const Function *F, StringRef PassID, + std::string FuncOrModName, + std::string PassLevel); + /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or + /// after a pass has run to facilitate dropped variable calculation for an + /// llvm::Function. + void runOnFunction(const Function *F, bool Before); + /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or + /// after a pass has run to facilitate dropped variable calculation for an + /// llvm::Module. Calls runOnFunction on every Function in the Module. 
+ void runOnModule(const Module *M, bool Before); + /// Remove a dropped #dbg_value VarID from all Sets in the + /// DroppedVariablesBefore stack. + void removeVarFromAllSets(VarID Var, const Function *F); + /// Return true if \p Scope is the same as \p DbgValScope or a child scope of + /// \p DbgValScope, return false otherwise. + bool isScopeChildOfOrEqualTo(DIScope *Scope, const DIScope *DbgValScope); + /// Return true if \p InlinedAt is the same as \p DbgValInlinedAt or part of + /// the InlinedAt chain, return false otherwise. + bool isInlinedAtChildOfOrEqualTo(const DILocation *InlinedAt, + const DILocation *DbgValInlinedAt); +}; + /// This class provides an interface to register all the standard pass /// instrumentations and manages their state (if any). class StandardInstrumentations { @@ -597,7 +673,7 @@ class StandardInstrumentations { PrintCrashIRInstrumentation PrintCrashIR; IRChangedTester ChangeTester; VerifyInstrumentation Verify; - DroppedVariableStatsIR DroppedStatsIR; + DroppedVariableStats DroppedStats; bool VerifyEach; diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 263d4a9ee94d2..7b47c0e6f75db 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -50,7 +50,6 @@ add_llvm_component_library(LLVMCodeGen DeadMachineInstructionElim.cpp DetectDeadLanes.cpp DFAPacketizer.cpp - DroppedVariableStats.cpp DwarfEHPrepare.cpp EarlyIfConversion.cpp EdgeBundles.cpp diff --git a/llvm/lib/CodeGen/DroppedVariableStats.cpp b/llvm/lib/CodeGen/DroppedVariableStats.cpp deleted file mode 100644 index 122fcad1293f1..0000000000000 --- a/llvm/lib/CodeGen/DroppedVariableStats.cpp +++ /dev/null @@ -1,194 +0,0 @@ -///===- DroppedVariableStats.cpp ------------------------------------------===// -/// -/// Part of the LLVM Project, under the Apache License v2.0 with LLVM -/// Exceptions. See https://llvm.org/LICENSE.txt for license information. -/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -/// -///===---------------------------------------------------------------------===// -/// \file -/// Dropped Variable Statistics for Debug Information. Reports any number -/// of #dbg_value that get dropped due to an optimization pass. 
-/// -///===---------------------------------------------------------------------===// - -#include "llvm/CodeGen/DroppedVariableStats.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/Module.h" - -using namespace llvm; - -bool DroppedVariableStats::isScopeChildOfOrEqualTo(const DIScope *Scope, - const DIScope *DbgValScope) { - while (Scope != nullptr) { - if (VisitedScope.find(Scope) == VisitedScope.end()) { - VisitedScope.insert(Scope); - if (Scope == DbgValScope) { - VisitedScope.clear(); - return true; - } - Scope = Scope->getScope(); - } else { - VisitedScope.clear(); - return false; - } - } - return false; -} - -bool DroppedVariableStats::isInlinedAtChildOfOrEqualTo( - const DILocation *InlinedAt, const DILocation *DbgValInlinedAt) { - if (DbgValInlinedAt == InlinedAt) - return true; - if (!DbgValInlinedAt) - return false; - auto *IA = InlinedAt; - while (IA) { - if (IA == DbgValInlinedAt) - return true; - IA = IA->getInlinedAt(); - } - return false; -} - -void DroppedVariableStats::calculateDroppedStatsAndPrint( - DebugVariables &DbgVariables, StringRef FuncName, StringRef PassID, - StringRef FuncOrModName, StringRef PassLevel, const Function *Func) { - unsigned DroppedCount = 0; - DenseSet &DebugVariablesBeforeSet = DbgVariables.DebugVariablesBefore; - DenseSet &DebugVariablesAfterSet = DbgVariables.DebugVariablesAfter; - DenseMap &InlinedAtsMap = InlinedAts.back()[FuncName]; - // Find an Instruction that shares the same scope as the dropped #dbg_value or - // has a scope that is the child of the scope of the #dbg_value, and has an - // inlinedAt equal to the inlinedAt of the #dbg_value or it's inlinedAt chain - // contains the inlinedAt of the #dbg_value, if such an Instruction is found, - // debug information is dropped. - for (VarID Var : DebugVariablesBeforeSet) { - if (DebugVariablesAfterSet.contains(Var)) - continue; - visitEveryInstruction(DroppedCount, InlinedAtsMap, Var); - removeVarFromAllSets(Var, Func); - } - if (DroppedCount > 0) { - llvm::outs() << PassLevel << ", " << PassID << ", " << DroppedCount << ", " - << FuncOrModName << "\n"; - PassDroppedVariables = true; - } else - PassDroppedVariables = false; -} - -bool DroppedVariableStats::updateDroppedCount( - DILocation *DbgLoc, const DIScope *Scope, const DIScope *DbgValScope, - DenseMap &InlinedAtsMap, VarID Var, - unsigned &DroppedCount) { - - // If the Scope is a child of, or equal to the DbgValScope and is inlined at - // the Var's InlinedAt location, return true to signify that the Var has been - // dropped. - if (isScopeChildOfOrEqualTo(Scope, DbgValScope)) - if (isInlinedAtChildOfOrEqualTo(DbgLoc->getInlinedAt(), - InlinedAtsMap[Var])) { - // Found another instruction in the variable's scope, so there exists a - // break point at which the variable could be observed. Count it as - // dropped. - DroppedCount++; - return true; - } - return false; -} - -void DroppedVariableStats::run(DebugVariables &DbgVariables, StringRef FuncName, - bool Before) { - auto &VarIDSet = (Before ? 
DbgVariables.DebugVariablesBefore - : DbgVariables.DebugVariablesAfter); - auto &InlinedAtsMap = InlinedAts.back(); - if (Before) - InlinedAtsMap.try_emplace(FuncName, DenseMap()); - VarIDSet = DenseSet(); - visitEveryDebugRecord(VarIDSet, InlinedAtsMap, FuncName, Before); -} - -void DroppedVariableStats::populateVarIDSetAndInlinedMap( - const DILocalVariable *DbgVar, DebugLoc DbgLoc, DenseSet &VarIDSet, - DenseMap> &InlinedAtsMap, - StringRef FuncName, bool Before) { - VarID Key{DbgVar->getScope(), DbgLoc->getInlinedAtScope(), DbgVar}; - VarIDSet.insert(Key); - if (Before) - InlinedAtsMap[FuncName].try_emplace(Key, DbgLoc.getInlinedAt()); -} - -void DroppedVariableStatsIR::runOnFunction(const Function *F, bool Before) { - auto &DebugVariables = DebugVariablesStack.back()[F]; - auto FuncName = F->getName(); - Func = F; - run(DebugVariables, FuncName, Before); -} - -void DroppedVariableStatsIR::calculateDroppedVarStatsOnFunction( - const Function *F, StringRef PassID, StringRef FuncOrModName, - StringRef PassLevel) { - Func = F; - StringRef FuncName = F->getName(); - DebugVariables &DbgVariables = DebugVariablesStack.back()[F]; - calculateDroppedStatsAndPrint(DbgVariables, FuncName, PassID, FuncOrModName, - PassLevel, Func); -} - -void DroppedVariableStatsIR::runOnModule(const Module *M, bool Before) { - for (auto &F : *M) - runOnFunction(&F, Before); -} - -void DroppedVariableStatsIR::calculateDroppedVarStatsOnModule( - const Module *M, StringRef PassID, StringRef FuncOrModName, - StringRef PassLevel) { - for (auto &F : *M) { - calculateDroppedVarStatsOnFunction(&F, PassID, FuncOrModName, PassLevel); - } -} - -void DroppedVariableStatsIR::registerCallbacks( - PassInstrumentationCallbacks &PIC) { - if (!DroppedVariableStatsEnabled) - return; - - PIC.registerBeforeNonSkippedPassCallback( - [this](StringRef P, Any IR) { return runBeforePass(IR); }); - PIC.registerAfterPassCallback( - [this](StringRef P, Any IR, const PreservedAnalyses &PA) { - return runAfterPass(P, IR); - }); - PIC.registerAfterPassInvalidatedCallback( - [this](StringRef P, const PreservedAnalyses &PA) { return cleanup(); }); -} - -void DroppedVariableStatsIR::visitEveryInstruction( - unsigned &DroppedCount, DenseMap &InlinedAtsMap, - VarID Var) { - const DIScope *DbgValScope = std::get<0>(Var); - for (const auto &I : instructions(Func)) { - auto *DbgLoc = I.getDebugLoc().get(); - if (!DbgLoc) - continue; - if (updateDroppedCount(DbgLoc, DbgLoc->getScope(), DbgValScope, - InlinedAtsMap, Var, DroppedCount)) - break; - } -} - -void DroppedVariableStatsIR::visitEveryDebugRecord( - DenseSet &VarIDSet, - DenseMap> &InlinedAtsMap, - StringRef FuncName, bool Before) { - for (const auto &I : instructions(Func)) { - for (DbgRecord &DR : I.getDbgRecordRange()) { - if (auto *Dbg = dyn_cast(&DR)) { - auto *DbgVar = Dbg->getVariable(); - auto DbgLoc = DR.getDebugLoc(); - populateVarIDSetAndInlinedMap(DbgVar, DbgLoc, VarIDSet, InlinedAtsMap, - FuncName, Before); - } - } - } -} diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index b766517e68eba..6259f8f736c80 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -2462,7 +2462,7 @@ StandardInstrumentations::StandardInstrumentations( PrintChanged == ChangePrinter::ColourDiffVerbose || PrintChanged == ChangePrinter::ColourDiffQuiet), WebsiteChangeReporter(PrintChanged == ChangePrinter::DotCfgVerbose), - Verify(DebugLogging), DroppedStatsIR(DroppedVarStats), + 
Verify(DebugLogging), DroppedStats(DroppedVarStats), VerifyEach(VerifyEach) {} PrintCrashIRInstrumentation *PrintCrashIRInstrumentation::CrashReporter = @@ -2523,6 +2523,180 @@ void PrintCrashIRInstrumentation::registerCallbacks( }); } +void DroppedVariableStats::registerCallbacks( + PassInstrumentationCallbacks &PIC) { + if (!DroppedVarStats) + return; + + PIC.registerBeforeNonSkippedPassCallback( + [this](StringRef P, Any IR) { return this->runBeforePass(P, IR); }); + PIC.registerAfterPassCallback( + [this](StringRef P, Any IR, const PreservedAnalyses &PA) { + return this->runAfterPass(P, IR, PA); + }); + PIC.registerAfterPassInvalidatedCallback( + [this](StringRef P, const PreservedAnalyses &PA) { + return this->runAfterPassInvalidated(P, PA); + }); +} + +void DroppedVariableStats::runBeforePass(StringRef PassID, Any IR) { + DebugVariablesStack.push_back({DenseMap()}); + InlinedAts.push_back({DenseMap>()}); + if (auto *M = unwrapIR(IR)) + return this->runOnModule(M, true); + if (auto *F = unwrapIR(IR)) + return this->runOnFunction(F, true); +} + +void DroppedVariableStats::runOnFunction(const Function *F, bool Before) { + auto &DebugVariables = DebugVariablesStack.back()[F]; + auto &VarIDSet = (Before ? DebugVariables.DebugVariablesBefore + : DebugVariables.DebugVariablesAfter); + auto &InlinedAtsMap = InlinedAts.back(); + auto FuncName = F->getName(); + if (Before) + InlinedAtsMap.try_emplace(FuncName, DenseMap()); + VarIDSet = DenseSet(); + for (const auto &I : instructions(F)) { + for (DbgRecord &DR : I.getDbgRecordRange()) { + if (auto *Dbg = dyn_cast(&DR)) { + auto *DbgVar = Dbg->getVariable(); + auto DbgLoc = DR.getDebugLoc(); + VarID Key{DbgVar->getScope(), DbgLoc->getInlinedAtScope(), DbgVar}; + VarIDSet.insert(Key); + if (Before) + InlinedAtsMap[FuncName].try_emplace(Key, DbgLoc.getInlinedAt()); + } + } + } +} + +void DroppedVariableStats::runOnModule(const Module *M, bool Before) { + for (auto &F : *M) + runOnFunction(&F, Before); +} + +void DroppedVariableStats::removeVarFromAllSets(VarID Var, const Function *F) { + // Do not remove Var from the last element, it will be popped from the stack. + for (auto &DebugVariablesMap : llvm::drop_end(DebugVariablesStack)) + DebugVariablesMap[F].DebugVariablesBefore.erase(Var); +} + +void DroppedVariableStats::calculateDroppedVarStatsOnModule( + const Module *M, StringRef PassID, std::string FuncOrModName, + std::string PassLevel) { + for (auto &F : *M) { + calculateDroppedVarStatsOnFunction(&F, PassID, FuncOrModName, PassLevel); + } +} + +void DroppedVariableStats::calculateDroppedVarStatsOnFunction( + const Function *F, StringRef PassID, std::string FuncOrModName, + std::string PassLevel) { + unsigned DroppedCount = 0; + StringRef FuncName = F->getName(); + DebugVariables &DbgVariables = DebugVariablesStack.back()[F]; + DenseSet &DebugVariablesBeforeSet = DbgVariables.DebugVariablesBefore; + DenseSet &DebugVariablesAfterSet = DbgVariables.DebugVariablesAfter; + DenseMap &InlinedAtsMap = InlinedAts.back()[FuncName]; + // Find an Instruction that shares the same scope as the dropped #dbg_value or + // has a scope that is the child of the scope of the #dbg_value, and has an + // inlinedAt equal to the inlinedAt of the #dbg_value or it's inlinedAt chain + // contains the inlinedAt of the #dbg_value, if such an Instruction is found, + // debug information is dropped. 
+ for (VarID Var : DebugVariablesBeforeSet) { + if (DebugVariablesAfterSet.contains(Var)) + continue; + const DIScope *DbgValScope = std::get<0>(Var); + for (const auto &I : instructions(F)) { + auto *DbgLoc = I.getDebugLoc().get(); + if (!DbgLoc) + continue; + + auto *Scope = DbgLoc->getScope(); + if (isScopeChildOfOrEqualTo(Scope, DbgValScope)) { + if (isInlinedAtChildOfOrEqualTo(DbgLoc->getInlinedAt(), + InlinedAtsMap[Var])) { + // Found another instruction in the variable's scope, so there exists + // a break point at which the variable could be observed. Count it as + // dropped. + DroppedCount++; + break; + } + } + } + removeVarFromAllSets(Var, F); + } + if (DroppedCount > 0) { + llvm::outs() << PassLevel << ", " << PassID << ", " << DroppedCount << ", " + << FuncOrModName << "\n"; + PassDroppedVariables = true; + } else + PassDroppedVariables = false; +} + +void DroppedVariableStats::runAfterPassInvalidated( + StringRef PassID, const PreservedAnalyses &PA) { + DebugVariablesStack.pop_back(); + InlinedAts.pop_back(); +} + +void DroppedVariableStats::runAfterPass(StringRef PassID, Any IR, + const PreservedAnalyses &PA) { + std::string PassLevel; + std::string FuncOrModName; + if (auto *M = unwrapIR(IR)) { + this->runOnModule(M, false); + PassLevel = "Module"; + FuncOrModName = M->getName(); + calculateDroppedVarStatsOnModule(M, PassID, FuncOrModName, PassLevel); + } else if (auto *F = unwrapIR(IR)) { + this->runOnFunction(F, false); + PassLevel = "Function"; + FuncOrModName = F->getName(); + calculateDroppedVarStatsOnFunction(F, PassID, FuncOrModName, PassLevel); + } + + DebugVariablesStack.pop_back(); + InlinedAts.pop_back(); +} + +bool DroppedVariableStats::isScopeChildOfOrEqualTo(DIScope *Scope, + const DIScope *DbgValScope) { + while (Scope != nullptr) { + if (VisitedScope.find(Scope) == VisitedScope.end()) { + VisitedScope.insert(Scope); + if (Scope == DbgValScope) { + VisitedScope.clear(); + return true; + } + Scope = Scope->getScope(); + } else { + VisitedScope.clear(); + return false; + } + } + return false; +} + +bool DroppedVariableStats::isInlinedAtChildOfOrEqualTo( + const DILocation *InlinedAt, const DILocation *DbgValInlinedAt) { + if (DbgValInlinedAt == InlinedAt) + return true; + if (!DbgValInlinedAt) + return false; + if (!InlinedAt) + return false; + auto *IA = InlinedAt; + while (IA) { + if (IA == DbgValInlinedAt) + return true; + IA = IA->getInlinedAt(); + } + return false; +} + void StandardInstrumentations::registerCallbacks( PassInstrumentationCallbacks &PIC, ModuleAnalysisManager *MAM) { PrintIR.registerCallbacks(PIC); @@ -2538,7 +2712,7 @@ void StandardInstrumentations::registerCallbacks( WebsiteChangeReporter.registerCallbacks(PIC); ChangeTester.registerCallbacks(PIC); PrintCrashIR.registerCallbacks(PIC); - DroppedStatsIR.registerCallbacks(PIC); + DroppedStats.registerCallbacks(PIC); if (MAM) PreservedCFGChecker.registerCallbacks(PIC, *MAM); diff --git a/llvm/unittests/CodeGen/CMakeLists.txt b/llvm/unittests/CodeGen/CMakeLists.txt index 807fd1a9b7b56..963cdcc0275e1 100644 --- a/llvm/unittests/CodeGen/CMakeLists.txt +++ b/llvm/unittests/CodeGen/CMakeLists.txt @@ -27,7 +27,6 @@ add_llvm_unittest(CodeGenTests CCStateTest.cpp DIEHashTest.cpp DIETest.cpp - DroppedVariableStatsIRTest.cpp DwarfStringPoolEntryRefTest.cpp InstrRefLDVTest.cpp LowLevelTypeTest.cpp diff --git a/llvm/unittests/IR/CMakeLists.txt b/llvm/unittests/IR/CMakeLists.txt index e5c8630f3eed7..ed93ee547d223 100644 --- a/llvm/unittests/IR/CMakeLists.txt +++ b/llvm/unittests/IR/CMakeLists.txt @@ -43,6 
+43,7 @@ add_llvm_unittest(IRTests ShuffleVectorInstTest.cpp StructuralHashTest.cpp TimePassesTest.cpp + DroppedVariableStatsTest.cpp TypesTest.cpp UseTest.cpp UserTest.cpp diff --git a/llvm/unittests/CodeGen/DroppedVariableStatsIRTest.cpp b/llvm/unittests/IR/DroppedVariableStatsTest.cpp similarity index 91% rename from llvm/unittests/CodeGen/DroppedVariableStatsIRTest.cpp rename to llvm/unittests/IR/DroppedVariableStatsTest.cpp index 094ec7b657634..61f3a87bb355e 100644 --- a/llvm/unittests/CodeGen/DroppedVariableStatsIRTest.cpp +++ b/llvm/unittests/IR/DroppedVariableStatsTest.cpp @@ -1,4 +1,5 @@ -//===- unittests/IR/DroppedVariableStatsIRTest.cpp ------------------------===// +//===- unittests/IR/DroppedVariableStatsTest.cpp - TimePassesHandler tests +//----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -7,7 +8,6 @@ //===----------------------------------------------------------------------===// #include "llvm/AsmParser/Parser.h" -#include "llvm/CodeGen/DroppedVariableStats.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/LegacyPassManager.h" @@ -44,7 +44,7 @@ namespace { // This test ensures that if a #dbg_value and an instruction that exists in the // same scope as that #dbg_value are both deleted as a result of an optimization // pass, debug information is considered not dropped. -TEST(DroppedVariableStatsIR, BothDeleted) { +TEST(DroppedVariableStats, BothDeleted) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -79,8 +79,9 @@ TEST(DroppedVariableStatsIR, BothDeleted) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStatsIR Stats(true); - Stats.runBeforePass(llvm::Any(const_cast(M.get()))); + DroppedVariableStats Stats(true); + Stats.runBeforePass("Test", + llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -91,15 +92,16 @@ TEST(DroppedVariableStatsIR, BothDeleted) { } break; } + PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get()))); + llvm::Any(const_cast(M.get())), PA); ASSERT_EQ(Stats.getPassDroppedVariables(), false); } // This test ensures that if a #dbg_value is dropped after an optimization pass, // but an instruction that shares the same scope as the #dbg_value still exists, // debug information is conisdered dropped. -TEST(DroppedVariableStatsIR, DbgValLost) { +TEST(DroppedVariableStats, DbgValLost) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -134,8 +136,9 @@ TEST(DroppedVariableStatsIR, DbgValLost) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStatsIR Stats(true); - Stats.runBeforePass(llvm::Any(const_cast(M.get()))); + DroppedVariableStats Stats(true); + Stats.runBeforePass("Test", + llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -145,15 +148,16 @@ TEST(DroppedVariableStatsIR, DbgValLost) { } break; } + PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get()))); + llvm::Any(const_cast(M.get())), PA); ASSERT_EQ(Stats.getPassDroppedVariables(), true); } // This test ensures that if a #dbg_value is dropped after an optimization pass, // but an instruction that has an unrelated scope as the #dbg_value still // exists, debug information is conisdered not dropped. 
-TEST(DroppedVariableStatsIR, UnrelatedScopes) { +TEST(DroppedVariableStats, UnrelatedScopes) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -189,8 +193,9 @@ TEST(DroppedVariableStatsIR, UnrelatedScopes) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStatsIR Stats(true); - Stats.runBeforePass(llvm::Any(const_cast(M.get()))); + DroppedVariableStats Stats(true); + Stats.runBeforePass("Test", + llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -200,15 +205,16 @@ TEST(DroppedVariableStatsIR, UnrelatedScopes) { } break; } + PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get()))); + llvm::Any(const_cast(M.get())), PA); ASSERT_EQ(Stats.getPassDroppedVariables(), false); } // This test ensures that if a #dbg_value is dropped after an optimization pass, // but an instruction that has a scope which is a child of the #dbg_value scope // still exists, debug information is conisdered dropped. -TEST(DroppedVariableStatsIR, ChildScopes) { +TEST(DroppedVariableStats, ChildScopes) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -244,8 +250,9 @@ TEST(DroppedVariableStatsIR, ChildScopes) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStatsIR Stats(true); - Stats.runBeforePass(llvm::Any(const_cast(M.get()))); + DroppedVariableStats Stats(true); + Stats.runBeforePass("Test", + llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -255,8 +262,9 @@ TEST(DroppedVariableStatsIR, ChildScopes) { } break; } + PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get()))); + llvm::Any(const_cast(M.get())), PA); ASSERT_EQ(Stats.getPassDroppedVariables(), true); } @@ -264,7 +272,7 @@ TEST(DroppedVariableStatsIR, ChildScopes) { // but an instruction that has a scope which is a child of the #dbg_value scope // still exists, and the #dbg_value is inlined at another location, debug // information is conisdered not dropped. -TEST(DroppedVariableStatsIR, InlinedAt) { +TEST(DroppedVariableStats, InlinedAt) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -300,8 +308,9 @@ TEST(DroppedVariableStatsIR, InlinedAt) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStatsIR Stats(true); - Stats.runBeforePass(llvm::Any(const_cast(M.get()))); + DroppedVariableStats Stats(true); + Stats.runBeforePass("Test", + llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -311,8 +320,9 @@ TEST(DroppedVariableStatsIR, InlinedAt) { } break; } + PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get()))); + llvm::Any(const_cast(M.get())), PA); ASSERT_EQ(Stats.getPassDroppedVariables(), false); } @@ -320,7 +330,7 @@ TEST(DroppedVariableStatsIR, InlinedAt) { // but an instruction that has a scope which is a child of the #dbg_value scope // still exists, and the #dbg_value and the instruction are inlined at another // location, debug information is conisdered dropped. 
-TEST(DroppedVariableStatsIR, InlinedAtShared) { +TEST(DroppedVariableStats, InlinedAtShared) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -356,8 +366,9 @@ TEST(DroppedVariableStatsIR, InlinedAtShared) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStatsIR Stats(true); - Stats.runBeforePass(llvm::Any(const_cast(M.get()))); + DroppedVariableStats Stats(true); + Stats.runBeforePass("Test", + llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -367,8 +378,9 @@ TEST(DroppedVariableStatsIR, InlinedAtShared) { } break; } + PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get()))); + llvm::Any(const_cast(M.get())), PA); ASSERT_EQ(Stats.getPassDroppedVariables(), true); } @@ -376,7 +388,7 @@ TEST(DroppedVariableStatsIR, InlinedAtShared) { // but an instruction that has a scope which is a child of the #dbg_value scope // still exists, and the instruction is inlined at a location that is the // #dbg_value's inlined at location, debug information is conisdered dropped. -TEST(DroppedVariableStatsIR, InlinedAtChild) { +TEST(DroppedVariableStats, InlinedAtChild) { PassInstrumentationCallbacks PIC; PassInstrumentation PI(&PIC); @@ -413,8 +425,9 @@ TEST(DroppedVariableStatsIR, InlinedAtChild) { std::unique_ptr M = parseIR(C, IR); ASSERT_TRUE(M); - DroppedVariableStatsIR Stats(true); - Stats.runBeforePass(llvm::Any(const_cast(M.get()))); + DroppedVariableStats Stats(true); + Stats.runBeforePass("Test", + llvm::Any(const_cast(M.get()))); // This loop simulates an IR pass that drops debug information. for (auto &F : *M) { @@ -424,8 +437,9 @@ TEST(DroppedVariableStatsIR, InlinedAtChild) { } break; } + PreservedAnalyses PA; Stats.runAfterPass("Test", - llvm::Any(const_cast(M.get()))); + llvm::Any(const_cast(M.get())), PA); ASSERT_EQ(Stats.getPassDroppedVariables(), true); } From 67d8e1754d7707c90aa15a15ea5bbb430108a162 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 4 Dec 2024 00:51:06 +0000 Subject: [PATCH 152/191] [gn build] Port 259bdc0033d1 --- llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn | 1 - llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn | 1 - llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn | 1 + 3 files changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn index bcb46d919b6c1..ab72ac4ae9f4b 100644 --- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn @@ -50,7 +50,6 @@ static_library("CodeGen") { "DFAPacketizer.cpp", "DeadMachineInstructionElim.cpp", "DetectDeadLanes.cpp", - "DroppedVariableStats.cpp", "DwarfEHPrepare.cpp", "EHContGuardCatchret.cpp", "EarlyIfConversion.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn index dc01cc9a40a9c..a3f89a5648cb5 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn @@ -26,7 +26,6 @@ unittest("CodeGenTests") { "CCStateTest.cpp", "DIEHashTest.cpp", "DIETest.cpp", - "DroppedVariableStatsIRTest.cpp", "DwarfStringPoolEntryRefTest.cpp", "InstrRefLDVTest.cpp", "LexicalScopesTest.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn index b19d54d7ed92f..ccee5d79afdcc 100644 --- 
a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn @@ -28,6 +28,7 @@ unittest("IRTests") { "DemandedBitsTest.cpp", "DominatorTreeBatchUpdatesTest.cpp", "DominatorTreeTest.cpp", + "DroppedVariableStatsTest.cpp", "FunctionTest.cpp", "IRBuilderTest.cpp", "InstructionsTest.cpp", From 14a259f85b6cbe6827677d94990c8803e31c847d Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Wed, 4 Dec 2024 14:01:16 +1300 Subject: [PATCH 153/191] GlobalOpt: Use the correct address space when creating a "*.init" global. (#118562) --- llvm/lib/Transforms/IPO/GlobalOpt.cpp | 9 ++- .../GlobalOpt/malloc-promote-addrspace.ll | 69 +++++++++++++++++++ 2 files changed, 73 insertions(+), 5 deletions(-) create mode 100644 llvm/test/Transforms/GlobalOpt/malloc-promote-addrspace.ll diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 3381b5f77683b..16a80e9ebbeaa 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -946,11 +946,10 @@ OptimizeGlobalAddressOfAllocation(GlobalVariable *GV, CallInst *CI, // If there is a comparison against null, we will insert a global bool to // keep track of whether the global was initialized yet or not. - GlobalVariable *InitBool = - new GlobalVariable(Type::getInt1Ty(GV->getContext()), false, - GlobalValue::InternalLinkage, - ConstantInt::getFalse(GV->getContext()), - GV->getName()+".init", GV->getThreadLocalMode()); + GlobalVariable *InitBool = new GlobalVariable( + Type::getInt1Ty(GV->getContext()), false, GlobalValue::InternalLinkage, + ConstantInt::getFalse(GV->getContext()), GV->getName() + ".init", + GV->getThreadLocalMode(), GV->getAddressSpace()); bool InitBoolUsed = false; // Loop over all instruction uses of GV, processing them in turn. diff --git a/llvm/test/Transforms/GlobalOpt/malloc-promote-addrspace.ll b/llvm/test/Transforms/GlobalOpt/malloc-promote-addrspace.ll new file mode 100644 index 0000000000000..bd0957150748f --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/malloc-promote-addrspace.ll @@ -0,0 +1,69 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals +; RUN: opt -S -passes=globalopt -o - < %s | FileCheck %s + +@g = internal addrspace(200) global ptr null, align 8 + +;. +; CHECK: @g.init = internal unnamed_addr addrspace(200) global i1 false +;. 
+define internal i32 @f1() { +; CHECK-LABEL: define {{[^@]+}}@f1() unnamed_addr { +; CHECK-NEXT: bb: +; CHECK-NEXT: [[G_INIT_VAL:%.*]] = load i1, ptr addrspace(200) @g.init, align 1 +; CHECK-NEXT: call fastcc void @f2() +; CHECK-NEXT: [[NOTINIT:%.*]] = xor i1 [[G_INIT_VAL]], true +; CHECK-NEXT: br i1 [[NOTINIT]], label [[BB2:%.*]], label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB4:%.*]] +; CHECK: bb3: +; CHECK-NEXT: br label [[BB4]] +; CHECK: bb4: +; CHECK-NEXT: [[I5:%.*]] = phi i32 [ -1, [[BB2]] ], [ 1, [[BB3]] ] +; CHECK-NEXT: ret i32 [[I5]] +; +bb: + %i = load ptr addrspace(200), ptr addrspace(200) @g, align 8 + call void @f2() + %i1 = icmp eq ptr addrspace(200) %i, null + br i1 %i1, label %bb2, label %bb3 + +bb2: ; preds = %bb + br label %bb4 + +bb3: ; preds = %bb + br label %bb4 + +bb4: ; preds = %bb3, %bb2 + %i5 = phi i32 [ -1, %bb2 ], [ 1, %bb3 ] + ret i32 %i5 +} + +define internal void @f2() { +; CHECK-LABEL: define {{[^@]+}}@f2() unnamed_addr { +; CHECK-NEXT: bb: +; CHECK-NEXT: store i1 true, ptr addrspace(200) @g.init, align 1 +; CHECK-NEXT: ret void +; +bb: + %i = call noalias ptr @malloc(i64 4) + store ptr %i, ptr addrspace(200) @g, align 8 + ret void +} + +define dso_local i32 @main() { +; CHECK-LABEL: define {{[^@]+}}@main() local_unnamed_addr { +; CHECK-NEXT: bb: +; CHECK-NEXT: store i1 false, ptr addrspace(200) @g.init, align 1 +; CHECK-NEXT: [[I:%.*]] = call fastcc i32 @f1() +; CHECK-NEXT: ret i32 [[I]] +; +bb: + store ptr null, ptr addrspace(200) @g, align 8 + %i = call i32 @f1() + ret i32 %i +} + +; Function Attrs: allockind("alloc,uninitialized") allocsize(0) +declare dso_local noalias ptr @malloc(i64) #0 + +attributes #0 = { allockind("alloc,uninitialized") allocsize(0) } From 68bcba6d7a1cc18996c0bcb7c62267c62d2040d0 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Tue, 3 Dec 2024 20:17:06 -0500 Subject: [PATCH 154/191] Revert "[AMDGPU] Use COV6 by default (#118515)" This reverts commit 410cbe3cf28913cca2fc61b3437306b841d08172 because some buildbots are not ready yet. 
--- clang/docs/ReleaseNotes.rst | 2 -- clang/include/clang/Driver/Options.td | 4 ++-- clang/lib/Driver/ToolChains/CommonArgs.cpp | 2 +- clang/test/CodeGen/amdgpu-address-spaces.cpp | 2 +- clang/test/CodeGenCUDA/amdgpu-code-object-version.cu | 2 +- clang/test/CodeGenCXX/dynamic-cast-address-space.cpp | 6 +++--- clang/test/CodeGenHIP/default-attributes.hip | 4 ++-- clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl | 6 +++--- .../amdgcn/bitcode/oclc_abi_version_600.bc | 0 .../lib/amdgcn/bitcode/oclc_abi_version_600.bc | 0 .../lib64/amdgcn/bitcode/oclc_abi_version_600.bc | 0 clang/test/Driver/hip-device-libs.hip | 2 +- clang/test/OpenMP/amdgcn_target_global_constructor.cpp | 4 ++-- libc/cmake/modules/prepare_libc_gpu_build.cmake | 2 +- llvm/docs/ReleaseNotes.md | 2 -- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 2 +- .../CodeGen/AMDGPU/default_amdhsa_code_object_version.ll | 7 ------- offload/plugins-nextgen/common/src/Utils/ELF.cpp | 5 ++--- 18 files changed, 20 insertions(+), 32 deletions(-) delete mode 100644 clang/test/Driver/Inputs/rocm-spack/llvm-amdgpu-4.0.0-ieagcs7inf7runpyfvepqkurasoglq4z/amdgcn/bitcode/oclc_abi_version_600.bc delete mode 100644 clang/test/Driver/Inputs/rocm_resource_dir/lib/amdgcn/bitcode/oclc_abi_version_600.bc delete mode 100644 clang/test/Driver/Inputs/rocm_resource_dir/lib64/amdgcn/bitcode/oclc_abi_version_600.bc delete mode 100644 llvm/test/CodeGen/AMDGPU/default_amdhsa_code_object_version.ll diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 366e96f054c20..02284225fb4fa 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -818,8 +818,6 @@ Target Specific Changes AMDGPU Support ^^^^^^^^^^^^^^ -- Bump the default code object version to 6. - - Initial support for gfx950 - Added headers ``gpuintrin.h`` and ``amdgpuintrin.h`` that contains common diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 29db1aa21ed29..4bc0b97ea68f2 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5140,12 +5140,12 @@ defm amdgpu_ieee : BoolMOption<"amdgpu-ieee", NegFlag>; def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group, - HelpText<"Specify code object ABI version. Defaults to 6. (AMDGPU only)">, + HelpText<"Specify code object ABI version. Defaults to 5. 
(AMDGPU only)">, Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>, Values<"none,4,5,6">, NormalizedValuesScope<"llvm::CodeObjectVersionKind">, NormalizedValues<["COV_None", "COV_4", "COV_5", "COV_6"]>, - MarshallingInfoEnum, "COV_6">; + MarshallingInfoEnum, "COV_5">; defm cumode : SimpleMFlag<"cumode", "Specify CU wavefront", "Specify WGP wavefront", diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 4e6ace48c3ffb..8d977149e6248 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2705,7 +2705,7 @@ void tools::checkAMDGPUCodeObjectVersion(const Driver &D, unsigned tools::getAMDGPUCodeObjectVersion(const Driver &D, const llvm::opt::ArgList &Args) { - unsigned CodeObjVer = 6; // default + unsigned CodeObjVer = 5; // default if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer); return CodeObjVer; diff --git a/clang/test/CodeGen/amdgpu-address-spaces.cpp b/clang/test/CodeGen/amdgpu-address-spaces.cpp index b121b559f58dc..ae2c61439f4ca 100644 --- a/clang/test/CodeGen/amdgpu-address-spaces.cpp +++ b/clang/test/CodeGen/amdgpu-address-spaces.cpp @@ -29,7 +29,7 @@ int [[clang::address_space(999)]] bbb = 1234; // CHECK: @u = addrspace(5) global i32 undef, align 4 // CHECK: @aaa = addrspace(6) global i32 1000, align 4 // CHECK: @bbb = addrspace(999) global i32 1234, align 4 -// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600 +// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 //. // CHECK-LABEL: define dso_local amdgpu_kernel void @foo( // CHECK-SAME: ) #[[ATTR0:[0-9]+]] { diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu index aa0e3edec3f6a..ffe12544917f7 100644 --- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu +++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu @@ -1,7 +1,7 @@ // Create module flag for code object version. // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ -// RUN: -o - %s | FileCheck %s -check-prefix=V6 +// RUN: -o - %s | FileCheck %s -check-prefix=V5 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ // RUN: -mcode-object-version=4 -o - %s | FileCheck -check-prefix=V4 %s diff --git a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp index 7eebdf68115a9..271d9ede79d0c 100644 --- a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp +++ b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp @@ -13,7 +13,7 @@ B fail; // CHECK: @_ZTI1B = linkonce_odr addrspace(1) constant { ptr addrspace(1), ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds (ptr addrspace(1), ptr addrspace(1) @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr addrspace(1) @_ZTS1B, ptr addrspace(1) @_ZTI1A }, comdat, align 8 // CHECK: @_ZTVN10__cxxabiv120__si_class_type_infoE = external addrspace(1) global [0 x ptr addrspace(1)] // CHECK: @_ZTS1B = linkonce_odr addrspace(1) constant [3 x i8] c"1B\00", comdat, align 1 -// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600 +// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 //. 
// WITH-NONZERO-DEFAULT-AS: @_ZTV1B = linkonce_odr unnamed_addr addrspace(1) constant { [3 x ptr addrspace(1)] } { [3 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) @_ZTI1B, ptr addrspace(1) addrspacecast (ptr addrspace(4) @_ZN1A1fEv to ptr addrspace(1))] }, comdat, align 8 // WITH-NONZERO-DEFAULT-AS: @fail = addrspace(1) global { ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds inrange(-16, 8) ({ [3 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTV1B, i32 0, i32 0, i32 2) }, align 8 @@ -118,11 +118,11 @@ const B& f(A *a) { // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR3]] = { nounwind } // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR4]] = { noreturn } //. -// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} +// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} // CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // CHECK: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} //. -// WITH-NONZERO-DEFAULT-AS: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} +// WITH-NONZERO-DEFAULT-AS: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} // WITH-NONZERO-DEFAULT-AS: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // WITH-NONZERO-DEFAULT-AS: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} //. diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip index f4dbad021987f..1b53ebec9b582 100644 --- a/clang/test/CodeGenHIP/default-attributes.hip +++ b/clang/test/CodeGenHIP/default-attributes.hip @@ -8,7 +8,7 @@ //. // OPTNONE: @__hip_cuid_ = addrspace(1) global i8 0 // OPTNONE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata" -// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600 +// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 //. __device__ void extern_func(); @@ -39,7 +39,7 @@ __global__ void kernel() { // OPTNONE: attributes #[[ATTR2]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } // OPTNONE: attributes #[[ATTR3]] = { convergent nounwind } //. -// OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} +// OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} // OPTNONE: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"} // OPTNONE: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} //. diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl index 7f2a17b6ef8c5..62b5661da9dbd 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -62,7 +62,7 @@ kernel void test_target_features_kernel(global int *i) { //. // CHECK: @__block_literal_global = internal addrspace(1) constant { i32, i32, ptr } { i32 16, i32 8, ptr @__test_target_features_kernel_block_invoke }, align 8 #0 -// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600 +// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 //. 
// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone // NOCPU-LABEL: define {{[^@]+}}@callee @@ -759,7 +759,7 @@ kernel void test_target_features_kernel(global int *i) { // GFX900: attributes #[[ATTR8]] = { nounwind } // GFX900: attributes #[[ATTR9]] = { convergent nounwind } //. -// NOCPU: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} +// NOCPU: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} // NOCPU: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // NOCPU: [[META2:![0-9]+]] = !{i32 2, i32 0} // NOCPU: [[META3]] = !{i32 1, i32 0, i32 1, i32 0} @@ -777,7 +777,7 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU: [[META15]] = !{i32 1} // NOCPU: [[META16]] = !{!"int*"} //. -// GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} +// GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} // GFX900: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // GFX900: [[META2:![0-9]+]] = !{i32 2, i32 0} // GFX900: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} diff --git a/clang/test/Driver/Inputs/rocm-spack/llvm-amdgpu-4.0.0-ieagcs7inf7runpyfvepqkurasoglq4z/amdgcn/bitcode/oclc_abi_version_600.bc b/clang/test/Driver/Inputs/rocm-spack/llvm-amdgpu-4.0.0-ieagcs7inf7runpyfvepqkurasoglq4z/amdgcn/bitcode/oclc_abi_version_600.bc deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/clang/test/Driver/Inputs/rocm_resource_dir/lib/amdgcn/bitcode/oclc_abi_version_600.bc b/clang/test/Driver/Inputs/rocm_resource_dir/lib/amdgcn/bitcode/oclc_abi_version_600.bc deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/clang/test/Driver/Inputs/rocm_resource_dir/lib64/amdgcn/bitcode/oclc_abi_version_600.bc b/clang/test/Driver/Inputs/rocm_resource_dir/lib64/amdgcn/bitcode/oclc_abi_version_600.bc deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip index 3ae384cf05d97..6f1d31508e330 100644 --- a/clang/test/Driver/hip-device-libs.hip +++ b/clang/test/Driver/hip-device-libs.hip @@ -157,7 +157,7 @@ // Test default code object version. // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI6 +// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5 // Test default code object version with old device library without abi_version_400.bc // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ diff --git a/clang/test/OpenMP/amdgcn_target_global_constructor.cpp b/clang/test/OpenMP/amdgcn_target_global_constructor.cpp index d728dc1233e2c..9f1e68d4ea0fe 100644 --- a/clang/test/OpenMP/amdgcn_target_global_constructor.cpp +++ b/clang/test/OpenMP/amdgcn_target_global_constructor.cpp @@ -29,7 +29,7 @@ S A; // CHECK: @A = addrspace(1) global %struct.S zeroinitializer, align 4 // CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_GLOBAL__sub_I_amdgcn_target_global_constructor.cpp, ptr null }] // CHECK: @llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__dtor_A, ptr null }] -// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600 +// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 //. 
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init // CHECK-SAME: () #[[ATTR0:[0-9]+]] { @@ -104,7 +104,7 @@ S A; // CHECK: attributes #[[ATTR4]] = { convergent nounwind } //. // CHECK: [[META0:![0-9]+]] = !{i32 1, !"A", i32 0, i32 0} -// CHECK: [[META1:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} +// CHECK: [[META1:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} // CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 51} // CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 51} diff --git a/libc/cmake/modules/prepare_libc_gpu_build.cmake b/libc/cmake/modules/prepare_libc_gpu_build.cmake index f8f5a954e5e91..937bd22451c5f 100644 --- a/libc/cmake/modules/prepare_libc_gpu_build.cmake +++ b/libc/cmake/modules/prepare_libc_gpu_build.cmake @@ -104,7 +104,7 @@ if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) # The AMDGPU environment uses different code objects to encode the ABI for # kernel calls and intrinsic functions. We want to specify this manually to # conform to whatever the test suite was built to handle. - set(LIBC_GPU_CODE_OBJECT_VERSION 6) + set(LIBC_GPU_CODE_OBJECT_VERSION 5) endif() if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX) diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 414ca0b919d76..d8d9c4fc4bb8a 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -130,8 +130,6 @@ Changes to the AArch64 Backend Changes to the AMDGPU Backend ----------------------------- -* Bump the default `.amdhsa_code_object_version` to 6. - * Removed `llvm.amdgcn.flat.atomic.fadd` and `llvm.amdgcn.global.atomic.fadd` intrinsics. Users should use the {ref}`atomicrmw ` instruction with `fadd` and diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 48eecc511bdae..5a0e812748fbb 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -34,7 +34,7 @@ static llvm::cl::opt DefaultAMDHSACodeObjectVersion( "amdhsa-code-object-version", llvm::cl::Hidden, - llvm::cl::init(llvm::AMDGPU::AMDHSA_COV6), + llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5), llvm::cl::desc("Set default AMDHSA Code Object Version (module flag " "or asm directive still take priority if present)")); diff --git a/llvm/test/CodeGen/AMDGPU/default_amdhsa_code_object_version.ll b/llvm/test/CodeGen/AMDGPU/default_amdhsa_code_object_version.ll deleted file mode 100644 index 6f79cf23bfbf7..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/default_amdhsa_code_object_version.ll +++ /dev/null @@ -1,7 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa %s -o - | FileCheck %s - -; CHECK: .amdhsa_code_object_version 6 - -define amdgpu_kernel void @kernel() { - ret void -} diff --git a/offload/plugins-nextgen/common/src/Utils/ELF.cpp b/offload/plugins-nextgen/common/src/Utils/ELF.cpp index 18b5ad3351b12..88642fd5b5640 100644 --- a/offload/plugins-nextgen/common/src/Utils/ELF.cpp +++ b/offload/plugins-nextgen/common/src/Utils/ELF.cpp @@ -64,9 +64,8 @@ checkMachineImpl(const object::ELFObjectFile &ELFObj, uint16_t EMachine) { if (Header.e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA) return createError("Invalid AMD OS/ABI, must be AMDGPU_HSA"); if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V4 && - Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5 && - Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6) - return createError("Invalid AMD ABI version, must be version above 4"); + 
Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5) + return createError("Invalid AMD ABI version, must be version 4 or 5"); if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 || (Header.e_flags & EF_AMDGPU_MACH) > EF_AMDGPU_MACH_AMDGCN_GFX1201) return createError("Unsupported AMDGPU architecture"); From 6a0d6fc2e92bcfb7cb01a4c6cdd751a9b4b4c159 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Tue, 3 Dec 2024 17:19:30 -0800 Subject: [PATCH 155/191] Revert "[StructuralHash] Global Variable (#118412)" This reverts commit 1afb81dfaf902c1c42bd91fec1a7385e6e1529d3. --- llvm/include/llvm/IR/StructuralHash.h | 3 - llvm/lib/CodeGen/MachineStableHash.cpp | 20 ++---- llvm/lib/IR/StructuralHash.cpp | 54 ++-------------- .../AArch64/cgdata-merge-gvar-nsconst.ll | 32 ---------- .../CodeGen/AArch64/cgdata-merge-gvar-objc.ll | 38 ----------- .../AArch64/cgdata-merge-gvar-string.ll | 46 ------------- .../CodeGen/AArch64/cgdata-outline-gvar.ll | 64 ------------------- 7 files changed, 13 insertions(+), 244 deletions(-) delete mode 100644 llvm/test/CodeGen/AArch64/cgdata-merge-gvar-nsconst.ll delete mode 100644 llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll delete mode 100644 llvm/test/CodeGen/AArch64/cgdata-merge-gvar-string.ll delete mode 100644 llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h index 514dd6f174b90..071575137ff57 100644 --- a/llvm/include/llvm/IR/StructuralHash.h +++ b/llvm/include/llvm/IR/StructuralHash.h @@ -31,9 +31,6 @@ class Module; /// to true includes instruction and operand type information. stable_hash StructuralHash(const Function &F, bool DetailedHash = false); -/// Returns a hash of the global variable \p G. -stable_hash StructuralHash(const GlobalVariable &G); - /// Returns a hash of the module \p M by hashing all functions and global /// variables contained within. \param M The module to hash. 
\param DetailedHash /// Whether or not to encode additional information in the function hashes that diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp index 5ab589acee413..facda7a59e2f8 100644 --- a/llvm/lib/CodeGen/MachineStableHash.cpp +++ b/llvm/lib/CodeGen/MachineStableHash.cpp @@ -27,8 +27,6 @@ #include "llvm/CodeGen/Register.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/StructuralHash.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/ErrorHandling.h" @@ -95,19 +93,13 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) { return 0; case MachineOperand::MO_GlobalAddress: { const GlobalValue *GV = MO.getGlobal(); - stable_hash GVHash = 0; - if (auto *GVar = dyn_cast(GV)) - GVHash = StructuralHash(*GVar); - if (!GVHash) { - if (!GV->hasName()) { - ++StableHashBailingGlobalAddress; - return 0; - } - GVHash = stable_hash_name(GV->getName()); + if (!GV->hasName()) { + ++StableHashBailingGlobalAddress; + return 0; } - - return stable_hash_combine(MO.getType(), MO.getTargetFlags(), GVHash, - MO.getOffset()); + auto Name = GV->getName(); + return stable_hash_combine(MO.getType(), MO.getTargetFlags(), + stable_hash_name(Name), MO.getOffset()); } case MachineOperand::MO_TargetIndex: { diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp index 56b925626d845..ccc534a890419 100644 --- a/llvm/lib/IR/StructuralHash.cpp +++ b/llvm/lib/IR/StructuralHash.cpp @@ -46,7 +46,7 @@ class StructuralHashImpl { /// Assign a unique ID to each Value in the order they are first seen. DenseMap ValueToId; - static stable_hash hashType(Type *ValueType) { + stable_hash hashType(Type *ValueType) { SmallVector Hashes; Hashes.emplace_back(ValueType->getTypeID()); if (ValueType->isIntegerTy()) @@ -65,7 +65,7 @@ class StructuralHashImpl { } } - static stable_hash hashAPInt(const APInt &I) { + stable_hash hashAPInt(const APInt &I) { SmallVector Hashes; Hashes.emplace_back(I.getBitWidth()); auto RawVals = ArrayRef(I.getRawData(), I.getNumWords()); @@ -73,39 +73,11 @@ class StructuralHashImpl { return stable_hash_combine(Hashes); } - static stable_hash hashAPFloat(const APFloat &F) { + stable_hash hashAPFloat(const APFloat &F) { return hashAPInt(F.bitcastToAPInt()); } - static stable_hash hashGlobalVariable(const GlobalVariable &GVar) { - if (!GVar.hasInitializer()) - return hashGlobalValue(&GVar); - - // Hash the contents of a string. - if (GVar.getName().starts_with(".str")) { - auto *C = GVar.getInitializer(); - if (const auto *Seq = dyn_cast(C)) - if (Seq->isString()) - return stable_hash_name(Seq->getAsString()); - } - - // Hash structural contents of Objective-C metadata in specific sections. - // This can be extended to other metadata if needed. 
- static constexpr const char *SectionNames[] = { - "__cfstring", "__cstring", "__objc_classrefs", - "__objc_methname", "__objc_selrefs", - }; - if (GVar.hasSection()) { - StringRef SectionName = GVar.getSection(); - for (const char *Name : SectionNames) - if (SectionName.contains(Name)) - return hashConstant(GVar.getInitializer()); - } - - return hashGlobalValue(&GVar); - } - - static stable_hash hashGlobalValue(const GlobalValue *GV) { + stable_hash hashGlobalValue(const GlobalValue *GV) { if (!GV->hasName()) return 0; return stable_hash_name(GV->getName()); @@ -115,7 +87,7 @@ class StructuralHashImpl { // FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here // we're interested in computing a hash rather than comparing two Constants. // Some of the logic is simplified, e.g, we don't expand GEPOperator. - static stable_hash hashConstant(const Constant *C) { + stable_hash hashConstant(Constant *C) { SmallVector Hashes; Type *Ty = C->getType(); @@ -126,21 +98,14 @@ class StructuralHashImpl { return stable_hash_combine(Hashes); } - if (auto *GVar = dyn_cast(C)) { - Hashes.emplace_back(hashGlobalVariable(*GVar)); - return stable_hash_combine(Hashes); - } - if (auto *G = dyn_cast(C)) { Hashes.emplace_back(hashGlobalValue(G)); return stable_hash_combine(Hashes); } if (const auto *Seq = dyn_cast(C)) { - if (Seq->isString()) { - Hashes.emplace_back(stable_hash_name(Seq->getAsString())); - return stable_hash_combine(Hashes); - } + Hashes.emplace_back(xxh3_64bits(Seq->getRawDataValues())); + return stable_hash_combine(Hashes); } switch (C->getValueID()) { @@ -301,7 +266,6 @@ class StructuralHashImpl { Hashes.emplace_back(Hash); Hashes.emplace_back(GlobalHeaderHash); Hashes.emplace_back(GV.getValueType()->getTypeID()); - Hashes.emplace_back(hashGlobalVariable(GV)); // Update the combined hash in place. Hash = stable_hash_combine(Hashes); @@ -333,10 +297,6 @@ stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) { return H.getHash(); } -stable_hash llvm::StructuralHash(const GlobalVariable &GVar) { - return StructuralHashImpl::hashGlobalVariable(GVar); -} - stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) { StructuralHashImpl H(DetailedHash); H.update(M); diff --git a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-nsconst.ll b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-nsconst.ll deleted file mode 100644 index 490a778f69e26..0000000000000 --- a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-nsconst.ll +++ /dev/null @@ -1,32 +0,0 @@ -; This test verifies that global variables (ns constant) are hashed based on their initial contents, -; allowing them to be merged even if they appear different due to their names. 
-; Now they become identical functions that can be merged without creating a parameter - -; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s - -; CHECK: _f1.Tgm -; CHECK: _f2.Tgm - -%struct.__NSConstantString_tag = type { ptr, i32, ptr, i64 } -@__CFConstantStringClassReference = external global [0 x i32] -@.str.2 = private unnamed_addr constant [9 x i8] c"cfstring\00", section "__TEXT,__cstring,cstring_literals", align 1 -@_unnamed_cfstring_ = private global %struct.__NSConstantString_tag { ptr @__CFConstantStringClassReference, i32 1992, ptr @.str.2, i64 8 }, section "__DATA,__cfstring", align 8 - -@.str.3 = private unnamed_addr constant [9 x i8] c"cfstring\00", section "__TEXT,__cstring,cstring_literals", align 1 -@_unnamed_cfstring_.2 = private global %struct.__NSConstantString_tag { ptr @__CFConstantStringClassReference, i32 1992, ptr @.str.3, i64 8 }, section "__DATA,__cfstring", align 8 - -declare i32 @hoo(ptr noundef) - -define i32 @f1() { -entry: - %call = tail call i32 @hoo(ptr noundef nonnull @_unnamed_cfstring_) - %add = sub nsw i32 %call, 1 - ret i32 %add -} - -define i32 @f2() { -entry: - %call = tail call i32 @hoo(ptr noundef nonnull @_unnamed_cfstring_.2) - %add = sub nsw i32 %call, 1 - ret i32 %add -} diff --git a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll deleted file mode 100644 index 0073114941501..0000000000000 --- a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll +++ /dev/null @@ -1,38 +0,0 @@ -; This test verifies that global variables (objc metadata) are hashed based on their initial contents, -; allowing them to be merged even if they appear different due to their names. -; Now they become identical functions that can be merged without creating a parameter - -; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s - -; CHECK: _f1.Tgm -; CHECK: _f2.Tgm - -%struct._class_t = type { ptr, ptr, ptr, ptr, ptr } - -@"OBJC_CLASS_$_MyClass" = external global %struct._class_t -@"OBJC_CLASSLIST_REFERENCES_$_" = internal global ptr @"OBJC_CLASS_$_MyClass", section "__DATA,__objc_classrefs,regular,no_dead_strip", align 8 -@"OBJC_CLASSLIST_REFERENCES_$_.1" = internal global ptr @"OBJC_CLASS_$_MyClass", section "__DATA,__objc_classrefs,regular,no_dead_strip", align 8 - -@OBJC_METH_VAR_NAME_ = private unnamed_addr constant [6 x i8] c"hello\00", section "__TEXT,__objc_methname,cstring_literals", align 1 -@OBJC_METH_VAR_NAME_.1 = private unnamed_addr constant [6 x i8] c"hello\00", section "__TEXT,__objc_methname,cstring_literals", align 1 - -@OBJC_SELECTOR_REFERENCES_ = internal externally_initialized global ptr @OBJC_METH_VAR_NAME_, section "__DATA,__objc_selrefs,literal_pointers,no_dead_strip", align 8 -@OBJC_SELECTOR_REFERENCES_.1 = internal externally_initialized global ptr @OBJC_METH_VAR_NAME_.1, section "__DATA,__objc_selrefs,literal_pointers,no_dead_strip", align 8 - -declare ptr @objc_msgSend(ptr, ptr, ...) 
- -define i32 @f1() { -entry: - %0 = load ptr, ptr @"OBJC_CLASSLIST_REFERENCES_$_", align 8 - %1 = load ptr, ptr @OBJC_SELECTOR_REFERENCES_, align 8 - %call = tail call i32 @objc_msgSend(ptr noundef %0, ptr noundef %1) - ret i32 %call -} - -define i32 @f2() { -entry: - %0 = load ptr, ptr @"OBJC_CLASSLIST_REFERENCES_$_.1", align 8 - %1 = load ptr, ptr @OBJC_SELECTOR_REFERENCES_.1, align 8 - %call = tail call i32 @objc_msgSend(ptr noundef %0, ptr noundef %1) - ret i32 %call -} diff --git a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-string.ll b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-string.ll deleted file mode 100644 index 1e67425f0b847..0000000000000 --- a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-string.ll +++ /dev/null @@ -1,46 +0,0 @@ -; This test verifies that global variables (string) are hashed based on their initial contents, -; allowing them to be merged even if they appear different due to their names. -; Now they become identical functions that can be merged without creating a parameter. - -; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s - -; CHECK: _f1.Tgm -; CHECK: _f2.Tgm -; CHECK-NOT: _f3.Tgm -; CHECK-NOT: _f4.Tgm - -; The initial contents of `.str` and `.str.1` are identical, but not with those of `.str.2` and `.str.3`. -@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1 -@.str.1 = private unnamed_addr constant [6 x i8] c"hello\00", align 1 -@.str.2 = private unnamed_addr constant [6 x i8] c"diff2\00", align 1 -@.str.3 = private unnamed_addr constant [6 x i8] c"diff3\00", align 1 - -declare i32 @goo(ptr noundef) - -define i32 @f1() { -entry: - %call = tail call i32 @goo(ptr noundef nonnull @.str) - %add = add nsw i32 %call, 1 - ret i32 %add -} - -define i32 @f2() { -entry: - %call = tail call i32 @goo(ptr noundef nonnull @.str.1) - %add = add nsw i32 %call, 1 - ret i32 %add -} - -define i32 @f3() { -entry: - %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.2) - %add = sub nsw i32 %call, 1 - ret i32 %add -} - -define i32 @f4() { -entry: - %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.3) - %add = sub nsw i32 %call, 1 - ret i32 %add -} diff --git a/llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll b/llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll deleted file mode 100644 index 63ba1d491f9c7..0000000000000 --- a/llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll +++ /dev/null @@ -1,64 +0,0 @@ -; This test verifies that global variables are hashed based on their initial contents, -; allowing them to be outlined even if they appear different due to their names. - -; RUN: split-file %s %t - -; The outlined function is created locally. -; Note that `.str.3` is commonly used in both `f1()` and `f2()`. -; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate -aarch64-enable-collect-loh=false \ -; RUN: %t/local-two.ll -o - | FileCheck %s --check-prefix=WRITE - -; WRITE-LABEL: _OUTLINED_FUNCTION_{{.*}}: -; WRITE: adrp x1, l_.str.3 -; WRITE-NEXT: add x1, x1, l_.str.3 -; WRITE-NEXT: mov w2 -; WRITE-NEXT: mov w3 -; WRITE-NEXT: mov w4 -; WRITE-NEXT: b - -; Create an object file and merge it into the cgdata. -; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate -aarch64-enable-collect-loh=false \ -; RUN: -filetype=obj %t/local-two.ll -o %t_write_base -; RUN: llvm-cgdata --merge %t_write_base -o %t_cgdata_base - -; Read the cgdata in the machine outliner for optimistically outlining in local-one.ll. 
-; Note that the hash of `.str.5` in local-one.ll matches that of `.str.3` in an outlined tree in the cgdata. - -; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-use-path=%t_cgdata_base -aarch64-enable-collect-loh=false \ -; RUN: %t/local-one.ll -o - | FileCheck %s --check-prefix=READ - -; READ-LABEL: _OUTLINED_FUNCTION_{{.*}}: -; READ: adrp x1, l_.str.5 -; READ-NEXT: add x1, x1, l_.str.5 -; READ-NEXT: mov w2 -; READ-NEXT: mov w3 -; READ-NEXT: mov w4 -; READ-NEXT: b - -;--- local-two.ll -@.str.1 = private unnamed_addr constant [3 x i8] c"f1\00", align 1 -@.str.2 = private unnamed_addr constant [3 x i8] c"f2\00", align 1 -@.str.3 = private unnamed_addr constant [6 x i8] c"hello\00", align 1 - -declare noundef i32 @goo(ptr noundef, ptr noundef, i32, i32, i32) -define i32 @f1() minsize { -entry: - %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.1, ptr noundef nonnull @.str.3, i32 1, i32 2, i32 3) - ret i32 %call -} -define i32 @f2() minsize { -entry: - %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.2, ptr noundef nonnull @.str.3, i32 1, i32 2, i32 3) - ret i32 %call -} - -;--- local-one.ll -@.str.4 = private unnamed_addr constant [3 x i8] c"f3\00", align 1 -@.str.5 = private unnamed_addr constant [6 x i8] c"hello\00", align 1 - -declare noundef i32 @goo(ptr noundef, ptr noundef, i32, i32, i32) -define i32 @f1() minsize { -entry: - %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.4, ptr noundef nonnull @.str.5, i32 1, i32 2, i32 3) - ret i32 %call -} From 46de3a7064250bd2dfc7f8dc6e300474afa9fa97 Mon Sep 17 00:00:00 2001 From: Sarah Spall Date: Tue, 3 Dec 2024 17:43:36 -0800 Subject: [PATCH 156/191] [HLSL] get inout/out ABI for array parameters working (#111047) Get inout/out parameters working for HLSL Arrays. Utilizes the fix from #109323, and corrects the assignment behavior slightly to allow for Non-LValues on the RHS. Closes #106917 --------- Co-authored-by: Chris B --- clang/include/clang/AST/Type.h | 2 + clang/lib/AST/Type.cpp | 6 + clang/lib/CodeGen/CGCall.cpp | 16 ++- clang/lib/CodeGen/CGExpr.cpp | 7 +- clang/lib/Sema/Sema.cpp | 9 ++ clang/lib/Sema/SemaExprCXX.cpp | 19 ++- clang/lib/Sema/SemaOverload.cpp | 50 ++++--- clang/lib/Sema/SemaType.cpp | 4 +- clang/test/AST/HLSL/ArrayOutArgExpr.hlsl | 63 +++++++++ clang/test/CodeGenHLSL/ArrayAssignable.hlsl | 20 ++- .../BasicFeatures/ArrayOutputArguments.hlsl | 122 ++++++++++++++++++ clang/test/SemaHLSL/ArrayTemporary.hlsl | 6 +- .../Language/ArrayOutputArgs-errors.hlsl | 51 ++++++++ 13 files changed, 331 insertions(+), 44 deletions(-) create mode 100644 clang/test/AST/HLSL/ArrayOutArgExpr.hlsl create mode 100644 clang/test/CodeGenHLSL/BasicFeatures/ArrayOutputArguments.hlsl create mode 100644 clang/test/SemaHLSL/Language/ArrayOutputArgs-errors.hlsl diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index 90a52b1dcbf62..6fd6c73a516f0 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -3754,6 +3754,8 @@ class ArrayParameterType : public ConstantArrayType { static bool classof(const Type *T) { return T->getTypeClass() == ArrayParameter; } + + QualType getConstantArrayType(const ASTContext &Ctx) const; }; /// Represents a C array with an unspecified size. 
For example 'int A[]' has diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 366bcc3216b3f..976361d07b68b 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -267,6 +267,12 @@ void ConstantArrayType::Profile(llvm::FoldingSetNodeID &ID, SizeExpr->Profile(ID, Context, true); } +QualType ArrayParameterType::getConstantArrayType(const ASTContext &Ctx) const { + return Ctx.getConstantArrayType(getElementType(), getSize(), getSizeExpr(), + getSizeModifier(), + getIndexTypeQualifiers().getAsOpaqueValue()); +} + DependentSizedArrayType::DependentSizedArrayType(QualType et, QualType can, Expr *e, ArraySizeModifier sm, unsigned tq, diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 28a5526fbea06..7c8d962fa5a92 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -4725,15 +4725,17 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E, return emitWritebackArg(*this, args, CRE); } - assert(type->isReferenceType() == E->isGLValue() && - "reference binding to unmaterialized r-value!"); - // Add writeback for HLSLOutParamExpr. + // Needs to be before the assert below because HLSLOutArgExpr is an LValue + // and is not a reference. if (const HLSLOutArgExpr *OE = dyn_cast(E)) { EmitHLSLOutArgExpr(OE, args, type); return; } + assert(type->isReferenceType() == E->isGLValue() && + "reference binding to unmaterialized r-value!"); + if (E->isGLValue()) { assert(E->getObjectKind() == OK_Ordinary); return args.add(EmitReferenceBindingToExpr(E), type); @@ -5322,6 +5324,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, IRCallArgs[FirstIRArg] = Val; break; } + } else if (I->getType()->isArrayParameterType()) { + // Don't produce a temporary for ArrayParameterType arguments. + // ArrayParameterType arguments are only created from + // HLSL_ArrayRValue casts and HLSLOutArgExpr expressions, both + // of which create temporaries already. This allows us to just use the + // scalar for the decayed array pointer as the argument directly. + IRCallArgs[FirstIRArg] = I->getKnownRValue().getScalarVal(); + break; } // For non-aggregate args and aggregate args meeting conditions above diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 229f0e29f0234..5fccc9cbb37ec 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -5827,9 +5827,12 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const BinaryOperator *E) { // This function implements trivial copy assignment for HLSL's // assignable constant arrays. LValue CodeGenFunction::EmitHLSLArrayAssignLValue(const BinaryOperator *E) { - LValue TrivialAssignmentRHS = EmitLValue(E->getRHS()); + // Don't emit an LValue for the RHS because it might not be an LValue LValue LHS = EmitLValue(E->getLHS()); - EmitAggregateAssign(LHS, TrivialAssignmentRHS, E->getLHS()->getType()); + // In C the RHS of an assignment operator is an RValue. + // EmitAggregateAssign takes anan LValue for the RHS. Instead we can call + // EmitInitializationToLValue to emit an RValue into an LValue. 
+ EmitInitializationToLValue(E->getRHS(), LHS); return LHS; } diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 942e7ece4283e..d6517511d7db4 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -723,6 +723,15 @@ ExprResult Sema::ImpCastExprToType(Expr *E, QualType Ty, QualType ExprTy = Context.getCanonicalType(E->getType()); QualType TypeTy = Context.getCanonicalType(Ty); + // This cast is used in place of a regular LValue to RValue cast for + // HLSL Array Parameter Types. It needs to be emitted even if + // ExprTy == TypeTy, except if E is an HLSLOutArgExpr + // Emitting a cast in that case will prevent HLSLOutArgExpr from + // being handled properly in EmitCallArg + if (Kind == CK_HLSLArrayRValue && !isa(E)) + return ImplicitCastExpr::Create(Context, Ty, Kind, E, BasePath, VK, + CurFPFeatureOverrides()); + if (ExprTy == TypeTy) return E; diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index d85819b21c826..f58c0fa21e838 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -4431,10 +4431,21 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType, break; case ICK_HLSL_Array_RValue: - FromType = Context.getArrayParameterType(FromType); - From = ImpCastExprToType(From, FromType, CK_HLSLArrayRValue, VK_PRValue, - /*BasePath=*/nullptr, CCK) - .get(); + if (ToType->isArrayParameterType()) { + FromType = Context.getArrayParameterType(FromType); + From = ImpCastExprToType(From, FromType, CK_HLSLArrayRValue, VK_PRValue, + /*BasePath=*/nullptr, CCK) + .get(); + } else { // FromType must be ArrayParameterType + assert(FromType->isArrayParameterType() && + "FromType must be ArrayParameterType in ICK_HLSL_Array_RValue \ + if it is not ToType"); + const ArrayParameterType *APT = cast(FromType); + FromType = APT->getConstantArrayType(Context); + From = ImpCastExprToType(From, FromType, CK_HLSLArrayRValue, VK_PRValue, + /*BasePath=*/nullptr, CCK) + .get(); + } break; case ICK_Function_To_Pointer: diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 4c9e37bd286de..c174922a926fc 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -2236,33 +2236,24 @@ static bool IsStandardConversion(Sema &S, Expr* From, QualType ToType, return false; } } - // Lvalue-to-rvalue conversion (C++11 4.1): - // A glvalue (3.10) of a non-function, non-array type T can - // be converted to a prvalue. - bool argIsLValue = From->isGLValue(); - if (argIsLValue && !FromType->canDecayToPointerType() && - S.Context.getCanonicalType(FromType) != S.Context.OverloadTy) { - SCS.First = ICK_Lvalue_To_Rvalue; - - // C11 6.3.2.1p2: - // ... if the lvalue has atomic type, the value has the non-atomic version - // of the type of the lvalue ... - if (const AtomicType *Atomic = FromType->getAs()) - FromType = Atomic->getValueType(); - // If T is a non-class type, the type of the rvalue is the - // cv-unqualified version of T. Otherwise, the type of the rvalue - // is T (C++ 4.1p1). C++ can't get here with class types; in C, we - // just strip the qualifiers because they don't matter. 
- FromType = FromType.getUnqualifiedType(); - } else if (S.getLangOpts().HLSL && FromType->isConstantArrayType() && - ToType->isConstantArrayType()) { + bool argIsLValue = From->isGLValue(); + // To handle conversion from ArrayParameterType to ConstantArrayType + // this block must be above the one below because Array parameters + // do not decay and when handling HLSLOutArgExprs and + // the From expression is an LValue. + if (S.getLangOpts().HLSL && FromType->isConstantArrayType() && + ToType->isConstantArrayType()) { // HLSL constant array parameters do not decay, so if the argument is a // constant array and the parameter is an ArrayParameterType we have special // handling here. if (ToType->isArrayParameterType()) { FromType = S.Context.getArrayParameterType(FromType); SCS.First = ICK_HLSL_Array_RValue; + } else if (FromType->isArrayParameterType()) { + const ArrayParameterType *APT = cast(FromType); + FromType = APT->getConstantArrayType(S.Context); + SCS.First = ICK_HLSL_Array_RValue; } else { SCS.First = ICK_Identity; } @@ -2273,6 +2264,25 @@ static bool IsStandardConversion(Sema &S, Expr* From, QualType ToType, SCS.setAllToTypes(ToType); return true; + } else if (argIsLValue && !FromType->canDecayToPointerType() && + S.Context.getCanonicalType(FromType) != S.Context.OverloadTy) { + // Lvalue-to-rvalue conversion (C++11 4.1): + // A glvalue (3.10) of a non-function, non-array type T can + // be converted to a prvalue. + + SCS.First = ICK_Lvalue_To_Rvalue; + + // C11 6.3.2.1p2: + // ... if the lvalue has atomic type, the value has the non-atomic version + // of the type of the lvalue ... + if (const AtomicType *Atomic = FromType->getAs()) + FromType = Atomic->getValueType(); + + // If T is a non-class type, the type of the rvalue is the + // cv-unqualified version of T. Otherwise, the type of the rvalue + // is T (C++ 4.1p1). C++ can't get here with class types; in C, we + // just strip the qualifiers because they don't matter. 
+ FromType = FromType.getUnqualifiedType(); } else if (FromType->isArrayType()) { // Array-to-pointer conversion (C++ 4.2) SCS.First = ICK_Array_To_Pointer; diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index f32edc5ac0644..5fb936297aa54 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -5681,6 +5681,9 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, assert(!T.isNull() && "T must not be null at the end of this function"); if (!AreDeclaratorChunksValid) return Context.getTrivialTypeSourceInfo(T); + + if (state.didParseHLSLParamMod() && !T->isConstantArrayType()) + T = S.HLSL().getInoutParameterType(T); return GetTypeSourceInfoForDeclarator(state, T, TInfo); } @@ -8634,7 +8637,6 @@ static void HandleHLSLParamModifierAttr(TypeProcessingState &State, return; if (Attr.getSemanticSpelling() == HLSLParamModifierAttr::Keyword_inout || Attr.getSemanticSpelling() == HLSLParamModifierAttr::Keyword_out) { - CurType = S.HLSL().getInoutParameterType(CurType); State.setParsedHLSLParamMod(true); } } diff --git a/clang/test/AST/HLSL/ArrayOutArgExpr.hlsl b/clang/test/AST/HLSL/ArrayOutArgExpr.hlsl new file mode 100644 index 0000000000000..10825bf0f93bc --- /dev/null +++ b/clang/test/AST/HLSL/ArrayOutArgExpr.hlsl @@ -0,0 +1,63 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump %s | FileCheck %s + +// CHECK-LABEL: increment +void increment(inout int Arr[2]) { + for (int I = 0; I < 2; I++) + Arr[0] += 2; +} + +// CHECK-LABEL: call +// CHECK: CallExpr 0x{{.*}} {{.*}} 'void' +// CHECK: ImplicitCastExpr 0x{{.*}} {{.*}} 'void (*)(inout int[2])' +// CHECK: DeclRefExpr 0x{{.*}} {{.*}} 'void (inout int[2])' lvalue Function 0x{{.*}} 'increment' 'void (inout int[2])' +// CHECK: HLSLOutArgExpr 0x{{.*}} {{.*}} 'int[2]' lvalue inout +// CHECK: OpaqueValueExpr [[A:0x.*]] {{.*}} 'int[2]' lvalue +// CHECK: DeclRefExpr [[B:0x.*]] {{.*}} 'int[2]' lvalue Var [[E:0x.*]] 'A' 'int[2]' +// CHECK: OpaqueValueExpr [[C:0x.*]] {{.*}} 'int[2]' lvalue +// CHECK: ImplicitCastExpr [[D:0x.*]] {{.*}} 'int[2]' +// CHECK: OpaqueValueExpr [[A]] {{.*}} 'int[2]' lvalue +// CHECK: DeclRefExpr [[B]] {{.*}} 'int[2]' lvalue Var [[E]] 'A' 'int[2]' +// CHECK: BinaryOperator 0x{{.*}} {{.*}} 'int[2]' lvalue '=' +// CHECK: OpaqueValueExpr [[A]] {{.*}} 'int[2]' lvalue +// CHECK: DeclRefExpr 0x{{.*}} {{.*}} 'int[2]' lvalue Var [[E]] 'A' 'int[2]' +// CHECK: ImplicitCastExpr 0x{{.*}} {{.*}} 'int[2]' +// CHECK: OpaqueValueExpr [[C]] {{.*}} 'int[2]' lvalue +// CHECK: ImplicitCastExpr [[D]] {{.*}} 'int[2]' +// CHECK: OpaqueValueExpr [[A]] {{.*}} 'int[2]' lvalue +// CHECK: DeclRefExpr [[B]] {{.*}} 'int[2]' lvalue Var [[E]] 'A' 'int[2]' +export int call() { + int A[2] = { 0, 1 }; + increment(A); + return A[0]; +} + +// CHECK-LABEL: fn2 +void fn2(out int Arr[2]) { + Arr[0] += 5; + Arr[1] += 6; +} + +// CHECK-LABEL: call2 +// CHECK: CallExpr 0x{{.*}} {{.*}} 'void' +// CHECK: ImplicitCastExpr 0x{{.*}} {{.*}} 'void (*)(out int[2])' +// CHECK: DeclRefExpr 0x{{.*}} {{.*}} 'void (out int[2])' lvalue Function 0x{{.*}} 'fn2' 'void (out int[2])' +// CHECK: HLSLOutArgExpr 0x{{.*}} {{.*}} 'int[2]' lvalue out +// CHECK: OpaqueValueExpr [[A:0x.*]] {{.*}} 'int[2]' lvalue +// CHECK: DeclRefExpr [[B:0x.*]] {{.*}} 'int[2]' lvalue Var [[E:0x.*]] 'A' 'int[2]' +// CHECK: OpaqueValueExpr [[C:0x.*]] {{.*}} 'int[2]' lvalue +// CHECK: ImplicitCastExpr [[D:0x.*]] {{.*}} 'int[2]' +// CHECK: OpaqueValueExpr [[A]] {{.*}} 'int[2]' lvalue +// CHECK: DeclRefExpr [[B]] {{.*}} 'int[2]' 
lvalue Var [[E]] 'A' 'int[2]' +// CHECK: BinaryOperator 0x{{.*}} {{.*}} 'int[2]' lvalue '=' +// CHECK: OpaqueValueExpr [[A]] {{.*}} 'int[2]' lvalue +// CHECK: DeclRefExpr [[B]] {{.*}} 'int[2]' lvalue Var [[E]] 'A' 'int[2]' +// CHECK: ImplicitCastExpr 0x{{.*}} {{.*}} 'int[2]' +// CHECK: OpaqueValueExpr [[C]] {{.*}} 'int[2]' lvalue +// CHECK: ImplicitCastExpr [[D]] {{.*}} 'int[2]' +// CHECK: OpaqueValueExpr [[A]] {{.*}} 'int[2]' lvalue +// CHECK: DeclRefExpr [[B]] {{.*}} 'int[2]' lvalue Var [[E]] 'A' 'int[2]' +export int call2() { + int A[2] = { 0, 1 }; + fn2(A); + return 1; +} diff --git a/clang/test/CodeGenHLSL/ArrayAssignable.hlsl b/clang/test/CodeGenHLSL/ArrayAssignable.hlsl index a0dfe26e5d147..e2ff2de68ed99 100644 --- a/clang/test/CodeGenHLSL/ArrayAssignable.hlsl +++ b/clang/test/CodeGenHLSL/ArrayAssignable.hlsl @@ -100,18 +100,16 @@ void arr_assign6() { } // CHECK-LABEL: define void {{.*}}arr_assign7 -// CHECK: [[Arr3:%.*]] = alloca [2 x [2 x i32]], align 4 -// CHECK-NEXT: [[Arr4:%.*]] = alloca [2 x [2 x i32]], align 4 -// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4 +// CHECK: [[Arr:%.*]] = alloca [2 x [2 x i32]], align 4 +// CHECK-NEXT: [[Arr2:%.*]] = alloca [2 x [2 x i32]], align 4 // CHECK-NOT: alloca -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Arr3]], ptr align 4 {{@.*}}, i32 16, i1 false) -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Arr4]], ptr align 4 {{@.*}}, i32 16, i1 false) -// CHECK-NEXT: store i32 6, ptr [[Tmp]], align 4 -// CHECK-NEXT: [[AIE:%.*]] = getelementptr inbounds i32, ptr [[Tmp]], i32 1 -// CHECK-NEXT: store i32 6, ptr [[AIE]], align 4 -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Arr3]], ptr align 4 [[Arr4]], i32 16, i1 false) -// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[Arr3]], i32 0, i32 0 -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Idx]], ptr align 4 [[Tmp]], i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Arr]], ptr align 4 {{@.*}}, i32 16, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Arr2]], ptr align 4 {{@.*}}, i32 16, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Arr]], ptr align 4 [[Arr2]], i32 16, i1 false) +// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[Arr]], i32 0, i32 0 +// CHECK-NEXT: store i32 6, ptr [[Idx]], align 4 +// CHECK-NEXT: [[Idx2:%.*]] = getelementptr inbounds i32, ptr %arrayidx, i32 1 +// CHECK-NEXT: store i32 6, ptr [[Idx2]], align 4 // CHECK-NEXT: ret void void arr_assign7() { int Arr[2][2] = {{0, 1}, {2, 3}}; diff --git a/clang/test/CodeGenHLSL/BasicFeatures/ArrayOutputArguments.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/ArrayOutputArguments.hlsl new file mode 100644 index 0000000000000..eb7d755bca61d --- /dev/null +++ b/clang/test/CodeGenHLSL/BasicFeatures/ArrayOutputArguments.hlsl @@ -0,0 +1,122 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -disable-llvm-passes -emit-llvm -finclude-default-header -o - %s | FileCheck %s + +// CHECK-LABEL: increment +void increment(inout int Arr[2]) { + for (int I = 0; I < 2; I++) + Arr[0] += 2; +} + +// CHECK-LABEL: arrayCall +// CHECK: [[A:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false) +// CHECK-NEXT: call 
void @{{.*}}increment{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]]) #3 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false) +// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 0 +// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4 +// CHECK-NEXT: ret i32 [[B]] +export int arrayCall() { + int A[2] = { 0, 1 }; + increment(A); + return A[0]; +} + +// CHECK-LABEL: fn2 +void fn2(out int Arr[2]) { + Arr[0] += 5; + Arr[1] += 6; +} + +// CHECK-LABEL: arrayCall2 +// CHECK: [[A:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @{{.*}}fn2{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]]) #3 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false) +// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 0 +// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4 +// CHECK-NEXT: ret i32 [[B]] +export int arrayCall2() { + int A[2] = { 0, 1 }; + fn2(A); + return A[0]; +} + +// CHECK-LABEL: nestedCall +void nestedCall(inout int Arr[2], uint index) { + if (index < 2) { + Arr[index] += 2; + nestedCall(Arr, index+1); + } +} + +// CHECK-LABEL: arrayCall3 +// CHECK: [[A:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false) +// CHECK-NEXT: call void @{{.*}}nestedCall{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]], i32 noundef 0) #3 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false) +// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 1 +// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4 +// CHECK-NEXt: ret i32 [[B]] +export int arrayCall3() { + int A[2] = { 0, 1 }; + nestedCall(A, 0); + return A[1]; +} + +// CHECK-LABEL: outerCall +// CHECK: [[Tmp:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 %{{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void {{.*}}increment{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]]) #3 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 {{.*}}, ptr align 4 [[Tmp]], i32 8, i1 false) +// CHECK-NEXT: ret void +void outerCall(inout int Arr[2]) { + increment(Arr); +} + +// CHECK-LABEL: arrayCall4 +// CHECK: [[A:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false) +// CHECK-NEXT: call void @{{.*}}outerCall{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]]) #3 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false) +// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 0 +// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4 +// CHECK-NEXT: ret i32 [[B]] +export int arrayCall4() { + int A[2] = { 0, 1 }; + outerCall(A); + return A[0]; +} + +// CHECK-LABEL: fn3 
+void fn3(int Arr[2]) {} + +// CHECK-LABEL: outerCall2 +// CHECK: [[Tmp:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void {{.*}}fn3{{.*}}(ptr noundef byval([2 x i32]) align 4 [[Tmp]]) #3 +// CHECK-NEXT: ret void +void outerCall2(inout int Arr[2]) { + fn3(Arr); +} + +// CHECK-LABEL: arrayCall5 +// CHECK: [[A:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false) +// CHECK-NEXT: call void @{{.*}}outerCall2{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]]) #3 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false) +// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 0 +// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4 +// CHECK-NEXT: ret i32 [[B]] +export int arrayCall5() { + int A[2] = { 0, 1 }; + outerCall2(A); + return A[0]; +} diff --git a/clang/test/SemaHLSL/ArrayTemporary.hlsl b/clang/test/SemaHLSL/ArrayTemporary.hlsl index dff9aff7d9b29..0266a198e7ec9 100644 --- a/clang/test/SemaHLSL/ArrayTemporary.hlsl +++ b/clang/test/SemaHLSL/ArrayTemporary.hlsl @@ -75,17 +75,17 @@ void template_fn(T Val) {} // CHECK: CallExpr {{.*}} 'void' // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(float[2])' // CHECK-NEXT: DeclRefExpr {{.*}} 'void (float[2])' lvalue Function {{.*}} 'template_fn' 'void (float[2])' (FunctionTemplate {{.*}} 'template_fn') -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float[2]' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float[2]' // CHECK-NEXT: DeclRefExpr {{.*}} 'float[2]' lvalue ParmVar {{.*}} 'FA2' 'float[2]' // CHECK-NEXT: CallExpr {{.*}} 'void' // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(float[4])' // CHECK-NEXT: DeclRefExpr {{.*}} 'void (float[4])' lvalue Function {{.*}} 'template_fn' 'void (float[4])' (FunctionTemplate {{.*}} 'template_fn') -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float[4]' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float[4]' // CHECK-NEXT: DeclRefExpr {{.*}} 'float[4]' lvalue ParmVar {{.*}} 'FA4' 'float[4]' // CHECK-NEXT: CallExpr {{.*}} 'void' // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(int[3])' // CHECK-NEXT: DeclRefExpr {{.*}} 'void (int[3])' lvalue Function {{.*}} 'template_fn' 'void (int[3])' (FunctionTemplate {{.*}} 'template_fn') -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int[3]' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int[3]' // CHECK-NEXT: DeclRefExpr {{.*}} 'int[3]' lvalue ParmVar {{.*}} 'IA3' 'int[3]' void call(float FA2[2], float FA4[4], int IA3[3]) { diff --git a/clang/test/SemaHLSL/Language/ArrayOutputArgs-errors.hlsl b/clang/test/SemaHLSL/Language/ArrayOutputArgs-errors.hlsl new file mode 100644 index 0000000000000..46bed0d5a7cbd --- /dev/null +++ b/clang/test/SemaHLSL/Language/ArrayOutputArgs-errors.hlsl @@ -0,0 +1,51 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -verify + +void increment(inout int Arr[2]) { + for (int I = 0; I < 2; I++) + Arr[0] += 2; +} + +export int wrongSize() { + int A[3] = { 0, 1, 2 }; + increment(A); + // expected-error@-1 {{no matching function for call to 'increment'}} + // expected-note@*:* {{candidate function not viable: no known conversion from 'int[3]' to 'int[2]' for 1st argument}} + return A[0]; +} + 
+export int wrongSize2() { + int A[1] = { 0 }; + increment(A); + // expected-error@-1 {{no matching function for call to 'increment'}} + // expected-note@*:* {{candidate function not viable: no known conversion from 'int[1]' to 'int[2]' for 1st argument}} + return A[0]; +} + +export void tooFewArgs() { + increment(); + // expected-error@-1 {{no matching function for call to 'increment'}} + // expected-note@*:* {{candidate function not viable: requires single argument 'Arr', but no arguments were provided}} +} + +export float wrongType() { + float A[2] = { 0, 1 }; + increment(A); + // expected-error@-1 {{no matching function for call to 'increment'}} + // expected-note@*:* {{candidate function not viable: no known conversion from 'float[2]' to 'int[2]' for 1st argument}} + return A[0]; +} + +export int wrongType2() { + increment(5); + // expected-error@-1 {{no matching function for call to 'increment'}} + // expected-note@*:* {{candidate function not viable: no known conversion from 'int' to 'int[2]' for 1st argument}} + return 1; +} + +export void tooManyArgs() { + int A[2] = { 0, 1 }; + int B[2] = { 2, 3 }; + increment(A, B); + // expected-error@-1 {{no matching function for call to 'increment'}} + // expected-note@*:* {{candidate function not viable: requires single argument 'Arr', but 2 arguments were provided}} +} From f6f16b5f541773bb074dd042746456deff169de2 Mon Sep 17 00:00:00 2001 From: Augie Fackler Date: Tue, 3 Dec 2024 20:54:58 -0500 Subject: [PATCH 157/191] [bazel] update for a0c4f854cad2b97e44a1b58dc1fd982e1c4d60f3 --- .../bazel/llvm-project-overlay/libc/test/src/stdlib/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/stdlib/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/stdlib/BUILD.bazel index 53b400ac0d593..e4b4b075705e8 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/src/stdlib/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/src/stdlib/BUILD.bazel @@ -161,6 +161,7 @@ libc_support_library( deps = [ "//libc:__support_cpp_limits", "//libc:__support_cpp_type_traits", + "//libc:__support_ctype_utils", "//libc:__support_macros_properties_architectures", "//libc:errno.__internal__", "//libc/test/UnitTest:LibcUnitTest", From caa8aa551bf8d2f29e76aad4ac6dcea6940eef13 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 3 Dec 2024 18:25:44 -0800 Subject: [PATCH 158/191] [SelectionDAG] Rename CallOptions::IsSExt to IsSigned. NFC (#118574) This is eventually passed to shouldSignExtendTypeInLibCall which calls it IsSigned. --- llvm/include/llvm/CodeGen/TargetLowering.h | 8 ++++---- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 2 +- .../CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 4 ++-- .../SelectionDAG/LegalizeIntegerTypes.cpp | 16 ++++++++-------- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 6 +++--- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 6a41094ff933b..e9c0280638580 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -4713,18 +4713,18 @@ class TargetLowering : public TargetLoweringBase { // shouldExtendTypeInLibCall can get the original type before soften. 
ArrayRef OpsVTBeforeSoften; EVT RetVTBeforeSoften; - bool IsSExt : 1; + bool IsSigned : 1; bool DoesNotReturn : 1; bool IsReturnValueUsed : 1; bool IsPostTypeLegalization : 1; bool IsSoften : 1; MakeLibCallOptions() - : IsSExt(false), DoesNotReturn(false), IsReturnValueUsed(true), + : IsSigned(false), DoesNotReturn(false), IsReturnValueUsed(true), IsPostTypeLegalization(false), IsSoften(false) {} - MakeLibCallOptions &setSExt(bool Value = true) { - IsSExt = Value; + MakeLibCallOptions &setIsSigned(bool Value = true) { + IsSigned = Value; return *this; } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 63536336e9622..2b595b26c9c1c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4794,7 +4794,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(IsStrict ? 1 : 0)); TargetLowering::MakeLibCallOptions CallOptions; - CallOptions.setSExt(Signed); + CallOptions.setIsSigned(Signed); std::pair Tmp = TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, dl, Chain); Results.push_back(Tmp.first); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 3f8d117400efd..b52c2c07a7fba 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1044,7 +1044,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, NVT, N->getOperand(IsStrict ? 1 : 0)); TargetLowering::MakeLibCallOptions CallOptions; - CallOptions.setSExt(Signed); + CallOptions.setIsSigned(Signed); CallOptions.setTypeListBeforeSoften(SVT, RVT, true); std::pair Tmp = TLI.makeLibCall(DAG, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), @@ -2099,7 +2099,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); TargetLowering::MakeLibCallOptions CallOptions; - CallOptions.setSExt(true); + CallOptions.setIsSigned(true); std::pair Tmp = TLI.makeLibCall(DAG, LC, VT, Src, CallOptions, dl, Chain); if (Strict) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 493abfde148c6..986d69e6c7a9e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2601,7 +2601,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ExpOp(SDNode *N) { N->getOperand(1 + OpOffset).getValueType().getSizeInBits() && "POWI exponent should match with sizeof(int) when doing the libcall."); TargetLowering::MakeLibCallOptions CallOptions; - CallOptions.setSExt(true); + CallOptions.setIsSigned(true); SDValue Ops[2] = {N->getOperand(0 + OpOffset), N->getOperand(1 + OpOffset)}; std::pair Tmp = TLI.makeLibCall( DAG, LC, N->getValueType(0), Ops, CallOptions, SDLoc(N), Chain); @@ -4006,7 +4006,7 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_XINT(SDNode *N, SDValue &Lo, if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftenFloat) CallOptions.setTypeListBeforeSoften(OpVT, VT); else - CallOptions.setSExt(true); + CallOptions.setIsSigned(true); // FIXME: Is this needed? 
std::pair Tmp = TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl, Chain); SplitInteger(Tmp.first, Lo, Hi); @@ -4098,7 +4098,7 @@ void DAGTypeLegalizer::ExpandIntRes_XROUND_XRINT(SDNode *N, SDValue &Lo, EVT RetVT = N->getValueType(0); TargetLowering::MakeLibCallOptions CallOptions; - CallOptions.setSExt(true); + CallOptions.setIsSigned(true); std::pair Tmp = TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl, Chain); @@ -4269,7 +4269,7 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, // upper half of the result if it exceeds VT. SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; TargetLowering::MakeLibCallOptions CallOptions; - CallOptions.setSExt(true); + CallOptions.setIsSigned(true); SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } @@ -4640,7 +4640,7 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); TargetLowering::MakeLibCallOptions CallOptions; - CallOptions.setSExt(true); + CallOptions.setIsSigned(true); SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } @@ -4880,7 +4880,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, SDValue ShAmt = DAG.getZExtOrTrunc(N->getOperand(1), dl, ShAmtTy); SDValue Ops[2] = {N->getOperand(0), ShAmt}; TargetLowering::MakeLibCallOptions CallOptions; - CallOptions.setSExt(isSigned); + CallOptions.setIsSigned(isSigned); SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); return; } @@ -4970,7 +4970,7 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); TargetLowering::MakeLibCallOptions CallOptions; - CallOptions.setSExt(true); + CallOptions.setIsSigned(true); SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } @@ -5659,7 +5659,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_XINT_TO_FP(SDNode *N) { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this XINT_TO_FP!"); TargetLowering::MakeLibCallOptions CallOptions; - CallOptions.setSExt(true); + CallOptions.setIsSigned(true); std::pair Tmp = TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N), Chain); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index bd4bcadb57d7a..5d9e8b35e24ef 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -160,7 +160,7 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, Entry.Node = NewOp; Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(), - CallOptions.IsSExt); + CallOptions.IsSigned); Entry.IsZExt = !Entry.IsSExt; if (CallOptions.IsSoften && @@ -177,7 +177,7 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering::CallLoweringInfo CLI(DAG); - bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt); + bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSigned); bool zeroExtend = !signExtend; if (CallOptions.IsSoften && @@ -10876,7 +10876,7 @@ void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, // Attempt a libcall. 
SDValue Ret; TargetLowering::MakeLibCallOptions CallOptions; - CallOptions.setSExt(Signed); + CallOptions.setIsSigned(Signed); CallOptions.setIsPostTypeLegalization(true); if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) { // Halves of WideVT are packed into registers in different order From 3dc97557d1ac0cd7924b9679efbfc8f916f5365e Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 3 Dec 2024 20:34:32 -0600 Subject: [PATCH 159/191] [libc] Fix the GPU build when building inside the NATIVE project (#118573) Summary: We use the NATIVE directory for cross-compiling tools that need to be run on the host. This was not forwarding the CMake arguments we used to check if this was a GPU compile that created its own tools. Forward that and simplify. Fixes https://github.com/llvm/llvm-project/issues/118558 --- libc/CMakeLists.txt | 19 +++---------------- llvm/cmake/modules/CrossCompile.cmake | 3 +++ 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/libc/CMakeLists.txt b/libc/CMakeLists.txt index fd82359022cff..11a355b120360 100644 --- a/libc/CMakeLists.txt +++ b/libc/CMakeLists.txt @@ -52,22 +52,9 @@ set(LIBC_NAMESPACE ${default_namespace} # We will build the GPU utilities if we are not doing a runtimes build. option(LIBC_BUILD_GPU_LOADER "Always build the GPU loader utilities" OFF) -if(LIBC_BUILD_GPU_LOADER OR NOT LLVM_RUNTIMES_BUILD) - foreach(_name ${LLVM_RUNTIME_TARGETS}) - if("libc" IN_LIST RUNTIMES_${_name}_LLVM_ENABLE_RUNTIMES) - if("${_name}" STREQUAL "amdgcn-amd-amdhsa" OR "${_name}" STREQUAL "nvptx64-nvidia-cuda") - set(LIBC_NEED_LOADER_UTILS TRUE) - endif() - endif() - endforeach() - if("${LIBC_TARGET_TRIPLE}" STREQUAL "amdgcn-amd-amdhsa" OR - "${LIBC_TARGET_TRIPLE}" STREQUAL "nvptx64-nvidia-cuda") - set(LIBC_NEED_LOADER_UTILS TRUE) - endif() - if(LIBC_NEED_LOADER_UTILS) - add_subdirectory(utils/gpu) - return() - endif() +if(LIBC_BUILD_GPU_LOADER OR ((NOT LLVM_RUNTIMES_BUILD) AND LLVM_LIBC_GPU_BUILD)) + add_subdirectory(utils/gpu) + return() endif() add_subdirectory(newhdrgen) diff --git a/llvm/cmake/modules/CrossCompile.cmake b/llvm/cmake/modules/CrossCompile.cmake index e36a71f522d82..c22d185349dcc 100644 --- a/llvm/cmake/modules/CrossCompile.cmake +++ b/llvm/cmake/modules/CrossCompile.cmake @@ -78,6 +78,9 @@ function(llvm_create_cross_target project_name target_name toolchain buildtype) list(APPEND libc_flags -DLLVM_FORCE_BUILD_RUNTIME=ON) endif() endif() + if(LLVM_LIBC_GPU_BUILD) + list(APPEND libc_flags -DLLVM_LIBC_GPU_BUILD=ON) + endif() add_custom_command(OUTPUT ${${project_name}_${target_name}_BUILD}/CMakeCache.txt COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" From e08e5e2c426467342dbe79fbf3ea9723c17a07d1 Mon Sep 17 00:00:00 2001 From: Longsheng Mou Date: Wed, 4 Dec 2024 11:14:37 +0800 Subject: [PATCH 160/191] [mlir][transforms] Use `isExternal` instead of `isDeclaration` for `FunctionOpInterface` (#116573) This PR fixes a bug in `RemoveDeadValues` where the `FunctionOpInterface` does not have the `isDeclaration` method. As a result, we should use the `isExternal` method instead. Fixes #116347. 
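A rough way to picture the distinction, as a standalone C++ sketch rather than the actual MLIR interface: through `FunctionOpInterface`, a declaration-only function shows up as external, meaning it carries no body region, so the cleanup has to skip it just as it skips public functions.

```
// Illustrative stand-in types only; FuncOp here is not mlir::FunctionOpInterface.
#include <optional>
#include <string>
#include <vector>

struct FuncOp {
  bool publicVisibility = false;
  std::optional<std::vector<std::string>> bodyOps; // nullopt == no body region

  bool isPublic() const { return publicVisibility; }
  bool isExternal() const { return !bodyOps.has_value(); } // declaration-like
};

// Mirrors the intent of the corrected guard: nothing to rewrite if the
// function is visible elsewhere or has no body at all.
bool skipCleanup(const FuncOp &f) { return f.isPublic() || f.isExternal(); }

int main() {
  FuncOp externalFn;              // private and body-less, like the new
  return skipCleanup(externalFn); // llvm.func @no_block_external_func test
}
```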
--- mlir/lib/Transforms/RemoveDeadValues.cpp | 4 ++-- mlir/test/Transforms/remove-dead-values.mlir | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Transforms/RemoveDeadValues.cpp b/mlir/lib/Transforms/RemoveDeadValues.cpp index 0aa9dcb36681b..dbce4a540dcfb 100644 --- a/mlir/lib/Transforms/RemoveDeadValues.cpp +++ b/mlir/lib/Transforms/RemoveDeadValues.cpp @@ -191,10 +191,10 @@ static void cleanSimpleOp(Operation *op, RunLivenessAnalysis &la) { /// non-live across all callers), /// (5) Dropping the uses of these return values from its callers, AND /// (6) Erasing these return values -/// iff it is not public or declaration. +/// iff it is not public or external. static void cleanFuncOp(FunctionOpInterface funcOp, Operation *module, RunLivenessAnalysis &la) { - if (funcOp.isPublic() || funcOp.isDeclaration()) + if (funcOp.isPublic() || funcOp.isExternal()) return; // Get the list of unnecessary (non-live) arguments in `nonLiveArgs`. diff --git a/mlir/test/Transforms/remove-dead-values.mlir b/mlir/test/Transforms/remove-dead-values.mlir index 826f6159a36b6..538755291e81a 100644 --- a/mlir/test/Transforms/remove-dead-values.mlir +++ b/mlir/test/Transforms/remove-dead-values.mlir @@ -377,3 +377,8 @@ func.func @kernel(%arg0: memref<18xf32>) { // CHECK: func.func private @no_block_func_declaration() func.func private @no_block_func_declaration() -> () + +// ----- + +// CHECK: llvm.func @no_block_external_func() +llvm.func @no_block_external_func() attributes {sym_visibility = "private"} From a93b77ce49978dd8fb0d60d9aec8e300b67ce0b8 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 3 Dec 2024 19:23:27 -0800 Subject: [PATCH 161/191] [memprof] Fix IndexedMemProfRecord::clear (#118533) This patch ensures that IndexedMemProfRecord::clear clears every field of IndexedMemProfRecord. This fix is not critical at the moment. The only use of this function is in RecordWriterTrait::EmitData to release the memory we are done with. That is, we never clear the data structure for the purpose of reusing it. --- llvm/include/llvm/ProfileData/MemProf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index 47487c9342594..6ffead4f13aeb 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -416,7 +416,7 @@ struct IndexedMemProfRecord { // the last entry in the list with the same function GUID. llvm::SmallVector CallSiteIds; - void clear() { AllocSites.clear(); } + void clear() { *this = IndexedMemProfRecord(); } void merge(const IndexedMemProfRecord &Other) { // TODO: Filter out duplicates which may occur if multiple memprof From c1afcaf33b03638fd533d14b0d3f65cd5c50755c Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 3 Dec 2024 19:35:52 -0800 Subject: [PATCH 162/191] [RISCV] Match deinterleave(4,8) shuffles to SHL/TRUNC when legal (#118509) We can extend the existing SHL+TRUNC lowering used for deinterleave2 for deinterleave4, and deinterleave8 when the result types are small enough to allow the shift to be legal. On RV64, this means i8 and i16 results for deinterleave4 and i8 results for deinterleave8. 
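The arithmetic behind the lowering is easiest to see on scalars: view each group of Factor narrow elements as a single integer Factor times as wide, shift right by Index * EltBits, and truncate back to the element width; the vector form does the same thing per group with a vector shift plus truncate. A minimal standalone sketch of that idea (plain C++, not the SelectionDAG code, and assuming a little-endian host so the in-memory byte order matches the vector element order):

```
// Conceptual demo only; the real lowering builds vector SRL and TRUNCATE
// nodes, this just shows the per-group arithmetic for factor-4 i8 data.
#include <array>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Field `Index` of a group of four packed 8-bit lanes viewed as one i32.
static uint8_t deinterleaveLane(uint32_t Group, unsigned Index) {
  return static_cast<uint8_t>(Group >> (Index * 8)); // shift right, then truncate
}

int main() {
  // Interleaved source a0 b0 c0 d0 a1 b1 c1 d1, i.e. two factor-4 groups.
  std::array<uint8_t, 8> Src = {1, 2, 3, 4, 5, 6, 7, 8};
  for (unsigned Index = 0; Index < 4; ++Index) {
    std::printf("field %u:", Index);
    for (unsigned G = 0; G < 2; ++G) {
      uint32_t Group;
      std::memcpy(&Group, &Src[G * 4], sizeof(Group)); // little-endian view
      std::printf(" %u", deinterleaveLane(Group, Index));
    }
    std::printf("\n"); // fields come out as {1,5} {2,6} {3,7} {4,8}
  }
  return 0;
}
```

The legality condition quoted above is the same statement seen from the type system: the widened integer type of EltBits * Factor bits must itself be a legal element type, which on RV64 caps factor-4 at i8/i16 results and factor-8 at i8.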
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 106 ++++++++---------- .../rvv/fixed-vectors-deinterleave-load.ll | 19 ++-- .../RISCV/rvv/fixed-vectors-int-shuffles.ll | 22 +--- .../fixed-vectors-shuffle-changes-length.ll | 5 +- .../rvv/fixed-vectors-shuffle-deinterleave.ll | 61 ++++------ .../rvv/fixed-vectors-shufflevector-vnsrl.ll | 52 ++++++--- .../RISCV/rvv/vector-deinterleave-fixed.ll | 17 +-- 7 files changed, 131 insertions(+), 151 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 1e185956bd30e..4544a922def1a 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4446,34 +4446,9 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, VL); } -// Is this a shuffle extracts either the even or odd elements of a vector? -// That is, specifically, either (a) or (b) in the options below. -// Single operand shuffle is easy: -// a) t35: v8i8 = vector_shuffle<0,2,4,6,u,u,u,u> t34, undef -// b) t35: v8i8 = vector_shuffle<1,3,5,7,u,u,u,u> t34, undef -// Double operand shuffle: -// t34: v8i8 = extract_subvector t11, Constant:i64<0> -// t33: v8i8 = extract_subvector t11, Constant:i64<8> -// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33 -// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33 -static SDValue isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, - SDValue V2, ArrayRef Mask, - const RISCVSubtarget &Subtarget) { - // Need to be able to widen the vector. - if (VT.getScalarSizeInBits() >= Subtarget.getELen()) - return SDValue(); - - // First index must be the first even or odd element from V1. - if (Mask[0] != 0 && Mask[0] != 1) - return SDValue(); - - // The others must increase by 2 each time. - for (unsigned i = 1; i != Mask.size(); ++i) - if (Mask[i] != -1 && Mask[i] != Mask[0] + (int)i * 2) - return SDValue(); - - if (1 == count_if(Mask, [](int Idx) { return Idx != -1; })) - return SDValue(); +// Can this shuffle be performed on exactly one (possibly larger) input? +static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1, + SDValue V2) { if (V2.isUndef() && RISCVTargetLowering::getLMUL(ContainerVT) != RISCVII::VLMUL::LMUL_8) @@ -4490,12 +4465,13 @@ static SDValue isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, return SDValue(); // Src needs to have twice the number of elements. - if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2)) + unsigned NumElts = VT.getVectorNumElements(); + if (Src.getValueType().getVectorNumElements() != (NumElts * 2)) return SDValue(); // The extracts must extract the two halves of the source. if (V1.getConstantOperandVal(1) != 0 || - V2.getConstantOperandVal(1) != Mask.size()) + V2.getConstantOperandVal(1) != NumElts) return SDValue(); return Src; @@ -4612,36 +4588,29 @@ static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef Mask) { return Rotation; } -// Lower a deinterleave shuffle to vnsrl. -// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true) -// -> [p, q, r, s] (EvenElts == false) -// VT is the type of the vector to return, <[vscale x ]n x ty> -// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty> -static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src, - bool EvenElts, SelectionDAG &DAG) { - // The result is a vector of type . 
The source is a vector of - // type (For the single source case, the high half is undef) - if (Src.getValueType() == VT) { - EVT WideVT = VT.getDoubleNumVectorElementsVT(); - Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, DAG.getUNDEF(WideVT), - Src, DAG.getVectorIdxConstant(0, DL)); - } - - // Bitcast the source vector from -> - // This also converts FP to int. +// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be +// 2, 4, 8 and the integer type Factor-times larger than VT's +// element type must be a legal element type. +// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0) +// -> [p, q, r, s] (Factor=2, Index=1) +static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT, + SDValue Src, unsigned Factor, + unsigned Index, SelectionDAG &DAG) { unsigned EltBits = VT.getScalarSizeInBits(); - MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * 2), - VT.getVectorElementCount()); + ElementCount SrcEC = Src.getValueType().getVectorElementCount(); + MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), + SrcEC.divideCoefficientBy(Factor)); + MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), + SrcEC.divideCoefficientBy(Factor)); Src = DAG.getBitcast(WideSrcVT, Src); - MVT IntVT = VT.changeVectorElementTypeToInteger(); - - // If we want even elements, then the shift amount is 0. Otherwise, shift by - // the original element size. - unsigned Shift = EvenElts ? 0 : EltBits; + unsigned Shift = Index * EltBits; SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src, DAG.getConstant(Shift, DL, WideSrcVT)); - Res = DAG.getNode(ISD::TRUNCATE, DL, IntVT, Res); + Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res); + MVT IntVT = VT.changeVectorElementTypeToInteger(); + Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, IntVT, DAG.getUNDEF(IntVT), Res, + DAG.getVectorIdxConstant(0, DL)); return DAG.getBitcast(VT, Res); } @@ -5332,11 +5301,24 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef()) return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1); - // If this is a deinterleave and we can widen the vector, then we can use - // vnsrl to deinterleave. - if (SDValue Src = - isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) - return getDeinterleaveViaVNSRL(DL, VT, Src, Mask[0] == 0, DAG); + // If this is a deinterleave(2,4,8) and we can widen the vector, then we can + // use shift and truncate to perform the shuffle. + // TODO: For Factor=6, we can perform the first step of the deinterleave via + // shift-and-trunc reducing total cost for everything except an mf8 result. + // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough + // to do the entire operation. 
+ if (VT.getScalarSizeInBits() < Subtarget.getELen()) { + const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits(); + assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8); + for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) { + unsigned Index = 0; + if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) && + 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) { + if (SDValue Src = getSingleShuffleSrc(VT, ContainerVT, V1, V2)) + return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG); + } + } + } if (SDValue V = lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG)) @@ -10739,8 +10721,8 @@ SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op, // We can deinterleave through vnsrl.wi if the element type is smaller than // ELEN if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) { - SDValue Even = getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, DAG); - SDValue Odd = getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, DAG); + SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG); + SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG); return DAG.getMergeValues({Even, Odd}, DL); } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll index ae5dbfa4bf30b..ede25d2c9bb07 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll @@ -24,19 +24,20 @@ define {<16 x i1>, <16 x i1>} @vector_deinterleave_load_v16i1_v32i1(ptr %p) { ; CHECK-NEXT: vadd.vi v12, v11, -16 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v8, 2 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vadd.vi v11, v11, -15 ; CHECK-NEXT: vmerge.vim v13, v10, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v14, v10, 1, v0 -; CHECK-NEXT: vnsrl.wi v8, v14, 0 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0 +; CHECK-NEXT: vnsrl.wi v8, v8, 8 ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vrgather.vv v8, v13, v12, v0.t -; CHECK-NEXT: vnsrl.wi v12, v14, 8 -; CHECK-NEXT: vmsne.vi v10, v8, 0 -; CHECK-NEXT: vrgather.vv v12, v13, v11, v0.t -; CHECK-NEXT: vmsne.vi v8, v12, 0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vrgather.vv v10, v13, v12, v0.t +; CHECK-NEXT: vrgather.vv v8, v13, v11, v0.t +; CHECK-NEXT: vmsne.vi v0, v10, 0 +; CHECK-NEXT: vmsne.vi v8, v8, 0 ; CHECK-NEXT: ret %vec = load <32 x i1>, ptr %p %retval = call {<16 x i1>, <16 x i1>} @llvm.vector.deinterleave2.v32i1(<32 x i1> %vec) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll index 21417fe8deefb..5d307211ead6e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -721,24 +721,12 @@ define <8 x i32> @shuffle_v8i32_2(<8 x i32> %x, <8 x i32> %y) { define <8 x i8> @shuffle_v64i8_v8i8(<64 x i8> %wide.vec) { ; CHECK-LABEL: shuffle_v64i8_v8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 4112 -; CHECK-NEXT: li a1, 240 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: addi a0, a0, 257 -; CHECK-NEXT: vmv.s.x v14, a0 -; CHECK-NEXT: lui a0, 
98561 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vcompress.vm v12, v8, v14 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a1 -; CHECK-NEXT: addi a0, a0, -2048 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu -; CHECK-NEXT: vrgather.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v8, v12 +; CHECK-NEXT: vnsrl.wi v12, v8, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v12, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 ; CHECK-NEXT: ret %s = shufflevector <64 x i8> %wide.vec, <64 x i8> poison, <8 x i32> ret <8 x i8> %s diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll index 9d2c722334b08..66f95b7077672 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll @@ -104,7 +104,7 @@ define <4 x i32> @v4i32_v16i32(<16 x i32>) { ; RV32-NEXT: vmv.v.i v0, 10 ; RV32-NEXT: vsetivli zero, 2, e16, m1, tu, ma ; RV32-NEXT: vslideup.vi v14, v12, 1 -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vnsrl.wx v12, v8, a0 ; RV32-NEXT: vsetivli zero, 8, e32, m4, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 8 @@ -116,9 +116,8 @@ define <4 x i32> @v4i32_v16i32(<16 x i32>) { ; RV64-LABEL: v4i32_v16i32: ; RV64: # %bb.0: ; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vmv.v.i v0, 10 -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64-NEXT: vnsrl.wx v12, v8, a0 ; RV64-NEXT: vsetivli zero, 8, e32, m4, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll index 6450174d44ca8..08fd4fb85ff3f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll @@ -67,22 +67,12 @@ define void @deinterleave4_0_i8(ptr %in, ptr %out) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: li a0, -1 -; CHECK-NEXT: vmv.v.i v0, 12 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v9, v8, 4 -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vwaddu.vv v10, v8, v9 -; CHECK-NEXT: vwmaccu.vx v10, a0, v9 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vsll.vi v9, v9, 2 -; CHECK-NEXT: vadd.vi v9, v9, -8 -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 8 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vrgather.vv v10, v8, v9, v0.t -; CHECK-NEXT: vse8.v v10, (a1) +; CHECK-NEXT: vse8.v v8, (a1) ; CHECK-NEXT: ret entry: %0 = load <16 x i8>, ptr %in, align 1 @@ -96,20 +86,11 @@ define void @deinterleave4_8_i8(ptr %in, ptr %out) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: li a0, -1 -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: 
vslidedown.vi v9, v8, 8 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v9, 4 -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vwaddu.vv v11, v9, v10 -; CHECK-NEXT: vwmaccu.vx v11, a0, v10 -; CHECK-NEXT: li a0, 34 -; CHECK-NEXT: vmv.v.i v0, 12 -; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 8 +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vcompress.vm v10, v8, v9 -; CHECK-NEXT: vmerge.vvm v8, v10, v11, v0 ; CHECK-NEXT: vse8.v v8, (a1) ; CHECK-NEXT: ret entry: @@ -268,10 +249,12 @@ define void @deinterleave8_0_i8(ptr %in, ptr %out) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v9, v8, 8 -; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vse8.v v8, (a1) ; CHECK-NEXT: ret @@ -287,12 +270,14 @@ define void @deinterleave8_8_i8(ptr %in, ptr %out) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.v.i v0, -3 -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v9, v8, 8 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vrgather.vi v9, v8, 1, v0.t -; CHECK-NEXT: vse8.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vse8.v v8, (a1) ; CHECK-NEXT: ret entry: %0 = load <16 x i8>, ptr %in, align 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll index 312520ae28374..3aa16070470ea 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll @@ -441,13 +441,25 @@ entry: } define void @vnsrl_0_i8_single_src(ptr %in, ptr %out) { -; CHECK-LABEL: vnsrl_0_i8_single_src: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vnsrl.wi v8, v8, 0 -; CHECK-NEXT: vse8.v v8, (a1) -; CHECK-NEXT: ret +; V-LABEL: vnsrl_0_i8_single_src: +; V: # %bb.0: # %entry +; V-NEXT: vsetivli zero, 8, e8, mf4, ta, ma +; V-NEXT: vle8.v v8, (a0) +; V-NEXT: vsetivli zero, 4, e8, mf8, ta, ma +; V-NEXT: vnsrl.wi v8, v8, 0 +; V-NEXT: vsetivli zero, 8, e8, mf4, ta, ma +; V-NEXT: vse8.v v8, (a1) +; V-NEXT: ret +; +; ZVE32F-LABEL: vnsrl_0_i8_single_src: +; ZVE32F: # %bb.0: # %entry +; ZVE32F-NEXT: vsetivli zero, 8, e8, mf4, ta, ma +; ZVE32F-NEXT: vle8.v v8, (a0) +; ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVE32F-NEXT: vnsrl.wi v8, v8, 0 +; ZVE32F-NEXT: vsetivli zero, 8, e8, mf4, ta, ma +; ZVE32F-NEXT: vse8.v v8, (a1) +; ZVE32F-NEXT: ret entry: %0 = load <8 x i8>, ptr %in, align 1 %shuffle.i5 = shufflevector 
<8 x i8> %0, <8 x i8> poison, <8 x i32> @@ -456,13 +468,25 @@ entry: } define void @vnsrl_0_i8_single_src2(ptr %in, ptr %out) { -; CHECK-LABEL: vnsrl_0_i8_single_src2: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vnsrl.wi v8, v8, 0 -; CHECK-NEXT: vse8.v v8, (a1) -; CHECK-NEXT: ret +; V-LABEL: vnsrl_0_i8_single_src2: +; V: # %bb.0: # %entry +; V-NEXT: vsetivli zero, 8, e8, mf4, ta, ma +; V-NEXT: vle8.v v8, (a0) +; V-NEXT: vsetivli zero, 4, e8, mf8, ta, ma +; V-NEXT: vnsrl.wi v8, v8, 0 +; V-NEXT: vsetivli zero, 8, e8, mf4, ta, ma +; V-NEXT: vse8.v v8, (a1) +; V-NEXT: ret +; +; ZVE32F-LABEL: vnsrl_0_i8_single_src2: +; ZVE32F: # %bb.0: # %entry +; ZVE32F-NEXT: vsetivli zero, 8, e8, mf4, ta, ma +; ZVE32F-NEXT: vle8.v v8, (a0) +; ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVE32F-NEXT: vnsrl.wi v8, v8, 0 +; ZVE32F-NEXT: vsetivli zero, 8, e8, mf4, ta, ma +; ZVE32F-NEXT: vse8.v v8, (a1) +; ZVE32F-NEXT: ret entry: %0 = load <8 x i8>, ptr %in, align 1 %shuffle.i5 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32> diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll index 41cf886c3ab75..6de846b2582da 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll @@ -19,18 +19,19 @@ define {<16 x i1>, <16 x i1>} @vector_deinterleave_v16i1_v32i1(<32 x i1> %vec) { ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; CHECK-NEXT: vmerge.vim v14, v10, 1, v0 -; CHECK-NEXT: vadd.vi v8, v12, -16 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vadd.vi v10, v12, -16 ; CHECK-NEXT: vadd.vi v12, v12, -15 -; CHECK-NEXT: vnsrl.wi v10, v14, 0 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v13, v8, 0 +; CHECK-NEXT: vnsrl.wi v8, v8, 8 ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vrgather.vv v10, v11, v8, v0.t -; CHECK-NEXT: vnsrl.wi v8, v14, 8 -; CHECK-NEXT: vmsne.vi v10, v10, 0 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vrgather.vv v13, v11, v10, v0.t ; CHECK-NEXT: vrgather.vv v8, v11, v12, v0.t +; CHECK-NEXT: vmsne.vi v0, v13, 0 ; CHECK-NEXT: vmsne.vi v8, v8, 0 -; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret %retval = call {<16 x i1>, <16 x i1>} @llvm.vector.deinterleave2.v32i1(<32 x i1> %vec) ret {<16 x i1>, <16 x i1>} %retval From f947d5afd951fe0883e8afe2d00c00d6a97e29bd Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 3 Dec 2024 19:47:54 -0800 Subject: [PATCH 163/191] [RISCV] Reduce redundancy in vnsrl tests Triggered by discussion on pr118509. 
--- .../rvv/fixed-vectors-shufflevector-vnsrl.ll | 35 +++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll index 3aa16070470ea..847ef9a7b3601 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll @@ -447,7 +447,6 @@ define void @vnsrl_0_i8_single_src(ptr %in, ptr %out) { ; V-NEXT: vle8.v v8, (a0) ; V-NEXT: vsetivli zero, 4, e8, mf8, ta, ma ; V-NEXT: vnsrl.wi v8, v8, 0 -; V-NEXT: vsetivli zero, 8, e8, mf4, ta, ma ; V-NEXT: vse8.v v8, (a1) ; V-NEXT: ret ; @@ -457,18 +456,42 @@ define void @vnsrl_0_i8_single_src(ptr %in, ptr %out) { ; ZVE32F-NEXT: vle8.v v8, (a0) ; ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; ZVE32F-NEXT: vnsrl.wi v8, v8, 0 +; ZVE32F-NEXT: vse8.v v8, (a1) +; ZVE32F-NEXT: ret +entry: + %0 = load <8 x i8>, ptr %in, align 1 + %shuffle.i5 = shufflevector <8 x i8> %0, <8 x i8> poison, <4 x i32> + store <4 x i8> %shuffle.i5, ptr %out, align 1 + ret void +} + +define void @vnsrl_8_i8_single_src(ptr %in, ptr %out) { +; V-LABEL: vnsrl_8_i8_single_src: +; V: # %bb.0: # %entry +; V-NEXT: vsetivli zero, 8, e8, mf4, ta, ma +; V-NEXT: vle8.v v8, (a0) +; V-NEXT: vsetivli zero, 4, e8, mf8, ta, ma +; V-NEXT: vnsrl.wi v8, v8, 8 +; V-NEXT: vse8.v v8, (a1) +; V-NEXT: ret +; +; ZVE32F-LABEL: vnsrl_8_i8_single_src: +; ZVE32F: # %bb.0: # %entry ; ZVE32F-NEXT: vsetivli zero, 8, e8, mf4, ta, ma +; ZVE32F-NEXT: vle8.v v8, (a0) +; ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVE32F-NEXT: vnsrl.wi v8, v8, 8 ; ZVE32F-NEXT: vse8.v v8, (a1) ; ZVE32F-NEXT: ret entry: %0 = load <8 x i8>, ptr %in, align 1 - %shuffle.i5 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32> - store <8 x i8> %shuffle.i5, ptr %out, align 1 + %shuffle.i5 = shufflevector <8 x i8> %0, <8 x i8> poison, <4 x i32> + store <4 x i8> %shuffle.i5, ptr %out, align 1 ret void } -define void @vnsrl_0_i8_single_src2(ptr %in, ptr %out) { -; V-LABEL: vnsrl_0_i8_single_src2: +define void @vnsrl_0_i8_single_wideuse(ptr %in, ptr %out) { +; V-LABEL: vnsrl_0_i8_single_wideuse: ; V: # %bb.0: # %entry ; V-NEXT: vsetivli zero, 8, e8, mf4, ta, ma ; V-NEXT: vle8.v v8, (a0) @@ -478,7 +501,7 @@ define void @vnsrl_0_i8_single_src2(ptr %in, ptr %out) { ; V-NEXT: vse8.v v8, (a1) ; V-NEXT: ret ; -; ZVE32F-LABEL: vnsrl_0_i8_single_src2: +; ZVE32F-LABEL: vnsrl_0_i8_single_wideuse: ; ZVE32F: # %bb.0: # %entry ; ZVE32F-NEXT: vsetivli zero, 8, e8, mf4, ta, ma ; ZVE32F-NEXT: vle8.v v8, (a0) From 95566af789d208b8fc422644ab282a43911041f2 Mon Sep 17 00:00:00 2001 From: Max Winkler Date: Tue, 3 Dec 2024 20:18:16 -0800 Subject: [PATCH 164/191] [Clang][AST] Fix MS Mangle concept uneval context template instantiation crash (#117845) Fixes https://github.com/llvm/llvm-project/issues/115990. MSVC mangling got inadvertently broken here, https://github.com/llvm/llvm-project/pull/83997, when it was fixed what decl context a lambda is apart of for uneval contexts. https://godbolt.org/z/K6jb5v145 for reference. Given the following code snippet ``` template concept C = requires(const T& t) { { T::test([](){}) }; }; template struct Widget; template struct Widget {}; struct Baz { template static constexpr decltype(auto) test(F&& f) {} }; void test() { Widget w; } ``` `Baz::test` has a deduced return type which means we must instantiate that template even in an unevaluated context. 
The lambda inside the concept is within the decl context of `struct Widget {};`. So we end up needing to mangle a name of `Baz::test::lambda()>>()` since the lambda isn't apart of an instantiated substituted class `Widget` yet at the point the lambda is instantiated. Upon template instantation of `test` we end up asking for the mangled name so we can add this instantiation to `CodeGenModule::DefferredDecls` since `test` is now referenced but not yet used. I think the longer term more correct solution is to key `DefferedDecls` off of something else than the mangled name to avoid having to mangle names for instantations that are referenced but will never be used since they are only instantiated from an unevaluated context. As a fix for the regression I just created a custom mangling scheme for this case since MSVC has no comparable naming scheme as such a template will never be emitted into the resulting obj as it will never be used. --- clang/lib/AST/MicrosoftMangle.cpp | 10 +++++++- .../CodeGenCXX/ms-uneval-context-crash.cpp | 25 +++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGenCXX/ms-uneval-context-crash.cpp diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index 94a7ce6c1321d..7642ff7ca606c 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -3372,7 +3372,15 @@ void MicrosoftCXXNameMangler::mangleType(const MemberPointerType *T, void MicrosoftCXXNameMangler::mangleType(const TemplateTypeParmType *T, Qualifiers, SourceRange Range) { - Error(Range.getBegin(), "template type parameter type") << Range; + Out << '?'; + + llvm::SmallString<64> Name; + Name += "getDepth()); + Name += "_"; + Name += llvm::utostr(T->getIndex()); + Name += ">"; + mangleSourceName(Name); } void MicrosoftCXXNameMangler::mangleType(const SubstTemplateTypeParmPackType *T, diff --git a/clang/test/CodeGenCXX/ms-uneval-context-crash.cpp b/clang/test/CodeGenCXX/ms-uneval-context-crash.cpp new file mode 100644 index 0000000000000..b2f7e58381da8 --- /dev/null +++ b/clang/test/CodeGenCXX/ms-uneval-context-crash.cpp @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -std=c++20 -fms-compatibility -fms-compatibility-version=19.33 -emit-llvm %s -o - -triple=x86_64-windows-msvc | FileCheck %s + +template +concept C = requires +{ + { T::test([](){}) }; +}; + +template +struct Widget {}; + +template +struct Widget {}; + +struct Baz +{ + template + static constexpr decltype(auto) test(F&&) {} +}; + +void test() +{ + Widget w; +} +// CHECK: @"?test@@YAXXZ" From 982575fd0668aca94d400ea575c1b5d3ea398e37 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 3 Dec 2024 20:51:50 -0800 Subject: [PATCH 165/191] [lld-link] Add context-aware diagnostic functions (#118430) Similar to #112319 for ELF. While there is some initial boilerplate, it can simplify some call sites that use Twine, especially when a printed element uses `ctx` or toString. 
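Reduced to a self-contained sketch, the pattern looks roughly like the following (the names are illustrative stand-ins, not the real classes, which derive from lld's `SyncStream`): a small temporary collects the `<<` pieces and emits the diagnostic when it is destroyed at the end of the statement, so a call site can stream a filename, a symbol or an `Error` directly instead of first concatenating a `Twine`.

```
// Illustrative sketch of the streaming-diagnostic pattern; Context and
// DiagStream are stand-ins, not COFFLinkerContext / COFFSyncStream.
#include <iostream>
#include <sstream>
#include <string>
#include <utility>

enum class DiagLevel { Log, Msg, Warn, Err };

struct Context {
  unsigned errorCount = 0; // stands in for the per-context error state
};

class DiagStream {
public:
  DiagStream(Context &ctx, DiagLevel level) : ctx(ctx), level(level) {}
  ~DiagStream() { // the buffered message is emitted when the temporary dies
    if (level == DiagLevel::Err)
      ++ctx.errorCount;
    std::cerr << buf.str() << '\n';
  }
  template <class T> DiagStream &operator<<(T &&v) {
    buf << std::forward<T>(v);
    return *this;
  }

private:
  Context &ctx;
  DiagLevel level;
  std::ostringstream buf;
};

// C++17 guaranteed copy elision: the stream is constructed in place here.
DiagStream Err(Context &ctx) { return DiagStream(ctx, DiagLevel::Err); }

int main() {
  Context ctx;
  std::string filename = "foo.obj";
  // Streaming call site, in the spirit of the converted Driver.cpp sites:
  Err(ctx) << filename << ": is not a native COFF file. Recompile without /GL";
  return ctx.errorCount != 0;
}
```

A side effect visible in the converted checks is that error counting is also read through the context (`errCount(ctx)` returning `ctx.e.errorCount`) rather than through the free `errorCount()` call.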
--- lld/COFF/Config.h | 44 ++++++++++++++++++++++++++++++++++++++++ lld/COFF/Driver.cpp | 45 +++++++++++++++++++++++++++++++---------- lld/COFF/InputFiles.cpp | 5 +++++ lld/COFF/InputFiles.h | 2 ++ lld/COFF/Symbols.cpp | 7 +++++++ lld/COFF/Symbols.h | 3 +++ 6 files changed, 95 insertions(+), 11 deletions(-) diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h index 57cb443798cd8..9e6b17e87c9e7 100644 --- a/lld/COFF/Config.h +++ b/lld/COFF/Config.h @@ -9,6 +9,7 @@ #ifndef LLD_COFF_CONFIG_H #define LLD_COFF_CONFIG_H +#include "lld/Common/ErrorHandler.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" @@ -27,6 +28,7 @@ namespace lld::coff { using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN; using llvm::COFF::WindowsSubsystem; using llvm::StringRef; +class COFFLinkerContext; class DefinedAbsolute; class StringChunk; class Symbol; @@ -332,6 +334,48 @@ struct Configuration { BuildIDHash buildIDHash = BuildIDHash::None; }; +struct COFFSyncStream : SyncStream { + COFFLinkerContext &ctx; + COFFSyncStream(COFFLinkerContext &ctx, DiagLevel level); +}; + +template +std::enable_if_t>, + const COFFSyncStream &> +operator<<(const COFFSyncStream &s, T &&v) { + s.os << std::forward(v); + return s; +} + +inline const COFFSyncStream &operator<<(const COFFSyncStream &s, + const char *v) { + s.os << v; + return s; +} + +inline const COFFSyncStream &operator<<(const COFFSyncStream &s, Error v) { + s.os << llvm::toString(std::move(v)); + return s; +} + +// Report a log if -verbose is specified. +COFFSyncStream Log(COFFLinkerContext &ctx); + +// Print a message to stdout. +COFFSyncStream Msg(COFFLinkerContext &ctx); + +// Report a warning. Upgraded to an error if /WX is specified. +COFFSyncStream Warn(COFFLinkerContext &ctx); + +// Report an error that will suppress the output file generation. +COFFSyncStream Err(COFFLinkerContext &ctx); + +// Report a fatal error that exits immediately. This should generally be avoided +// in favor of Err. +COFFSyncStream Fatal(COFFLinkerContext &ctx); + +uint64_t errCount(COFFLinkerContext &ctx); + } // namespace lld::coff #endif diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index a0bff69c6302a..e4cfcb335869a 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -56,11 +56,33 @@ #include #include +using namespace lld; +using namespace lld::coff; using namespace llvm; using namespace llvm::object; using namespace llvm::COFF; using namespace llvm::sys; +COFFSyncStream::COFFSyncStream(COFFLinkerContext &ctx, DiagLevel level) + : SyncStream(ctx.e, level), ctx(ctx) {} + +COFFSyncStream coff::Log(COFFLinkerContext &ctx) { + return {ctx, DiagLevel::Log}; +} +COFFSyncStream coff::Msg(COFFLinkerContext &ctx) { + return {ctx, DiagLevel::Msg}; +} +COFFSyncStream coff::Warn(COFFLinkerContext &ctx) { + return {ctx, DiagLevel::Warn}; +} +COFFSyncStream coff::Err(COFFLinkerContext &ctx) { + return {ctx, DiagLevel::Err}; +} +COFFSyncStream coff::Fatal(COFFLinkerContext &ctx) { + return {ctx, DiagLevel::Fatal}; +} +uint64_t coff::errCount(COFFLinkerContext &ctx) { return ctx.e.errorCount; } + namespace lld::coff { bool link(ArrayRef args, llvm::raw_ostream &stdoutOS, @@ -75,7 +97,7 @@ bool link(ArrayRef args, llvm::raw_ostream &stdoutOS, ctx->driver.linkerMain(args); - return errorCount() == 0; + return errCount(*ctx) == 0; } // Parse options of the form "old;new". 
@@ -212,7 +234,8 @@ void LinkerDriver::addBuffer(std::unique_ptr mb, ctx.symtab.addFile(make(ctx, mbref)); break; case file_magic::coff_cl_gl_object: - error(filename + ": is not a native COFF file. Recompile without /GL"); + Err(ctx) << filename + << ": is not a native COFF file. Recompile without /GL"; break; case file_magic::pecoff_executable: if (ctx.config.mingw) { @@ -302,7 +325,7 @@ void LinkerDriver::addArchiveBuffer(MemoryBufferRef mb, StringRef symName, obj->parentName = parentName; ctx.symtab.addFile(obj); - log("Loaded " + toString(obj) + " for " + symName); + Log(ctx) << "Loaded " << obj << " for " << symName; } void LinkerDriver::enqueueArchiveMember(const Archive::Child &c, @@ -310,9 +333,9 @@ void LinkerDriver::enqueueArchiveMember(const Archive::Child &c, StringRef parentName) { auto reportBufferError = [=](Error &&e, StringRef childName) { - fatal("could not get the buffer for the member defining symbol " + - toCOFFString(ctx, sym) + ": " + parentName + "(" + childName + - "): " + toString(std::move(e))); + Fatal(ctx) << "could not get the buffer for the member defining symbol " + << &sym << ": " << parentName << "(" << childName + << "): " << std::move(e); }; if (!c.getParent()->isThin()) { @@ -361,7 +384,7 @@ void LinkerDriver::parseDirectives(InputFile *file) { if (s.empty()) return; - log("Directives: " + toString(file) + ": " + s); + Log(ctx) << "Directives: " << file << ": " << s; ArgParser parser(ctx); // .drectve is always tokenized using Windows shell rules. @@ -414,7 +437,7 @@ void LinkerDriver::parseDirectives(InputFile *file) { break; case OPT_entry: if (!arg->getValue()[0]) - fatal("missing entry point symbol name"); + Fatal(ctx) << "missing entry point symbol name"; ctx.config.entry = addUndefined(mangle(arg->getValue()), true); break; case OPT_failifmismatch: @@ -779,7 +802,7 @@ StringRef LinkerDriver::findDefaultEntry() { if (findUnderscoreMangle("wWinMain")) { if (!findUnderscoreMangle("WinMain")) return mangle("wWinMainCRTStartup"); - warn("found both wWinMain and WinMain; using latter"); + Warn(ctx) << "found both wWinMain and WinMain; using latter"; } return mangle("WinMainCRTStartup"); } @@ -2200,7 +2223,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { config->incremental = false; } - if (errorCount()) + if (errCount(ctx)) return; std::set wholeArchives; @@ -2279,7 +2302,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { stream << " " << path << "\n"; } - message(buffer); + Msg(ctx) << buffer; } // Process files specified as /defaultlib. These must be processed after diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp index 6b5efb34b3f3e..9e33774d695fa 100644 --- a/lld/COFF/InputFiles.cpp +++ b/lld/COFF/InputFiles.cpp @@ -70,6 +70,11 @@ std::string lld::toString(const coff::InputFile *file) { .str(); } +const COFFSyncStream &coff::operator<<(const COFFSyncStream &s, + const InputFile *f) { + return s << toString(f); +} + /// Checks that Source is compatible with being a weak alias to Target. /// If Source is Undefined and has no weak alias set, makes it a weak /// alias to Target. 
diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h index 77f7e298166ee..e727d1376e2f2 100644 --- a/lld/COFF/InputFiles.h +++ b/lld/COFF/InputFiles.h @@ -40,6 +40,8 @@ class DWARFCache; namespace coff { class COFFLinkerContext; +const COFFSyncStream &operator<<(const COFFSyncStream &, const InputFile *); + std::vector getArchiveMembers(llvm::object::Archive *file); using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN; diff --git a/lld/COFF/Symbols.cpp b/lld/COFF/Symbols.cpp index f2fa2392ecbbc..383f62afd8e1d 100644 --- a/lld/COFF/Symbols.cpp +++ b/lld/COFF/Symbols.cpp @@ -53,6 +53,13 @@ std::string toCOFFString(const COFFLinkerContext &ctx, return maybeDemangleSymbol(ctx, b.getName()); } +const COFFSyncStream & +coff::operator<<(const COFFSyncStream &s, + const llvm::object::Archive::Symbol *sym) { + s << maybeDemangleSymbol(s.ctx, sym->getName()); + return s; +} + namespace coff { void Symbol::computeName() { diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h index 203a542466c68..6fabed9fc8f2b 100644 --- a/lld/COFF/Symbols.h +++ b/lld/COFF/Symbols.h @@ -35,6 +35,9 @@ class InputFile; class ObjFile; class SymbolTable; +const COFFSyncStream &operator<<(const COFFSyncStream &, + const llvm::object::Archive::Symbol *); + // The base class for real symbol classes. class Symbol { public: From 442ee78cb414cd7101482bb677e62631e94e5480 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 3 Dec 2024 20:55:56 -0800 Subject: [PATCH 166/191] [lldb-dap] Fix Markdown tables in README.md The markdown tables in the README aren't getting rendered correctly on the LLDB-DAP page in the Visual Studio arketplace [1]. This is a somewhat speculative fix as the table itself appears to be correct. Even if this change doesn't fix it, the new formatting significantly improves the readability. [1] https://marketplace.visualstudio.com/items?itemName=llvm-vs-code-extensions.lldb-dap --- lldb/tools/lldb-dap/README.md | 84 +++++++++++++++++------------------ 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/lldb/tools/lldb-dap/README.md b/lldb/tools/lldb-dap/README.md index 42b5f501e32c6..8196dfdd5073c 100644 --- a/lldb/tools/lldb-dap/README.md +++ b/lldb/tools/lldb-dap/README.md @@ -6,9 +6,9 @@ The extension requires the `lldb-dap` (formerly `lldb-vscode`) binary. This binary is not packaged with the VS Code extension. There are multiple ways to obtain this binary: -* use the binary provided by your toolchain (for example `xcrun -f lldb-dap` on macOS) or contact your toolchain vendor to include it. -* download one of the relase packages from the [LLVM release page](https://github.com/llvm/llvm-project/releases/). The `LLVM-19.1.0-{operating_system}.tar.xz` packages contain a prebuilt `lldb-dap` binary. -* build it from source (see [LLDB's build instructions](https://lldb.llvm.org/resources/build.html)) +* Use the binary provided by your toolchain (for example `xcrun -f lldb-dap` on macOS) or contact your toolchain vendor to include it. +* Download one of the relase packages from the [LLVM release page](https://github.com/llvm/llvm-project/releases/). The `LLVM-19.1.0-{operating_system}.tar.xz` packages contain a prebuilt `lldb-dap` binary. +* Build it from source (see [LLDB's build instructions](https://lldb.llvm.org/resources/build.html)). By default, the VS Code extension will expect to find `lldb-dap` in your `PATH`. Alternatively, you can explictly specify the location of the `lldb-dap` binary using the `lldb-dap.executable-path` setting. 
@@ -179,26 +179,26 @@ The default hostname being used `localhost`. For both launch and attach configurations, lldb-dap accepts the following `lldb-dap` specific key/value pairs: -|parameter |type|req | | -|-------------------|----|:--:|---------| -|**name** |string|Y| A configuration name that will be displayed in the IDE. -|**type** |string|Y| Must be "lldb-dap". -|**request** |string|Y| Must be "launch" or "attach". -|**program** |string|Y| Path to the executable to launch. -|**sourcePath** |string| | Specify a source path to remap \"./\" to allow full paths to be used when setting breakpoints in binaries that have relative source paths. -|**sourceMap** |[string[2]]| | Specify an array of path re-mappings. Each element in the array must be a two element array containing a source and destination pathname. Overrides sourcePath. -|**debuggerRoot** | string| |Specify a working directory to use when launching lldb-dap. If the debug information in your executable contains relative paths, this option can be used so that `lldb-dap` can find source files and object files that have relative paths. -|**commandEscapePrefix** | string | | The escape prefix to use for executing regular LLDB commands in the Debug Console, instead of printing variables. Defaults to a backtick. If it's an empty string, then all expression in the Debug Console are treated as regular LLDB commands. -|**customFrameFormat** | string | | If non-empty, stack frames will have descriptions generated based on the provided format. See https://lldb.llvm.org/use/formatting.html for an explanation on format strings for frames. If the format string contains errors, an error message will be displayed on the Debug Console and the default frame names will be used. This might come with a performance cost because debug information might need to be processed to generate the description. -|**customThreadFormat** | string | | Same as `customFrameFormat`, but for threads instead of stack frames. -|**displayExtendedBacktrace**|bool| | Enable language specific extended backtraces. -|**enableAutoVariableSummaries**|bool| | Enable auto generated summaries for variables when no summaries exist for a given type. This feature can cause performance delays in large projects when viewing variables. -|**enableSyntheticChildDebugging**|bool| | If a variable is displayed using a synthetic children, also display the actual contents of the variable at the end under a [raw] entry. This is useful when creating sythetic child plug-ins as it lets you see the actual contents of the variable. -|**initCommands** |[string]| | LLDB commands executed upon debugger startup prior to creating the LLDB target. -|**preRunCommands** |[string]| | LLDB commands executed just before launching/attaching, after the LLDB target has been created. -|**stopCommands** |[string]| | LLDB commands executed just after each stop. -|**exitCommands** |[string]| | LLDB commands executed when the program exits. -|**terminateCommands** |[string]| | LLDB commands executed when the debugging session ends. +| Parameter | Type | Req | | +|-----------------------------------|-------------|:---:|---------| +| **name** | string | Y | A configuration name that will be displayed in the IDE. +| **type** | string | Y | Must be "lldb-dap". +| **request** | string | Y | Must be "launch" or "attach". +| **program** | string | Y | Path to the executable to launch. 
+| **sourcePath** | string | | Specify a source path to remap \"./\" to allow full paths to be used when setting breakpoints in binaries that have relative source paths. +| **sourceMap** | [string[2]] | | Specify an array of path re-mappings. Each element in the array must be a two element array containing a source and destination pathname. Overrides sourcePath. +| **debuggerRoot** | string | | Specify a working directory to use when launching lldb-dap. If the debug information in your executable contains relative paths, this option can be used so that `lldb-dap` can find source files and object files that have relative paths. +| **commandEscapePrefix** | string | | The escape prefix to use for executing regular LLDB commands in the Debug Console, instead of printing variables. Defaults to a backtick. If it's an empty string, then all expression in the Debug Console are treated as regular LLDB commands. +| **customFrameFormat** | string | | If non-empty, stack frames will have descriptions generated based on the provided format. See https://lldb.llvm.org/use/formatting.html for an explanation on format strings for frames. If the format string contains errors, an error message will be displayed on the Debug Console and the default frame names will be used. This might come with a performance cost because debug information might need to be processed to generate the description. +| **customThreadFormat** | string | | Same as `customFrameFormat`, but for threads instead of stack frames. +| **displayExtendedBacktrace** | bool | | Enable language specific extended backtraces. +| **enableAutoVariableSummaries** | bool | | Enable auto generated summaries for variables when no summaries exist for a given type. This feature can cause performance delays in large projects when viewing variables. +| **enableSyntheticChildDebugging** | bool | | If a variable is displayed using a synthetic children, also display the actual contents of the variable at the end under a [raw] entry. This is useful when creating sythetic child plug-ins as it lets you see the actual contents of the variable. +| **initCommands** | [string] | | LLDB commands executed upon debugger startup prior to creating the LLDB target. +| **preRunCommands** | [string] | | LLDB commands executed just before launching/attaching, after the LLDB target has been created. +| **stopCommands** | [string] | | LLDB commands executed just after each stop. +| **exitCommands** | [string] | | LLDB commands executed when the program exits. +| **terminateCommands** | [string] | | LLDB commands executed when the debugging session ends. All commands and command outputs will be sent to the debugger console when they are executed. Commands can be prefixed with `?` or `!` to modify their behavior: @@ -208,25 +208,25 @@ Commands can be prefixed with `?` or `!` to modify their behavior: For JSON configurations of `"type": "launch"`, the JSON configuration can additionally contain the following key/value pairs: -|parameter |type|req | | -|-------------------|----|:--:|---------| -|**program** |string|Y| Path to the executable to launch. -|**args** |[string]|| An array of command line argument strings to be passed to the program being launched. -|**cwd** |string| | The program working directory. -|**env** |dictionary| | Environment variables to set when launching the program. The format of each environment variable string is "VAR=VALUE" for environment variables with values or just "VAR" for environment variables with no values. 
-|**stopOnEntry** |boolean| | Whether to stop program immediately after launching. -|**runInTerminal** |boolean| | Launch the program inside an integrated terminal in the IDE. Useful for debugging interactive command line programs. -|**launchCommands** |[string]| | LLDB commands executed to launch the program. +| Parameter | Type | Req | | +|-----------------------------------|-------------|:---:|---------| +| **program** | string | Y | Path to the executable to launch. +| **args** | [string] | | An array of command line argument strings to be passed to the program being launched. +| **cwd** | string | | The program working directory. +| **env** | dictionary | | Environment variables to set when launching the program. The format of each environment variable string is "VAR=VALUE" for environment variables with values or just "VAR" for environment variables with no values. +| **stopOnEntry** | boolean | | Whether to stop program immediately after launching. +| **runInTerminal** | boolean | | Launch the program inside an integrated terminal in the IDE. Useful for debugging interactive command line programs. +| **launchCommands** | [string] | | LLDB commands executed to launch the program. For JSON configurations of `"type": "attach"`, the JSON configuration can contain the following `lldb-dap` specific key/value pairs: -|parameter |type |req | | -|-------------------|--------|:--:|---------| -|**program** |string | | Path to the executable to attach to. This value is optional but can help to resolve breakpoints prior the attaching to the program. -|**pid** |number | | The process id of the process you wish to attach to. If **pid** is omitted, the debugger will attempt to attach to the program by finding a process whose file name matches the file name from **porgram**. Setting this value to `${command:pickMyProcess}` will allow interactive process selection in the IDE. -|**waitFor** |boolean | | Wait for the process to launch. -|**attachCommands** |[string]| | LLDB commands that will be executed after **preRunCommands** which take place of the code that normally does the attach. The commands can create a new target and attach or launch it however desired. This allows custom launch and attach configurations. Core files can use `target create --core /path/to/core` to attach to core files. +| Parameter | Type | Req | | +|-----------------------------------|-------------|:---:|---------| +| **program** | string | | Path to the executable to attach to. This value is optional but can help to resolve breakpoints prior the attaching to the program. +| **pid** | number | | The process id of the process you wish to attach to. If **pid** is omitted, the debugger will attempt to attach to the program by finding a process whose file name matches the file name from **porgram**. Setting this value to `${command:pickMyProcess}` will allow interactive process selection in the IDE. +| **waitFor** | boolean | | Wait for the process to launch. +| **attachCommands** | [string] | | LLDB commands that will be executed after **preRunCommands** which take place of the code that normally does the attach. The commands can create a new target and attach or launch it however desired. This allows custom launch and attach configurations. Core files can use `target create --core /path/to/core` to attach to core files. ## Debug Console @@ -295,7 +295,7 @@ and may also be adjusted at runtime using the lldb command lldb-dap includes a command to trigger a Debug Adapter Protocol event from a script. 
-The event maybe a custom DAP event or a standard event, if the event is not +The event maybe a custom DAP event or a standard event, if the event is not handled internally by `lldb-dap`. This command has the format: @@ -316,9 +316,9 @@ For example you can use a launch configuration hook to trigger custom events lik } ``` -[See the specification](https://microsoft.github.io/debug-adapter-protocol/specification#Base_Protocol_Event) -for more details on Debug Adapter Protocol events and the VS Code -[debug.onDidReceiveDebugSessionCustomEvent](https://code.visualstudio.com/api/references/vscode-api#debug.onDidReceiveDebugSessionCustomEvent) +[See the specification](https://microsoft.github.io/debug-adapter-protocol/specification#Base_Protocol_Event) +for more details on Debug Adapter Protocol events and the VS Code +[debug.onDidReceiveDebugSessionCustomEvent](https://code.visualstudio.com/api/references/vscode-api#debug.onDidReceiveDebugSessionCustomEvent) API for handling a custom event from an extension. ## Contributing From 3e11ae69abd17a80759ae1d9565d555f6a869304 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 4 Dec 2024 15:57:34 +1100 Subject: [PATCH 167/191] [ORC] Merge ostream operators for SymbolStringPtrs into SymbolStringPool.h. NFC. These are simple and commonly used. Having them in the SymbolStringPool header saves clients from having to #include "DebugUtils.h" everywhere. --- llvm/include/llvm/ExecutionEngine/Orc/DebugUtils.h | 6 ------ llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h | 3 +++ llvm/lib/ExecutionEngine/Orc/Core.cpp | 5 +++++ llvm/lib/ExecutionEngine/Orc/DebugUtils.cpp | 8 -------- 4 files changed, 8 insertions(+), 14 deletions(-) diff --git a/llvm/include/llvm/ExecutionEngine/Orc/DebugUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/DebugUtils.h index 035139578e08f..fb66bf812f610 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/DebugUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/DebugUtils.h @@ -29,12 +29,6 @@ namespace orc { // --raw_ostream operators for ORC types-- -/// Render a SymbolStringPtr. -raw_ostream &operator<<(raw_ostream &OS, const SymbolStringPtr &Sym); - -/// Render a NonOwningSymbolStringPtr. -raw_ostream &operator<<(raw_ostream &OS, NonOwningSymbolStringPtr Sym); - /// Render a SymbolNameSet. raw_ostream &operator<<(raw_ostream &OS, const SymbolNameSet &Symbols); diff --git a/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h b/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h index f47956a65f2e7..85f08b53f74a3 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h @@ -92,6 +92,9 @@ class SymbolStringPtrBase { return LHS.S < RHS.S; } + friend raw_ostream &operator<<(raw_ostream &OS, + const SymbolStringPtrBase &Sym); + #ifndef NDEBUG // Returns true if the pool entry's ref count is above zero (or if the entry // is an empty or tombstone value). 
Useful for debugging and testing -- this diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp index 8502287016413..df04b86bda148 100644 --- a/llvm/lib/ExecutionEngine/Orc/Core.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp @@ -14,6 +14,7 @@ #include "llvm/ExecutionEngine/Orc/Shared/OrcError.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MSVCErrorWorkarounds.h" +#include "llvm/Support/raw_ostream.h" #include #include @@ -37,6 +38,10 @@ char LookupTask::ID = 0; RegisterDependenciesFunction NoDependenciesToRegister = RegisterDependenciesFunction(); +raw_ostream &operator<<(raw_ostream &OS, const SymbolStringPtrBase &Sym) { + return (OS << Sym.S->first()); +} + void MaterializationUnit::anchor() {} ResourceTracker::ResourceTracker(JITDylibSP JD) { diff --git a/llvm/lib/ExecutionEngine/Orc/DebugUtils.cpp b/llvm/lib/ExecutionEngine/Orc/DebugUtils.cpp index de8d003408871..6ced8c76b037c 100644 --- a/llvm/lib/ExecutionEngine/Orc/DebugUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/DebugUtils.cpp @@ -137,14 +137,6 @@ struct PrintSymbolMapElemsMatchingCLOpts { namespace llvm { namespace orc { -raw_ostream &operator<<(raw_ostream &OS, const SymbolStringPtr &Sym) { - return OS << *Sym; -} - -raw_ostream &operator<<(raw_ostream &OS, NonOwningSymbolStringPtr Sym) { - return OS << *Sym; -} - raw_ostream &operator<<(raw_ostream &OS, const SymbolNameSet &Symbols) { return OS << printSequence(Symbols, '{', '}', PrintAll()); } From 932c5249ff6b3f31f3b67d944cf5ead156b5dd2c Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 3 Dec 2024 21:12:06 -0800 Subject: [PATCH 168/191] [CodeGen] Fix warning after #115531 --- llvm/lib/CodeGen/MachineOperand.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index 18027b2db2947..3a9bdde28a2e7 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -909,8 +909,8 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << printJumpTableEntryReference(getIndex()); break; case MachineOperand::MO_GlobalAddress: - if (const auto *GV = getGlobal()) - getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST); + if (auto *GV = getGlobal()) + GV->printAsOperand(OS, /*PrintType=*/false, MST); else // Invalid, but may appear in debugging scenarios. OS << "globaladdress(null)"; From bc66e9aaa69cf50634929f3bf10e40b336a6abb6 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 3 Dec 2024 21:29:02 -0800 Subject: [PATCH 169/191] [lld-link] Replace message(...) with Msg(ctx) to avoid the global ctx. --- lld/COFF/Driver.cpp | 2 +- lld/COFF/DriverUtils.cpp | 2 +- lld/COFF/PDB.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index e4cfcb335869a..a4f7e6c76af49 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -1638,7 +1638,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { // because it doesn't start with "/", but we deliberately chose "--" to // avoid conflict with /version and for compatibility with clang-cl. 
if (args.hasArg(OPT_dash_dash_version)) { - message(getLLDVersion()); + Msg(ctx) << getLLDVersion(); return; } diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp index 8a72d74bd27ba..f5862977857df 100644 --- a/lld/COFF/DriverUtils.cpp +++ b/lld/COFF/DriverUtils.cpp @@ -922,7 +922,7 @@ opt::InputArgList ArgParser::parse(ArrayRef argv) { std::string msg = "Command line:"; for (const char *s : expandedArgv) msg += " " + std::string(s); - message(msg); + Msg(ctx) << msg; } // Save the command line after response file expansion so we can write it to diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp index c20b54a5d42e4..8553832909522 100644 --- a/lld/COFF/PDB.cpp +++ b/lld/COFF/PDB.cpp @@ -1317,7 +1317,7 @@ void PDBLinker::printStats() { printLargeInputTypeRecs("IPI", tMerger.ipiCounts, tMerger.getIDTable()); } - message(buffer); + Msg(ctx) << buffer; } void PDBLinker::addNatvisFiles() { From 4f41862c5a5241654a37ee994ed0074a815d3633 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Tue, 3 Dec 2024 17:28:08 -0800 Subject: [PATCH 170/191] Reapply "[StructuralHash] Global Variable (#118412)" This reverts commit 6a0d6fc2e92bcfb7cb01a4c6cdd751a9b4b4c159. --- llvm/include/llvm/IR/StructuralHash.h | 3 + llvm/lib/CodeGen/MachineStableHash.cpp | 20 ++++-- llvm/lib/IR/StructuralHash.cpp | 53 +++++++++++++-- .../AArch64/cgdata-merge-gvar-nsconst.ll | 32 ++++++++++ .../CodeGen/AArch64/cgdata-merge-gvar-objc.ll | 38 +++++++++++ .../AArch64/cgdata-merge-gvar-string.ll | 46 +++++++++++++ .../CodeGen/AArch64/cgdata-outline-gvar.ll | 64 +++++++++++++++++++ 7 files changed, 243 insertions(+), 13 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/cgdata-merge-gvar-nsconst.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-merge-gvar-string.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h index 071575137ff57..514dd6f174b90 100644 --- a/llvm/include/llvm/IR/StructuralHash.h +++ b/llvm/include/llvm/IR/StructuralHash.h @@ -31,6 +31,9 @@ class Module; /// to true includes instruction and operand type information. stable_hash StructuralHash(const Function &F, bool DetailedHash = false); +/// Returns a hash of the global variable \p G. +stable_hash StructuralHash(const GlobalVariable &G); + /// Returns a hash of the module \p M by hashing all functions and global /// variables contained within. \param M The module to hash. 
\param DetailedHash /// Whether or not to encode additional information in the function hashes that diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp index facda7a59e2f8..5ab589acee413 100644 --- a/llvm/lib/CodeGen/MachineStableHash.cpp +++ b/llvm/lib/CodeGen/MachineStableHash.cpp @@ -27,6 +27,8 @@ #include "llvm/CodeGen/Register.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/StructuralHash.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/ErrorHandling.h" @@ -93,13 +95,19 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) { return 0; case MachineOperand::MO_GlobalAddress: { const GlobalValue *GV = MO.getGlobal(); - if (!GV->hasName()) { - ++StableHashBailingGlobalAddress; - return 0; + stable_hash GVHash = 0; + if (auto *GVar = dyn_cast(GV)) + GVHash = StructuralHash(*GVar); + if (!GVHash) { + if (!GV->hasName()) { + ++StableHashBailingGlobalAddress; + return 0; + } + GVHash = stable_hash_name(GV->getName()); } - auto Name = GV->getName(); - return stable_hash_combine(MO.getType(), MO.getTargetFlags(), - stable_hash_name(Name), MO.getOffset()); + + return stable_hash_combine(MO.getType(), MO.getTargetFlags(), GVHash, + MO.getOffset()); } case MachineOperand::MO_TargetIndex: { diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp index ccc534a890419..1c617c100c7dc 100644 --- a/llvm/lib/IR/StructuralHash.cpp +++ b/llvm/lib/IR/StructuralHash.cpp @@ -46,7 +46,7 @@ class StructuralHashImpl { /// Assign a unique ID to each Value in the order they are first seen. DenseMap ValueToId; - stable_hash hashType(Type *ValueType) { + static stable_hash hashType(Type *ValueType) { SmallVector Hashes; Hashes.emplace_back(ValueType->getTypeID()); if (ValueType->isIntegerTy()) @@ -65,7 +65,7 @@ class StructuralHashImpl { } } - stable_hash hashAPInt(const APInt &I) { + static stable_hash hashAPInt(const APInt &I) { SmallVector Hashes; Hashes.emplace_back(I.getBitWidth()); auto RawVals = ArrayRef(I.getRawData(), I.getNumWords()); @@ -73,11 +73,39 @@ class StructuralHashImpl { return stable_hash_combine(Hashes); } - stable_hash hashAPFloat(const APFloat &F) { + static stable_hash hashAPFloat(const APFloat &F) { return hashAPInt(F.bitcastToAPInt()); } - stable_hash hashGlobalValue(const GlobalValue *GV) { + static stable_hash hashGlobalVariable(const GlobalVariable &GVar) { + if (!GVar.hasInitializer()) + return hashGlobalValue(&GVar); + + // Hash the contents of a string. + if (GVar.getName().starts_with(".str")) { + auto *C = GVar.getInitializer(); + if (const auto *Seq = dyn_cast(C)) + if (Seq->isString()) + return stable_hash_name(Seq->getAsString()); + } + + // Hash structural contents of Objective-C metadata in specific sections. + // This can be extended to other metadata if needed. 
+ static constexpr const char *SectionNames[] = { + "__cfstring", "__cstring", "__objc_classrefs", + "__objc_methname", "__objc_selrefs", + }; + if (GVar.hasSection()) { + StringRef SectionName = GVar.getSection(); + for (const char *Name : SectionNames) + if (SectionName.contains(Name)) + return hashConstant(GVar.getInitializer()); + } + + return hashGlobalValue(&GVar); + } + + static stable_hash hashGlobalValue(const GlobalValue *GV) { if (!GV->hasName()) return 0; return stable_hash_name(GV->getName()); @@ -87,7 +115,7 @@ class StructuralHashImpl { // FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here // we're interested in computing a hash rather than comparing two Constants. // Some of the logic is simplified, e.g, we don't expand GEPOperator. - stable_hash hashConstant(Constant *C) { + static stable_hash hashConstant(const Constant *C) { SmallVector Hashes; Type *Ty = C->getType(); @@ -98,14 +126,21 @@ class StructuralHashImpl { return stable_hash_combine(Hashes); } + if (auto *GVar = dyn_cast(C)) { + Hashes.emplace_back(hashGlobalVariable(*GVar)); + return stable_hash_combine(Hashes); + } + if (auto *G = dyn_cast(C)) { Hashes.emplace_back(hashGlobalValue(G)); return stable_hash_combine(Hashes); } if (const auto *Seq = dyn_cast(C)) { - Hashes.emplace_back(xxh3_64bits(Seq->getRawDataValues())); - return stable_hash_combine(Hashes); + if (Seq->isString()) { + Hashes.emplace_back(stable_hash_name(Seq->getAsString())); + return stable_hash_combine(Hashes); + } } switch (C->getValueID()) { @@ -297,6 +332,10 @@ stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) { return H.getHash(); } +stable_hash llvm::StructuralHash(const GlobalVariable &GVar) { + return StructuralHashImpl::hashGlobalVariable(GVar); +} + stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) { StructuralHashImpl H(DetailedHash); H.update(M); diff --git a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-nsconst.ll b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-nsconst.ll new file mode 100644 index 0000000000000..490a778f69e26 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-nsconst.ll @@ -0,0 +1,32 @@ +; This test verifies that global variables (ns constant) are hashed based on their initial contents, +; allowing them to be merged even if they appear different due to their names. 
+; Now they become identical functions that can be merged without creating a parameter + +; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s + +; CHECK: _f1.Tgm +; CHECK: _f2.Tgm + +%struct.__NSConstantString_tag = type { ptr, i32, ptr, i64 } +@__CFConstantStringClassReference = external global [0 x i32] +@.str.2 = private unnamed_addr constant [9 x i8] c"cfstring\00", section "__TEXT,__cstring,cstring_literals", align 1 +@_unnamed_cfstring_ = private global %struct.__NSConstantString_tag { ptr @__CFConstantStringClassReference, i32 1992, ptr @.str.2, i64 8 }, section "__DATA,__cfstring", align 8 + +@.str.3 = private unnamed_addr constant [9 x i8] c"cfstring\00", section "__TEXT,__cstring,cstring_literals", align 1 +@_unnamed_cfstring_.2 = private global %struct.__NSConstantString_tag { ptr @__CFConstantStringClassReference, i32 1992, ptr @.str.3, i64 8 }, section "__DATA,__cfstring", align 8 + +declare i32 @hoo(ptr noundef) + +define i32 @f1() { +entry: + %call = tail call i32 @hoo(ptr noundef nonnull @_unnamed_cfstring_) + %add = sub nsw i32 %call, 1 + ret i32 %add +} + +define i32 @f2() { +entry: + %call = tail call i32 @hoo(ptr noundef nonnull @_unnamed_cfstring_.2) + %add = sub nsw i32 %call, 1 + ret i32 %add +} diff --git a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll new file mode 100644 index 0000000000000..0073114941501 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll @@ -0,0 +1,38 @@ +; This test verifies that global variables (objc metadata) are hashed based on their initial contents, +; allowing them to be merged even if they appear different due to their names. +; Now they become identical functions that can be merged without creating a parameter + +; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s + +; CHECK: _f1.Tgm +; CHECK: _f2.Tgm + +%struct._class_t = type { ptr, ptr, ptr, ptr, ptr } + +@"OBJC_CLASS_$_MyClass" = external global %struct._class_t +@"OBJC_CLASSLIST_REFERENCES_$_" = internal global ptr @"OBJC_CLASS_$_MyClass", section "__DATA,__objc_classrefs,regular,no_dead_strip", align 8 +@"OBJC_CLASSLIST_REFERENCES_$_.1" = internal global ptr @"OBJC_CLASS_$_MyClass", section "__DATA,__objc_classrefs,regular,no_dead_strip", align 8 + +@OBJC_METH_VAR_NAME_ = private unnamed_addr constant [6 x i8] c"hello\00", section "__TEXT,__objc_methname,cstring_literals", align 1 +@OBJC_METH_VAR_NAME_.1 = private unnamed_addr constant [6 x i8] c"hello\00", section "__TEXT,__objc_methname,cstring_literals", align 1 + +@OBJC_SELECTOR_REFERENCES_ = internal externally_initialized global ptr @OBJC_METH_VAR_NAME_, section "__DATA,__objc_selrefs,literal_pointers,no_dead_strip", align 8 +@OBJC_SELECTOR_REFERENCES_.1 = internal externally_initialized global ptr @OBJC_METH_VAR_NAME_.1, section "__DATA,__objc_selrefs,literal_pointers,no_dead_strip", align 8 + +declare ptr @objc_msgSend(ptr, ptr, ...) 
+ +define i32 @f1() { +entry: + %0 = load ptr, ptr @"OBJC_CLASSLIST_REFERENCES_$_", align 8 + %1 = load ptr, ptr @OBJC_SELECTOR_REFERENCES_, align 8 + %call = tail call i32 @objc_msgSend(ptr noundef %0, ptr noundef %1) + ret i32 %call +} + +define i32 @f2() { +entry: + %0 = load ptr, ptr @"OBJC_CLASSLIST_REFERENCES_$_.1", align 8 + %1 = load ptr, ptr @OBJC_SELECTOR_REFERENCES_.1, align 8 + %call = tail call i32 @objc_msgSend(ptr noundef %0, ptr noundef %1) + ret i32 %call +} diff --git a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-string.ll b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-string.ll new file mode 100644 index 0000000000000..1e67425f0b847 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-string.ll @@ -0,0 +1,46 @@ +; This test verifies that global variables (string) are hashed based on their initial contents, +; allowing them to be merged even if they appear different due to their names. +; Now they become identical functions that can be merged without creating a parameter. + +; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s + +; CHECK: _f1.Tgm +; CHECK: _f2.Tgm +; CHECK-NOT: _f3.Tgm +; CHECK-NOT: _f4.Tgm + +; The initial contents of `.str` and `.str.1` are identical, but not with those of `.str.2` and `.str.3`. +@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1 +@.str.1 = private unnamed_addr constant [6 x i8] c"hello\00", align 1 +@.str.2 = private unnamed_addr constant [6 x i8] c"diff2\00", align 1 +@.str.3 = private unnamed_addr constant [6 x i8] c"diff3\00", align 1 + +declare i32 @goo(ptr noundef) + +define i32 @f1() { +entry: + %call = tail call i32 @goo(ptr noundef nonnull @.str) + %add = add nsw i32 %call, 1 + ret i32 %add +} + +define i32 @f2() { +entry: + %call = tail call i32 @goo(ptr noundef nonnull @.str.1) + %add = add nsw i32 %call, 1 + ret i32 %add +} + +define i32 @f3() { +entry: + %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.2) + %add = sub nsw i32 %call, 1 + ret i32 %add +} + +define i32 @f4() { +entry: + %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.3) + %add = sub nsw i32 %call, 1 + ret i32 %add +} diff --git a/llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll b/llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll new file mode 100644 index 0000000000000..63ba1d491f9c7 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll @@ -0,0 +1,64 @@ +; This test verifies that global variables are hashed based on their initial contents, +; allowing them to be outlined even if they appear different due to their names. + +; RUN: split-file %s %t + +; The outlined function is created locally. +; Note that `.str.3` is commonly used in both `f1()` and `f2()`. +; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate -aarch64-enable-collect-loh=false \ +; RUN: %t/local-two.ll -o - | FileCheck %s --check-prefix=WRITE + +; WRITE-LABEL: _OUTLINED_FUNCTION_{{.*}}: +; WRITE: adrp x1, l_.str.3 +; WRITE-NEXT: add x1, x1, l_.str.3 +; WRITE-NEXT: mov w2 +; WRITE-NEXT: mov w3 +; WRITE-NEXT: mov w4 +; WRITE-NEXT: b + +; Create an object file and merge it into the cgdata. +; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate -aarch64-enable-collect-loh=false \ +; RUN: -filetype=obj %t/local-two.ll -o %t_write_base +; RUN: llvm-cgdata --merge %t_write_base -o %t_cgdata_base + +; Read the cgdata in the machine outliner for optimistically outlining in local-one.ll. 
+; Note that the hash of `.str.5` in local-one.ll matches that of `.str.3` in an outlined tree in the cgdata. + +; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-use-path=%t_cgdata_base -aarch64-enable-collect-loh=false \ +; RUN: %t/local-one.ll -o - | FileCheck %s --check-prefix=READ + +; READ-LABEL: _OUTLINED_FUNCTION_{{.*}}: +; READ: adrp x1, l_.str.5 +; READ-NEXT: add x1, x1, l_.str.5 +; READ-NEXT: mov w2 +; READ-NEXT: mov w3 +; READ-NEXT: mov w4 +; READ-NEXT: b + +;--- local-two.ll +@.str.1 = private unnamed_addr constant [3 x i8] c"f1\00", align 1 +@.str.2 = private unnamed_addr constant [3 x i8] c"f2\00", align 1 +@.str.3 = private unnamed_addr constant [6 x i8] c"hello\00", align 1 + +declare noundef i32 @goo(ptr noundef, ptr noundef, i32, i32, i32) +define i32 @f1() minsize { +entry: + %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.1, ptr noundef nonnull @.str.3, i32 1, i32 2, i32 3) + ret i32 %call +} +define i32 @f2() minsize { +entry: + %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.2, ptr noundef nonnull @.str.3, i32 1, i32 2, i32 3) + ret i32 %call +} + +;--- local-one.ll +@.str.4 = private unnamed_addr constant [3 x i8] c"f3\00", align 1 +@.str.5 = private unnamed_addr constant [6 x i8] c"hello\00", align 1 + +declare noundef i32 @goo(ptr noundef, ptr noundef, i32, i32, i32) +define i32 @f1() minsize { +entry: + %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.4, ptr noundef nonnull @.str.5, i32 1, i32 2, i32 3) + ret i32 %call +} From 109e4a147faa80aa0ef4da92da203dd6732ee415 Mon Sep 17 00:00:00 2001 From: Brandon Wu Date: Wed, 4 Dec 2024 13:40:02 +0800 Subject: [PATCH 171/191] [RISCV] Handle zeroinitializer of vector tuple Type (#113995) It doesn't make sense to add a new generic ISD to handle riscv tuple type. Instead we use `SPLAT_VECTOR` for ISD and further lower to `VMV_V_X`. Note: If there's `visitSPLAT_VECTOR` in generic DAG combiner, it needs to skip riscv vector tuple type. 
Stack on https://github.com/llvm/llvm-project/pull/114329 --- .../SelectionDAG/SelectionDAGBuilder.cpp | 12 +++++ llvm/lib/IR/Type.cpp | 2 +- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 14 +++++ .../RISCV/vector-tuple-zeroinitializer.ll | 52 +++++++++++++++++++ 4 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/RISCV/vector-tuple-zeroinitializer.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index a38a3e9b91052..b72c5eff22f18 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1896,6 +1896,18 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { DAG.getConstant(0, getCurSDLoc(), MVT::nxv16i1)); } + if (VT.isRISCVVectorTuple()) { + assert(C->isNullValue() && "Can only zero this target type!"); + return NodeMap[V] = DAG.getNode( + ISD::BITCAST, getCurSDLoc(), VT, + DAG.getNode( + ISD::SPLAT_VECTOR, getCurSDLoc(), + EVT::getVectorVT(*DAG.getContext(), MVT::i8, + VT.getSizeInBits().getKnownMinValue() / 8, + true), + DAG.getConstant(0, getCurSDLoc(), MVT::getIntegerVT(8)))); + } + VectorType *VecTy = cast(V->getType()); // Now that we know the number and type of the elements, get that number of diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp index ac6b8b4c19700..ffa80faf6e249 100644 --- a/llvm/lib/IR/Type.cpp +++ b/llvm/lib/IR/Type.cpp @@ -990,7 +990,7 @@ static TargetTypeInfo getTargetTypeInfo(const TargetExtType *Ty) { Ty->getIntParameter(0); return TargetTypeInfo( ScalableVectorType::get(Type::getInt8Ty(C), TotalNumElts), - TargetExtType::CanBeLocal); + TargetExtType::CanBeLocal, TargetExtType::HasZeroInit); } // DirectX resources diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 4544a922def1a..d1da3955c5f20 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -18060,6 +18060,20 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); EVT SrcVT = N0.getValueType(); + if (VT.isRISCVVectorTuple() && N0->getOpcode() == ISD::SPLAT_VECTOR) { + unsigned NF = VT.getRISCVVectorTupleNumFields(); + unsigned NumScalElts = VT.getSizeInBits().getKnownMinValue() / (NF * 8); + SDValue EltVal = DAG.getConstant(0, DL, Subtarget.getXLenVT()); + MVT ScalTy = MVT::getScalableVectorVT(MVT::getIntegerVT(8), NumScalElts); + + SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, ScalTy, EltVal); + + SDValue Result = DAG.getUNDEF(VT); + for (unsigned i = 0; i < NF; ++i) + Result = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Result, Splat, + DAG.getVectorIdxConstant(i, DL)); + return Result; + } // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer // type, widen both sides to avoid a trip through memory. 
if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) && diff --git a/llvm/test/CodeGen/RISCV/vector-tuple-zeroinitializer.ll b/llvm/test/CodeGen/RISCV/vector-tuple-zeroinitializer.ll new file mode 100644 index 0000000000000..fb1104e0a3b80 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/vector-tuple-zeroinitializer.ll @@ -0,0 +1,52 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -mattr=+v \ +; RUN: -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv64 -mattr=+v \ +; RUN: -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK + +define target("riscv.vector.tuple", , 2) @test_tuple_zero_power_of_2() { +; CHECK-LABEL: test_tuple_zero_power_of_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: ret +entry: + ret target("riscv.vector.tuple", , 2) zeroinitializer +} + +define target("riscv.vector.tuple", , 3) @test_tuple_zero_non_power_of_2() { +; CHECK-LABEL: test_tuple_zero_non_power_of_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: ret +entry: + ret target("riscv.vector.tuple", , 3) zeroinitializer +} + +define target("riscv.vector.tuple", , 2) @test_tuple_zero_insert1( %a) { +; CHECK-LABEL: test_tuple_zero_insert1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: ret +entry: + %1 = call target("riscv.vector.tuple", , 2) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_2t.nxv4i32(target("riscv.vector.tuple", , 2) zeroinitializer, %a, i32 0) + ret target("riscv.vector.tuple", , 2) %1 +} + +define target("riscv.vector.tuple", , 2) @test_tuple_zero_insert2( %a) { +; CHECK-LABEL: test_tuple_zero_insert2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-NEXT: vmv.v.i v6, 0 +; CHECK-NEXT: vmv2r.v v10, v8 +; CHECK-NEXT: vmv2r.v v8, v6 +; CHECK-NEXT: ret +entry: + %1 = call target("riscv.vector.tuple", , 2) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_2t.nxv4i32(target("riscv.vector.tuple", , 2) zeroinitializer, %a, i32 1) + ret target("riscv.vector.tuple", , 2) %1 +} From 9c9d4b9e73c19842c4fde581cec0295abed8c977 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 4 Dec 2024 16:35:31 +1100 Subject: [PATCH 172/191] [ORC] Move ostream operator for SymbolStringPtrBase into OrcShared. This will allow clients outside ORC (e.g. JITLink) to use the operator without taking a dependence on ORC. 
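For illustration only, a minimal sketch of what this relocation enables: a client that links only OrcShared (for example a JITLink utility) can stream pooled symbol names by including SymbolStringPool.h alone. The pool instance, the interned name "main", and the helper function below are placeholder examples, not code from this series:

  #include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"
  #include "llvm/Support/raw_ostream.h"

  // Print an interned symbol name. The stream call resolves to
  // operator<<(raw_ostream &, const SymbolStringPtrBase &), whose definition
  // this patch moves into Shared/SymbolStringPool.cpp.
  void dumpSymbolName(llvm::orc::SymbolStringPool &Pool) {
    llvm::orc::SymbolStringPtr Name = Pool.intern("main");
    llvm::errs() << Name << "\n";
  }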
--- llvm/lib/ExecutionEngine/Orc/Core.cpp | 4 ---- .../ExecutionEngine/Orc/Shared/CMakeLists.txt | 1 + .../Orc/Shared/SymbolStringPool.cpp | 18 ++++++++++++++++++ 3 files changed, 19 insertions(+), 4 deletions(-) create mode 100644 llvm/lib/ExecutionEngine/Orc/Shared/SymbolStringPool.cpp diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp index df04b86bda148..3547eabdd0ae7 100644 --- a/llvm/lib/ExecutionEngine/Orc/Core.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp @@ -38,10 +38,6 @@ char LookupTask::ID = 0; RegisterDependenciesFunction NoDependenciesToRegister = RegisterDependenciesFunction(); -raw_ostream &operator<<(raw_ostream &OS, const SymbolStringPtrBase &Sym) { - return (OS << Sym.S->first()); -} - void MaterializationUnit::anchor() {} ResourceTracker::ResourceTracker(JITDylibSP JD) { diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/Shared/CMakeLists.txt index f4e4a6b4f53fc..792b0cc8251cc 100644 --- a/llvm/lib/ExecutionEngine/Orc/Shared/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/Orc/Shared/CMakeLists.txt @@ -5,6 +5,7 @@ add_llvm_component_library(LLVMOrcShared OrcError.cpp OrcRTBridge.cpp SimpleRemoteEPCUtils.cpp + SymbolStringPool.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/ExecutionEngine/Orc diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/SymbolStringPool.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/SymbolStringPool.cpp new file mode 100644 index 0000000000000..9ca4e59288ecf --- /dev/null +++ b/llvm/lib/ExecutionEngine/Orc/Shared/SymbolStringPool.cpp @@ -0,0 +1,18 @@ +//===------- SymbolStringPool.cpp - SymbolStringPool implementation -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm::orc { + +raw_ostream &operator<<(raw_ostream &OS, const SymbolStringPtrBase &Sym) { + return OS << Sym.S->first(); +} + +} // namespace llvm::orc From 5e7c88bf4cca17080dd50556b4b57efa2ca569b0 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 4 Dec 2024 05:48:56 +0000 Subject: [PATCH 173/191] [gn build] Port 9c9d4b9e73c1 --- .../gn/secondary/llvm/lib/ExecutionEngine/Orc/Shared/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/Shared/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/Shared/BUILD.gn index d152bc8c0c2d3..e66271a4d5f1e 100644 --- a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/Shared/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/Shared/BUILD.gn @@ -8,5 +8,6 @@ static_library("Shared") { "OrcError.cpp", "OrcRTBridge.cpp", "SimpleRemoteEPCUtils.cpp", + "SymbolStringPool.cpp", ] } From b076fbb8443d93fd4a6815cb9828559e5af14cfb Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 3 Dec 2024 22:06:55 -0800 Subject: [PATCH 174/191] [TargetLowering] Use Type* instead of EVT in shouldSignExtendTypeInLibCall. (#118587) I want to use this function for GISel too so Type * is a better common interface. All of the callers already convert EVT to Type * as needed by calling lowering anyway. 
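For illustration, a sketch of how a target overrides the hook after this change, mirroring the RISC-V/LoongArch updates in the diff below; "MyTargetLowering" and the Is64Bit query are placeholders for a hypothetical target, not part of this patch:

  // The hook is now phrased on IR Type* rather than EVT, so SelectionDAG and
  // (eventually) GlobalISel callers can share one implementation.
  bool MyTargetLowering::shouldSignExtendTypeInLibCall(Type *Ty,
                                                       bool IsSigned) const {
    // Sign-extend 32-bit integer libcall arguments/results on the 64-bit
    // subtarget; otherwise fall back to the libcall's own signedness.
    if (Is64Bit && Ty->isIntegerTy(32))
      return true;
    return IsSigned;
  }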
--- llvm/include/llvm/CodeGen/TargetLowering.h | 2 +- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 4 ++-- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 6 +++--- llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 4 ++-- llvm/lib/Target/LoongArch/LoongArchISelLowering.h | 2 +- llvm/lib/Target/Mips/MipsISelLowering.cpp | 6 +++--- llvm/lib/Target/Mips/MipsISelLowering.h | 2 +- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 4 ++-- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 5 +++-- llvm/lib/Target/RISCV/RISCVISelLowering.h | 2 +- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 6 +++--- 11 files changed, 22 insertions(+), 21 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index e9c0280638580..a207f3886bd0e 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -2292,7 +2292,7 @@ class TargetLoweringBase { virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const {} /// Returns true if arguments should be sign-extended in lib calls. - virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { + virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const { return IsSigned; } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 2b595b26c9c1c..ca87168929f96 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2104,7 +2104,7 @@ std::pair SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall L InChain = TCChain; TargetLowering::CallLoweringInfo CLI(DAG); - bool signExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, isSigned); + bool signExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, isSigned); CLI.setDebugLoc(SDLoc(Node)) .setChain(InChain) .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, @@ -2135,7 +2135,7 @@ std::pair SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall L Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); Entry.Node = Op; Entry.Ty = ArgTy; - Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, isSigned); + Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgTy, isSigned); Entry.IsZExt = !Entry.IsSExt; Args.push_back(Entry); } diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 5d9e8b35e24ef..68fbd36cf6e62 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -159,8 +159,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, SDValue NewOp = Ops[i]; Entry.Node = NewOp; Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); - Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(), - CallOptions.IsSigned); + Entry.IsSExt = + shouldSignExtendTypeInLibCall(Entry.Ty, CallOptions.IsSigned); Entry.IsZExt = !Entry.IsSExt; if (CallOptions.IsSoften && @@ -177,7 +177,7 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering::CallLoweringInfo CLI(DAG); - bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSigned); + bool signExtend = shouldSignExtendTypeInLibCall(RetTy, CallOptions.IsSigned); bool zeroExtend = !signExtend; if (CallOptions.IsSoften && diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 
16bceacfaa222..5a21ac7ebba0d 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -6404,8 +6404,8 @@ ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const { } bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall( - EVT Type, bool IsSigned) const { - if (Subtarget.is64Bit() && Type == MVT::i32) + Type *Ty, bool IsSigned) const { + if (Subtarget.is64Bit() && Ty->isIntegerTy(32)) return true; return IsSigned; diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 605093b01476d..e6de0dc4e361a 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -273,7 +273,7 @@ class LoongArchTargetLowering : public TargetLowering { return false; } bool shouldConsiderGEPOffsetSplit() const override { return true; } - bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override; + bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override; bool shouldExtendTypeInLibCall(EVT Type) const override; bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index d90348153fd3e..036b59c57d5b0 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -3871,10 +3871,10 @@ MipsTargetLowering::CanLowerReturn(CallingConv::ID CallConv, return CCInfo.CheckReturn(Outs, RetCC_Mips); } -bool MipsTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, +bool MipsTargetLowering::shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const { - if ((ABI.IsN32() || ABI.IsN64()) && Type == MVT::i32) - return true; + if ((ABI.IsN32() || ABI.IsN64()) && Ty->isIntegerTy(32)) + return true; return IsSigned; } diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h index 8033898091c75..e245c056de649 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/llvm/lib/Target/Mips/MipsISelLowering.h @@ -623,7 +623,7 @@ class TargetRegisterClass; SDValue LowerInterruptReturn(SmallVectorImpl &RetOps, const SDLoc &DL, SelectionDAG &DAG) const; - bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override; + bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override; // Inline asm support ConstraintType getConstraintType(StringRef Constraint) const override; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index e917ef3f5e8c9..564fa29bce7d2 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -18828,7 +18828,7 @@ SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op, Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); SDValue Callee = DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout())); - bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, false); + bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, false); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; for (const SDValue &N : Op->op_values()) { @@ -18836,7 +18836,7 @@ SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op, Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); Entry.Node = N; Entry.Ty = ArgTy; - Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, SignExtend); + Entry.IsSExt = 
TLI.shouldSignExtendTypeInLibCall(ArgTy, SignExtend); Entry.IsZExt = !Entry.IsSExt; Args.push_back(Entry); } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index d1da3955c5f20..cfb7ff7696191 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -21299,8 +21299,9 @@ bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const { return true; } -bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { - if (Subtarget.is64Bit() && Type == MVT::i32) +bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(Type *Ty, + bool IsSigned) const { + if (Subtarget.is64Bit() && Ty->isIntegerTy(32)) return true; return IsSigned; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index c753469562eba..bb0d9a71abf7e 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -746,7 +746,7 @@ class RISCVTargetLowering : public TargetLowering { getExceptionSelectorRegister(const Constant *PersonalityFn) const override; bool shouldExtendTypeInLibCall(EVT Type) const override; - bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override; + bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override; /// Returns the register with the specified architectural or ABI name. This /// method is necessary to lower the llvm.read_register.* and diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 8f505b7e198cf..975a0f5050d16 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -2147,8 +2147,8 @@ std::pair SystemZTargetLowering::makeExternalCall( for (SDValue Op : Ops) { Entry.Node = Op; Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); - Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned); - Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned); + Entry.IsSExt = shouldSignExtendTypeInLibCall(Entry.Ty, IsSigned); + Entry.IsZExt = !Entry.IsSExt; Args.push_back(Entry); } @@ -2157,7 +2157,7 @@ std::pair SystemZTargetLowering::makeExternalCall( Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering::CallLoweringInfo CLI(DAG); - bool SignExtend = shouldSignExtendTypeInLibCall(RetVT, IsSigned); + bool SignExtend = shouldSignExtendTypeInLibCall(RetTy, IsSigned); CLI.setDebugLoc(DL) .setChain(Chain) .setCallee(CallConv, RetTy, Callee, std::move(Args)) From 7be3326200ef382705d8e6b2d7dc5378af96b34a Mon Sep 17 00:00:00 2001 From: Nathan Ridge Date: Wed, 4 Dec 2024 01:07:57 -0500 Subject: [PATCH 175/191] [clangd] Re-land "support outgoing calls in call hierarchy" (#117673) Co-authored-by: Quentin Chateau --- clang-tools-extra/clangd/ClangdLSPServer.cpp | 8 + clang-tools-extra/clangd/ClangdLSPServer.h | 3 + clang-tools-extra/clangd/ClangdServer.cpp | 14 +- clang-tools-extra/clangd/ClangdServer.h | 9 + clang-tools-extra/clangd/XRefs.cpp | 59 ++++ clang-tools-extra/clangd/XRefs.h | 3 + clang-tools-extra/clangd/index/Background.cpp | 2 +- clang-tools-extra/clangd/index/Background.h | 3 + clang-tools-extra/clangd/index/FileIndex.cpp | 13 +- clang-tools-extra/clangd/index/FileIndex.h | 5 +- clang-tools-extra/clangd/index/Index.cpp | 5 + clang-tools-extra/clangd/index/Index.h | 35 +++ clang-tools-extra/clangd/index/MemIndex.cpp | 20 ++ 
clang-tools-extra/clangd/index/MemIndex.h | 4 + clang-tools-extra/clangd/index/Merge.cpp | 34 +++ clang-tools-extra/clangd/index/Merge.h | 3 + .../clangd/index/ProjectAware.cpp | 13 + clang-tools-extra/clangd/index/Ref.h | 3 + .../clangd/index/Serialization.cpp | 14 +- .../clangd/index/Serialization.h | 3 +- .../clangd/index/SymbolCollector.cpp | 20 +- .../clangd/index/SymbolCollector.h | 1 + clang-tools-extra/clangd/index/dex/Dex.cpp | 52 +++- clang-tools-extra/clangd/index/dex/Dex.h | 39 ++- .../clangd/index/dex/dexp/Dexp.cpp | 3 +- .../clangd/index/remote/Client.cpp | 7 + .../clangd/index/remote/Index.proto | 18 ++ .../clangd/index/remote/Service.proto | 2 + .../index/remote/marshalling/Marshalling.cpp | 48 +++ .../index/remote/marshalling/Marshalling.h | 7 + .../clangd/index/remote/server/Server.cpp | 55 +++- .../index-serialization/Inputs/sample.idx | Bin 470 -> 470 bytes .../clangd/test/type-hierarchy-ext.test | 2 + .../clangd/test/type-hierarchy.test | 2 + clang-tools-extra/clangd/tool/Check.cpp | 2 +- clang-tools-extra/clangd/tool/ClangdMain.cpp | 14 +- .../clangd/unittests/BackgroundIndexTests.cpp | 3 +- .../clangd/unittests/CallHierarchyTests.cpp | 277 +++++++++++++----- .../clangd/unittests/CodeCompleteTests.cpp | 6 + .../clangd/unittests/DexTests.cpp | 46 +-- .../clangd/unittests/FileIndexTests.cpp | 52 ++-- .../clangd/unittests/IndexTests.cpp | 8 +- .../clangd/unittests/RenameTests.cpp | 14 +- clang-tools-extra/clangd/unittests/TestTU.cpp | 2 +- .../clangd/unittests/TestWorkspace.cpp | 2 +- 45 files changed, 766 insertions(+), 169 deletions(-) diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index 05dd313d0a0d3..1e981825c7c15 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -1415,6 +1415,12 @@ void ClangdLSPServer::onInlayHint(const InlayHintsParams &Params, std::move(Reply)); } +void ClangdLSPServer::onCallHierarchyOutgoingCalls( + const CallHierarchyOutgoingCallsParams &Params, + Callback> Reply) { + Server->outgoingCalls(Params.item, std::move(Reply)); +} + void ClangdLSPServer::applyConfiguration( const ConfigurationSettings &Settings) { // Per-file update to the compilation database. 
@@ -1693,6 +1699,8 @@ void ClangdLSPServer::bindMethods(LSPBinder &Bind, Bind.method("typeHierarchy/subtypes", this, &ClangdLSPServer::onSubTypes); Bind.method("textDocument/prepareCallHierarchy", this, &ClangdLSPServer::onPrepareCallHierarchy); Bind.method("callHierarchy/incomingCalls", this, &ClangdLSPServer::onCallHierarchyIncomingCalls); + if (Opts.EnableOutgoingCalls) + Bind.method("callHierarchy/outgoingCalls", this, &ClangdLSPServer::onCallHierarchyOutgoingCalls); Bind.method("textDocument/selectionRange", this, &ClangdLSPServer::onSelectionRange); Bind.method("textDocument/documentLink", this, &ClangdLSPServer::onDocumentLink); Bind.method("textDocument/semanticTokens/full", this, &ClangdLSPServer::onSemanticTokens); diff --git a/clang-tools-extra/clangd/ClangdLSPServer.h b/clang-tools-extra/clangd/ClangdLSPServer.h index 0b8e4720f5323..597fd9de7ff68 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.h +++ b/clang-tools-extra/clangd/ClangdLSPServer.h @@ -156,6 +156,9 @@ class ClangdLSPServer : private ClangdServer::Callbacks, void onCallHierarchyIncomingCalls( const CallHierarchyIncomingCallsParams &, Callback>); + void onCallHierarchyOutgoingCalls( + const CallHierarchyOutgoingCallsParams &, + Callback>); void onClangdInlayHints(const InlayHintsParams &, Callback); void onInlayHint(const InlayHintsParams &, Callback>); diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp index 9b38be04e7ddd..52be15d3da936 100644 --- a/clang-tools-extra/clangd/ClangdServer.cpp +++ b/clang-tools-extra/clangd/ClangdServer.cpp @@ -215,7 +215,9 @@ ClangdServer::ClangdServer(const GlobalCompilationDatabase &CDB, const ThreadsafeFS &TFS, const Options &Opts, Callbacks *Callbacks) : FeatureModules(Opts.FeatureModules), CDB(CDB), TFS(TFS), - DynamicIdx(Opts.BuildDynamicSymbolIndex ? new FileIndex() : nullptr), + DynamicIdx(Opts.BuildDynamicSymbolIndex + ? new FileIndex(Opts.EnableOutgoingCalls) + : nullptr), ModulesManager(Opts.ModulesManager), ClangTidyProvider(Opts.ClangTidyProvider), UseDirtyHeaders(Opts.UseDirtyHeaders), @@ -256,6 +258,7 @@ ClangdServer::ClangdServer(const GlobalCompilationDatabase &CDB, Callbacks->onBackgroundIndexProgress(S); }; BGOpts.ContextProvider = Opts.ContextProvider; + BGOpts.SupportContainedRefs = Opts.EnableOutgoingCalls; BackgroundIdx = std::make_unique( TFS, CDB, BackgroundIndexStorage::createDiskBackedStorageFactory( @@ -912,6 +915,15 @@ void ClangdServer::inlayHints(PathRef File, std::optional RestrictRange, WorkScheduler->runWithAST("InlayHints", File, std::move(Action), Transient); } +void ClangdServer::outgoingCalls( + const CallHierarchyItem &Item, + Callback> CB) { + WorkScheduler->run("Outgoing Calls", "", + [CB = std::move(CB), Item, this]() mutable { + CB(clangd::outgoingCalls(Item, Index)); + }); +} + void ClangdServer::onFileEvent(const DidChangeWatchedFilesParams &Params) { // FIXME: Do nothing for now. This will be used for indexing and potentially // invalidating other caches. diff --git a/clang-tools-extra/clangd/ClangdServer.h b/clang-tools-extra/clangd/ClangdServer.h index a653cdb56b751..e030bf04122d5 100644 --- a/clang-tools-extra/clangd/ClangdServer.h +++ b/clang-tools-extra/clangd/ClangdServer.h @@ -110,6 +110,11 @@ class ClangdServer { /// Cached preambles are potentially large. If false, store them on disk. bool StorePreamblesInMemory = true; + /// Call hierarchy's outgoing calls feature requires additional index + /// serving structures which increase memory usage. 
If false, these are + /// not created and the feature is not enabled. + bool EnableOutgoingCalls = true; + /// This throttler controls which preambles may be built at a given time. clangd::PreambleThrottler *PreambleThrottler = nullptr; @@ -292,6 +297,10 @@ class ClangdServer { void incomingCalls(const CallHierarchyItem &Item, Callback>); + /// Resolve outgoing calls for a given call hierarchy item. + void outgoingCalls(const CallHierarchyItem &Item, + Callback>); + /// Resolve inlay hints for a given document. void inlayHints(PathRef File, std::optional RestrictRange, Callback>); diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp index 61fa66180376c..d237d95b3eb65 100644 --- a/clang-tools-extra/clangd/XRefs.cpp +++ b/clang-tools-extra/clangd/XRefs.cpp @@ -1702,6 +1702,7 @@ declToHierarchyItem(const NamedDecl &ND, llvm::StringRef TUPath) { HierarchyItem HI; HI.name = printName(Ctx, ND); + // FIXME: Populate HI.detail the way we do in symbolToHierarchyItem? HI.kind = SK; HI.range = Range{sourceLocToPosition(SM, DeclRange->getBegin()), sourceLocToPosition(SM, DeclRange->getEnd())}; @@ -1753,6 +1754,7 @@ static std::optional symbolToHierarchyItem(const Symbol &S, } HierarchyItem HI; HI.name = std::string(S.Name); + HI.detail = (S.Scope + S.Name).str(); HI.kind = indexSymbolKindToSymbolKind(S.SymInfo.Kind); HI.selectionRange = Loc->range; // FIXME: Populate 'range' correctly @@ -2319,6 +2321,63 @@ incomingCalls(const CallHierarchyItem &Item, const SymbolIndex *Index) { return Results; } +std::vector +outgoingCalls(const CallHierarchyItem &Item, const SymbolIndex *Index) { + std::vector Results; + if (!Index || Item.data.empty()) + return Results; + auto ID = SymbolID::fromStr(Item.data); + if (!ID) { + elog("outgoingCalls failed to find symbol: {0}", ID.takeError()); + return Results; + } + // In this function, we find outgoing calls based on the index only. + ContainedRefsRequest Request; + Request.ID = *ID; + // Initially store the ranges in a map keyed by SymbolID of the callee. + // This allows us to group different calls to the same function + // into the same CallHierarchyOutgoingCall. + llvm::DenseMap> CallsOut; + // We can populate the ranges based on a refs request only. As we do so, we + // also accumulate the callee IDs into a lookup request. + LookupRequest CallsOutLookup; + Index->containedRefs(Request, [&](const auto &R) { + auto Loc = indexToLSPLocation(R.Location, Item.uri.file()); + if (!Loc) { + elog("outgoingCalls failed to convert location: {0}", Loc.takeError()); + return; + } + auto It = CallsOut.try_emplace(R.Symbol, std::vector{}).first; + It->second.push_back(Loc->range); + + CallsOutLookup.IDs.insert(R.Symbol); + }); + // Perform the lookup request and combine its results with CallsOut to + // get complete CallHierarchyOutgoingCall objects. + Index->lookup(CallsOutLookup, [&](const Symbol &Callee) { + // The containedRefs request should only return symbols which are + // function-like, i.e. symbols for which references to them can be "calls". 
+ using SK = index::SymbolKind; + auto Kind = Callee.SymInfo.Kind; + assert(Kind == SK::Function || Kind == SK::InstanceMethod || + Kind == SK::ClassMethod || Kind == SK::StaticMethod || + Kind == SK::Constructor || Kind == SK::Destructor || + Kind == SK::ConversionFunction); + + auto It = CallsOut.find(Callee.ID); + assert(It != CallsOut.end()); + if (auto CHI = symbolToCallHierarchyItem(Callee, Item.uri.file())) + Results.push_back( + CallHierarchyOutgoingCall{std::move(*CHI), std::move(It->second)}); + }); + // Sort results by name of the callee. + llvm::sort(Results, [](const CallHierarchyOutgoingCall &A, + const CallHierarchyOutgoingCall &B) { + return A.to.name < B.to.name; + }); + return Results; +} + llvm::DenseSet getNonLocalDeclRefs(ParsedAST &AST, const FunctionDecl *FD) { if (!FD->hasBody()) diff --git a/clang-tools-extra/clangd/XRefs.h b/clang-tools-extra/clangd/XRefs.h index df91dd15303c1..247e52314c3f9 100644 --- a/clang-tools-extra/clangd/XRefs.h +++ b/clang-tools-extra/clangd/XRefs.h @@ -150,6 +150,9 @@ prepareCallHierarchy(ParsedAST &AST, Position Pos, PathRef TUPath); std::vector incomingCalls(const CallHierarchyItem &Item, const SymbolIndex *Index); +std::vector +outgoingCalls(const CallHierarchyItem &Item, const SymbolIndex *Index); + /// Returns all decls that are referenced in the \p FD except local symbols. llvm::DenseSet getNonLocalDeclRefs(ParsedAST &AST, const FunctionDecl *FD); diff --git a/clang-tools-extra/clangd/index/Background.cpp b/clang-tools-extra/clangd/index/Background.cpp index 5cde4937fee78..496d1455def4b 100644 --- a/clang-tools-extra/clangd/index/Background.cpp +++ b/clang-tools-extra/clangd/index/Background.cpp @@ -96,7 +96,7 @@ BackgroundIndex::BackgroundIndex( : SwapIndex(std::make_unique()), TFS(TFS), CDB(CDB), IndexingPriority(Opts.IndexingPriority), ContextProvider(std::move(Opts.ContextProvider)), - IndexedSymbols(IndexContents::All), + IndexedSymbols(IndexContents::All, Opts.SupportContainedRefs), Rebuilder(this, &IndexedSymbols, Opts.ThreadPoolSize), IndexStorageFactory(std::move(IndexStorageFactory)), Queue(std::move(Opts.OnProgress)), diff --git a/clang-tools-extra/clangd/index/Background.h b/clang-tools-extra/clangd/index/Background.h index 0d719ffdb957e..448e911201575 100644 --- a/clang-tools-extra/clangd/index/Background.h +++ b/clang-tools-extra/clangd/index/Background.h @@ -145,6 +145,9 @@ class BackgroundIndex : public SwapIndex { // file. Called with the empty string for other tasks. // (When called, the context from BackgroundIndex construction is active). std::function ContextProvider = nullptr; + // Whether the index needs to support the containedRefs() operation. + // May use extra memory. + bool SupportContainedRefs = true; }; /// Creates a new background index and starts its threads. 
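The two options introduced so far are the only configuration surface for this feature: ClangdServer::Options::EnableOutgoingCalls drives both the FileIndex constructor and BackgroundIndex::Options::SupportContainedRefs, so an embedder that cares more about memory than about outgoing calls can turn the whole thing off in one place. A minimal sketch of that (illustrative only, not part of this patch; makeLowMemoryOpts is a hypothetical helper and the include path assumes the in-tree clang-tools-extra/clangd layout):

  #include "ClangdServer.h"

  clang::clangd::ClangdServer::Options makeLowMemoryOpts() {
    clang::clangd::ClangdServer::Options Opts;
    // With this off, ClangdLSPServer does not register callHierarchy/outgoingCalls,
    // and the dynamic/background indexes skip the extra reverse-reference data
    // that containedRefs() would need.
    Opts.EnableOutgoingCalls = false;
    return Opts;
  }
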
diff --git a/clang-tools-extra/clangd/index/FileIndex.cpp b/clang-tools-extra/clangd/index/FileIndex.cpp index eb9562d2b6bf8..aa573e312a756 100644 --- a/clang-tools-extra/clangd/index/FileIndex.cpp +++ b/clang-tools-extra/clangd/index/FileIndex.cpp @@ -239,8 +239,8 @@ SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST, /*CollectMainFileRefs=*/false); } -FileSymbols::FileSymbols(IndexContents IdxContents) - : IdxContents(IdxContents) {} +FileSymbols::FileSymbols(IndexContents IdxContents, bool SupportContainedRefs) + : IdxContents(IdxContents), SupportContainedRefs(SupportContainedRefs) {} void FileSymbols::update(llvm::StringRef Key, std::unique_ptr Symbols, @@ -395,7 +395,7 @@ FileSymbols::buildIndex(IndexType Type, DuplicateHandling DuplicateHandle, std::move(AllRelations), std::move(Files), IdxContents, std::make_tuple(std::move(SymbolSlabs), std::move(RefSlabs), std::move(RefsStorage), std::move(SymsStorage)), - StorageSize); + StorageSize, SupportContainedRefs); } llvm_unreachable("Unknown clangd::IndexType"); } @@ -419,11 +419,12 @@ void FileSymbols::profile(MemoryTree &MT) const { } } -FileIndex::FileIndex() +FileIndex::FileIndex(bool SupportContainedRefs) : MergedIndex(&MainFileIndex, &PreambleIndex), - PreambleSymbols(IndexContents::Symbols | IndexContents::Relations), + PreambleSymbols(IndexContents::Symbols | IndexContents::Relations, + SupportContainedRefs), PreambleIndex(std::make_unique()), - MainFileSymbols(IndexContents::All), + MainFileSymbols(IndexContents::All, SupportContainedRefs), MainFileIndex(std::make_unique()) {} void FileIndex::updatePreamble(IndexFileIn IF) { diff --git a/clang-tools-extra/clangd/index/FileIndex.h b/clang-tools-extra/clangd/index/FileIndex.h index 44f33e8fbcd51..8e88dc9712996 100644 --- a/clang-tools-extra/clangd/index/FileIndex.h +++ b/clang-tools-extra/clangd/index/FileIndex.h @@ -69,7 +69,7 @@ enum class DuplicateHandling { /// locking when we swap or obtain references to snapshots. class FileSymbols { public: - FileSymbols(IndexContents IdxContents); + FileSymbols(IndexContents IdxContents, bool SupportContainedRefs); /// Updates all slabs associated with the \p Key. /// If either is nullptr, corresponding data for \p Key will be removed. /// If CountReferences is true, \p Refs will be used for counting references @@ -91,6 +91,7 @@ class FileSymbols { private: IndexContents IdxContents; + bool SupportContainedRefs; struct RefSlabAndCountReferences { std::shared_ptr Slab; @@ -108,7 +109,7 @@ class FileSymbols { /// FIXME: Expose an interface to remove files that are closed. class FileIndex : public MergedIndex { public: - FileIndex(); + FileIndex(bool SupportContainedRefs); /// Update preamble symbols of file \p Path with all declarations in \p AST /// and macros in \p PP. 
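The plumbing above feeds one new index operation, containedRefs(), declared in the Index.h changes that follow: given a container symbol, it reports every call-like reference inside that symbol's body together with the callee's SymbolID. A minimal consumer, in the spirit of what outgoingCalls() in XRefs.cpp does before converting to LSP types, might look like this sketch (illustrative only, not part of the patch; callSiteCounts is a hypothetical helper):

  #include "index/Index.h"
  #include "llvm/ADT/DenseMap.h"

  using namespace clang::clangd;

  // Count the call sites found inside the body of `Caller`, grouped by callee.
  llvm::DenseMap<SymbolID, unsigned> callSiteCounts(const SymbolIndex &Idx,
                                                    SymbolID Caller) {
    ContainedRefsRequest Req;
    Req.ID = Caller; // the container whose outgoing (call-like) refs we want
    llvm::DenseMap<SymbolID, unsigned> Counts;
    Idx.containedRefs(Req, [&](const ContainedRefsResult &R) {
      // R.Symbol names the callee; R.Location is the call site inside Caller.
      // Anything kept beyond the callback must be copied out of index storage.
      ++Counts[R.Symbol];
    });
    return Counts;
  }

XRefs.cpp follows the same shape but keeps the ranges, batches the callee IDs into a LookupRequest, and then builds CallHierarchyOutgoingCall results from the looked-up Symbols.
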
diff --git a/clang-tools-extra/clangd/index/Index.cpp b/clang-tools-extra/clangd/index/Index.cpp index 7a0c23287db22..86dc6ed763344 100644 --- a/clang-tools-extra/clangd/index/Index.cpp +++ b/clang-tools-extra/clangd/index/Index.cpp @@ -66,6 +66,11 @@ bool SwapIndex::refs(const RefsRequest &R, llvm::function_ref CB) const { return snapshot()->refs(R, CB); } +bool SwapIndex::containedRefs( + const ContainedRefsRequest &R, + llvm::function_ref CB) const { + return snapshot()->containedRefs(R, CB); +} void SwapIndex::relations( const RelationsRequest &R, llvm::function_ref CB) const { diff --git a/clang-tools-extra/clangd/index/Index.h b/clang-tools-extra/clangd/index/Index.h index 047ce08e93e3a..a193b1a191216 100644 --- a/clang-tools-extra/clangd/index/Index.h +++ b/clang-tools-extra/clangd/index/Index.h @@ -77,6 +77,19 @@ struct RefsRequest { bool WantContainer = false; }; +struct ContainedRefsRequest { + /// Note that RefKind::Call just restricts the matched SymbolKind to + /// functions, not the form of the reference (e.g. address-of-function, + /// which can indicate an indirect call, should still be caught). + static const RefKind SupportedRefKinds = RefKind::Call; + + SymbolID ID; + /// If set, limit the number of refers returned from the index. The index may + /// choose to return less than this, e.g. it tries to avoid returning stale + /// results. + std::optional Limit; +}; + struct RelationsRequest { llvm::DenseSet Subjects; RelationKind Predicate; @@ -84,6 +97,14 @@ struct RelationsRequest { std::optional Limit; }; +struct ContainedRefsResult { + /// The source location where the symbol is named. + SymbolLocation Location; + RefKind Kind = RefKind::Unknown; + /// The ID of the symbol which is referred to + SymbolID Symbol; +}; + /// Describes what data is covered by an index. /// /// Indexes may contain symbols but not references from a file, etc. @@ -141,6 +162,17 @@ class SymbolIndex { virtual bool refs(const RefsRequest &Req, llvm::function_ref Callback) const = 0; + /// Find all symbols that are referenced by a symbol and apply + /// \p Callback on each result. + /// + /// Results should be returned in arbitrary order. + /// The returned result must be deep-copied if it's used outside Callback. + /// + /// Returns true if there will be more results (limited by Req.Limit); + virtual bool containedRefs( + const ContainedRefsRequest &Req, + llvm::function_ref Callback) const = 0; + /// Finds all relations (S, P, O) stored in the index such that S is among /// Req.Subjects and P is Req.Predicate, and invokes \p Callback for (S, O) in /// each. @@ -175,6 +207,9 @@ class SwapIndex : public SymbolIndex { llvm::function_ref) const override; bool refs(const RefsRequest &, llvm::function_ref) const override; + bool containedRefs( + const ContainedRefsRequest &, + llvm::function_ref) const override; void relations(const RelationsRequest &, llvm::function_ref) const override; diff --git a/clang-tools-extra/clangd/index/MemIndex.cpp b/clang-tools-extra/clangd/index/MemIndex.cpp index 2665d46b97d83..9c9d3942bdee6 100644 --- a/clang-tools-extra/clangd/index/MemIndex.cpp +++ b/clang-tools-extra/clangd/index/MemIndex.cpp @@ -9,6 +9,7 @@ #include "MemIndex.h" #include "FuzzyMatch.h" #include "Quality.h" +#include "index/Index.h" #include "support/Trace.h" namespace clang { @@ -85,6 +86,25 @@ bool MemIndex::refs(const RefsRequest &Req, return false; // We reported all refs. 
} +bool MemIndex::containedRefs( + const ContainedRefsRequest &Req, + llvm::function_ref Callback) const { + trace::Span Tracer("MemIndex refersTo"); + uint32_t Remaining = Req.Limit.value_or(std::numeric_limits::max()); + for (const auto &Pair : Refs) { + for (const auto &R : Pair.second) { + if (!static_cast(ContainedRefsRequest::SupportedRefKinds & R.Kind) || + Req.ID != R.Container) + continue; + if (Remaining == 0) + return true; // More refs were available. + --Remaining; + Callback({R.Location, R.Kind, Pair.first}); + } + } + return false; // We reported all refs. +} + void MemIndex::relations( const RelationsRequest &Req, llvm::function_ref Callback) const { diff --git a/clang-tools-extra/clangd/index/MemIndex.h b/clang-tools-extra/clangd/index/MemIndex.h index fba2c1a7120a2..8f390c5028dc4 100644 --- a/clang-tools-extra/clangd/index/MemIndex.h +++ b/clang-tools-extra/clangd/index/MemIndex.h @@ -72,6 +72,10 @@ class MemIndex : public SymbolIndex { bool refs(const RefsRequest &Req, llvm::function_ref Callback) const override; + bool containedRefs(const ContainedRefsRequest &Req, + llvm::function_ref + Callback) const override; + void relations(const RelationsRequest &Req, llvm::function_ref Callback) const override; diff --git a/clang-tools-extra/clangd/index/Merge.cpp b/clang-tools-extra/clangd/index/Merge.cpp index 8221d4b1f4440..aecca38a885b6 100644 --- a/clang-tools-extra/clangd/index/Merge.cpp +++ b/clang-tools-extra/clangd/index/Merge.cpp @@ -155,6 +155,40 @@ bool MergedIndex::refs(const RefsRequest &Req, return More || StaticHadMore; } +bool MergedIndex::containedRefs( + const ContainedRefsRequest &Req, + llvm::function_ref Callback) const { + trace::Span Tracer("MergedIndex refersTo"); + bool More = false; + uint32_t Remaining = Req.Limit.value_or(std::numeric_limits::max()); + // We don't want duplicated refs from the static/dynamic indexes, + // and we can't reliably deduplicate them because offsets may differ slightly. + // We consider the dynamic index authoritative and report all its refs, + // and only report static index refs from other files. + More |= Dynamic->containedRefs(Req, [&](const auto &O) { + Callback(O); + assert(Remaining != 0); + --Remaining; + }); + if (Remaining == 0 && More) + return More; + auto DynamicContainsFile = Dynamic->indexedFiles(); + // We return less than Req.Limit if static index returns more refs for dirty + // files. + bool StaticHadMore = Static->containedRefs(Req, [&](const auto &O) { + if ((DynamicContainsFile(O.Location.FileURI) & IndexContents::References) != + IndexContents::None) + return; // ignore refs that have been seen from dynamic index. 
+ if (Remaining == 0) { + More = true; + return; + } + --Remaining; + Callback(O); + }); + return More || StaticHadMore; +} + llvm::unique_function MergedIndex::indexedFiles() const { return [DynamicContainsFile{Dynamic->indexedFiles()}, diff --git a/clang-tools-extra/clangd/index/Merge.h b/clang-tools-extra/clangd/index/Merge.h index b8a562b0df5d9..7441be6e57e85 100644 --- a/clang-tools-extra/clangd/index/Merge.h +++ b/clang-tools-extra/clangd/index/Merge.h @@ -38,6 +38,9 @@ class MergedIndex : public SymbolIndex { llvm::function_ref) const override; bool refs(const RefsRequest &, llvm::function_ref) const override; + bool containedRefs( + const ContainedRefsRequest &, + llvm::function_ref) const override; void relations(const RelationsRequest &, llvm::function_ref) const override; diff --git a/clang-tools-extra/clangd/index/ProjectAware.cpp b/clang-tools-extra/clangd/index/ProjectAware.cpp index 2c6f8273b35d0..9836f0130362a 100644 --- a/clang-tools-extra/clangd/index/ProjectAware.cpp +++ b/clang-tools-extra/clangd/index/ProjectAware.cpp @@ -35,6 +35,10 @@ class ProjectAwareIndex : public SymbolIndex { /// Query all indexes while prioritizing the associated one (if any). bool refs(const RefsRequest &Req, llvm::function_ref Callback) const override; + /// Query all indexes while prioritizing the associated one (if any). + bool containedRefs(const ContainedRefsRequest &Req, + llvm::function_ref + Callback) const override; /// Queries only the associates index when Req.RestrictForCodeCompletion is /// set, otherwise queries all. @@ -94,6 +98,15 @@ bool ProjectAwareIndex::refs( return false; } +bool ProjectAwareIndex::containedRefs( + const ContainedRefsRequest &Req, + llvm::function_ref Callback) const { + trace::Span Tracer("ProjectAwareIndex::refersTo"); + if (auto *Idx = getIndex()) + return Idx->containedRefs(Req, Callback); + return false; +} + bool ProjectAwareIndex::fuzzyFind( const FuzzyFindRequest &Req, llvm::function_ref Callback) const { diff --git a/clang-tools-extra/clangd/index/Ref.h b/clang-tools-extra/clangd/index/Ref.h index 6e383e2ade3d2..870f77f56e6cb 100644 --- a/clang-tools-extra/clangd/index/Ref.h +++ b/clang-tools-extra/clangd/index/Ref.h @@ -63,6 +63,9 @@ enum class RefKind : uint8_t { // ^ this references Foo, but does not explicitly spell out its name // }; Spelled = 1 << 3, + // A reference which is a call. Used as a filter for which references + // to store in data structures used for computing outgoing calls. + Call = 1 << 4, All = Declaration | Definition | Reference | Spelled, }; diff --git a/clang-tools-extra/clangd/index/Serialization.cpp b/clang-tools-extra/clangd/index/Serialization.cpp index 72a4e8b007668..f03839599612c 100644 --- a/clang-tools-extra/clangd/index/Serialization.cpp +++ b/clang-tools-extra/clangd/index/Serialization.cpp @@ -457,7 +457,7 @@ readCompileCommand(Reader CmdReader, llvm::ArrayRef Strings) { // The current versioning scheme is simple - non-current versions are rejected. // If you make a breaking change, bump this version number to invalidate stored // data. Later we may want to support some backward compatibility. 
-constexpr static uint32_t Version = 19; +constexpr static uint32_t Version = 20; llvm::Expected readRIFF(llvm::StringRef Data, SymbolOrigin Origin) { @@ -704,7 +704,8 @@ llvm::Expected readIndexFile(llvm::StringRef Data, } std::unique_ptr loadIndex(llvm::StringRef SymbolFilename, - SymbolOrigin Origin, bool UseDex) { + SymbolOrigin Origin, bool UseDex, + bool SupportContainedRefs) { trace::Span OverallTracer("LoadIndex"); auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename); if (!Buffer) { @@ -735,10 +736,11 @@ std::unique_ptr loadIndex(llvm::StringRef SymbolFilename, size_t NumRelations = Relations.size(); trace::Span Tracer("BuildIndex"); - auto Index = UseDex ? dex::Dex::build(std::move(Symbols), std::move(Refs), - std::move(Relations)) - : MemIndex::build(std::move(Symbols), std::move(Refs), - std::move(Relations)); + auto Index = UseDex + ? dex::Dex::build(std::move(Symbols), std::move(Refs), + std::move(Relations), SupportContainedRefs) + : MemIndex::build(std::move(Symbols), std::move(Refs), + std::move(Relations)); vlog("Loaded {0} from {1} with estimated memory usage {2} bytes\n" " - number of symbols: {3}\n" " - number of refs: {4}\n" diff --git a/clang-tools-extra/clangd/index/Serialization.h b/clang-tools-extra/clangd/index/Serialization.h index b6890d63d2c38..bf8e036afcb6c 100644 --- a/clang-tools-extra/clangd/index/Serialization.h +++ b/clang-tools-extra/clangd/index/Serialization.h @@ -83,7 +83,8 @@ std::string toYAML(const Ref &); // Build an in-memory static index from an index file. // The size should be relatively small, so data can be managed in memory. std::unique_ptr loadIndex(llvm::StringRef Filename, - SymbolOrigin Origin, bool UseDex = true); + SymbolOrigin Origin, bool UseDex, + bool SupportContainedRefs); } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp index 91ae9d3003a97..81125dbb1aeaf 100644 --- a/clang-tools-extra/clangd/index/SymbolCollector.cpp +++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp @@ -18,6 +18,7 @@ #include "clang-include-cleaner/Record.h" #include "clang-include-cleaner/Types.h" #include "index/CanonicalIncludes.h" +#include "index/Ref.h" #include "index/Relation.h" #include "index/Symbol.h" #include "index/SymbolID.h" @@ -660,7 +661,7 @@ bool SymbolCollector::handleDeclOccurrence( auto FileLoc = SM.getFileLoc(Loc); auto FID = SM.getFileID(FileLoc); if (Opts.RefsInHeaders || FID == SM.getMainFileID()) { - addRef(ID, SymbolRef{FileLoc, FID, Roles, + addRef(ID, SymbolRef{FileLoc, FID, Roles, index::getSymbolInfo(ND).Kind, getRefContainer(ASTNode.Parent, Opts), isSpelled(FileLoc, *ND)}); } @@ -774,8 +775,10 @@ bool SymbolCollector::handleMacroOccurrence(const IdentifierInfo *Name, // FIXME: Populate container information for macro references. // FIXME: All MacroRefs are marked as Spelled now, but this should be // checked. - addRef(ID, SymbolRef{Loc, SM.getFileID(Loc), Roles, /*Container=*/nullptr, - /*Spelled=*/true}); + addRef(ID, + SymbolRef{Loc, SM.getFileID(Loc), Roles, index::SymbolKind::Macro, + /*Container=*/nullptr, + /*Spelled=*/true}); } // Collect symbols. 
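The hunk that follows is where the new Call bit gets attached: SymbolCollector now records the referenced declaration's SymbolKind in each SymbolRef, and addRef() ORs RefKind::Call into the stored ref when that kind is function-like (refIsCall). Because RefKind remains a bitmask, the bit composes with Declaration/Definition/Reference/Spelled and is consumed with the same mask test used by the MemIndex and Dex implementations earlier in this patch. A small check of that shape (illustrative only, not from the patch; isCallLikeRef is a hypothetical helper):

  #include "index/Index.h"
  #include "index/Ref.h"

  using namespace clang::clangd;

  // True for refs that belong in the containedRefs()/outgoing-calls data,
  // i.e. refs carrying the Call bit, regardless of which other bits are set.
  static bool isCallLikeRef(RefKind K) {
    return (K & ContainedRefsRequest::SupportedRefKinds) != RefKind::Unknown;
  }

  // e.g. isCallLikeRef(RefKind::Reference | RefKind::Spelled | RefKind::Call)
  // is true, while a plain spelled reference to a variable is not.
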
@@ -1166,6 +1169,14 @@ bool SymbolCollector::shouldIndexFile(FileID FID) { return I.first->second; } +static bool refIsCall(index::SymbolKind Kind) { + using SK = index::SymbolKind; + return Kind == SK::Function || Kind == SK::InstanceMethod || + Kind == SK::ClassMethod || Kind == SK::StaticMethod || + Kind == SK::Constructor || Kind == SK::Destructor || + Kind == SK::ConversionFunction; +} + void SymbolCollector::addRef(SymbolID ID, const SymbolRef &SR) { const auto &SM = ASTCtx->getSourceManager(); // FIXME: use the result to filter out references. @@ -1177,6 +1188,9 @@ void SymbolCollector::addRef(SymbolID ID, const SymbolRef &SR) { R.Location.End = Range.second; R.Location.FileURI = HeaderFileURIs->toURI(*FE).c_str(); R.Kind = toRefKind(SR.Roles, SR.Spelled); + if (refIsCall(SR.Kind)) { + R.Kind |= RefKind::Call; + } R.Container = getSymbolIDCached(SR.Container); Refs.insert(ID, R); } diff --git a/clang-tools-extra/clangd/index/SymbolCollector.h b/clang-tools-extra/clangd/index/SymbolCollector.h index 6ff7a0145ff87..e9eb27fd0f664 100644 --- a/clang-tools-extra/clangd/index/SymbolCollector.h +++ b/clang-tools-extra/clangd/index/SymbolCollector.h @@ -209,6 +209,7 @@ class SymbolCollector : public index::IndexDataConsumer { SourceLocation Loc; FileID FID; index::SymbolRoleSet Roles; + index::SymbolKind Kind; const Decl *Container; bool Spelled; }; diff --git a/clang-tools-extra/clangd/index/dex/Dex.cpp b/clang-tools-extra/clangd/index/dex/Dex.cpp index b7d3063e19b49..5643ba0c5e4ce 100644 --- a/clang-tools-extra/clangd/index/dex/Dex.cpp +++ b/clang-tools-extra/clangd/index/dex/Dex.cpp @@ -33,13 +33,14 @@ namespace clangd { namespace dex { std::unique_ptr Dex::build(SymbolSlab Symbols, RefSlab Refs, - RelationSlab Rels) { + RelationSlab Rels, + bool SupportContainedRefs) { auto Size = Symbols.bytes() + Refs.bytes(); // There is no need to include "Rels" in Data because the relations are self- // contained, without references into a backing store. auto Data = std::make_pair(std::move(Symbols), std::move(Refs)); return std::make_unique(Data.first, Data.second, Rels, std::move(Data), - Size); + Size, SupportContainedRefs); } namespace { @@ -120,7 +121,7 @@ class IndexBuilder { } // namespace -void Dex::buildIndex() { +void Dex::buildIndex(bool SupportContainedRefs) { this->Corpus = dex::Corpus(Symbols.size()); std::vector> ScoredSymbols(Symbols.size()); @@ -147,6 +148,20 @@ void Dex::buildIndex() { for (DocID SymbolRank = 0; SymbolRank < Symbols.size(); ++SymbolRank) Builder.add(*Symbols[SymbolRank], SymbolRank); InvertedIndex = std::move(Builder).build(); + + // If the containedRefs() operation is supported, build the RevRefs + // data structure used to implement it. + if (!SupportContainedRefs) + return; + for (const auto &[ID, RefList] : Refs) + for (const auto &R : RefList) + if ((R.Kind & ContainedRefsRequest::SupportedRefKinds) != + RefKind::Unknown) + RevRefs.emplace_back(R, ID); + // Sort by container ID so we can use binary search for lookup. + llvm::sort(RevRefs, [](const RevRef &A, const RevRef &B) { + return A.ref().Container < B.ref().Container; + }); } std::unique_ptr Dex::iterator(const Token &Tok) const { @@ -314,6 +329,36 @@ bool Dex::refs(const RefsRequest &Req, return false; // We reported all refs. } +llvm::iterator_range::const_iterator> +Dex::lookupRevRefs(const SymbolID &Container) const { + // equal_range() requires an element of the same type as the elements of the + // range, so construct a dummy RevRef with the container of interest. 
+ Ref QueryRef; + QueryRef.Container = Container; + RevRef Query(QueryRef, SymbolID{}); + + auto ItPair = std::equal_range(RevRefs.cbegin(), RevRefs.cend(), Query, + [](const RevRef &A, const RevRef &B) { + return A.ref().Container < B.ref().Container; + }); + return {ItPair.first, ItPair.second}; +} + +bool Dex::containedRefs( + const ContainedRefsRequest &Req, + llvm::function_ref Callback) const { + trace::Span Tracer("Dex reversed refs"); + uint32_t Remaining = Req.Limit.value_or(std::numeric_limits::max()); + for (const auto &Rev : lookupRevRefs(Req.ID)) { + // RevRefs are already filtered to ContainedRefsRequest::SupportedRefKinds + if (Remaining == 0) + return true; // More refs were available. + --Remaining; + Callback(Rev.containedRefsResult()); + } + return false; // We reported all refs. +} + void Dex::relations( const RelationsRequest &Req, llvm::function_ref Callback) const { @@ -350,6 +395,7 @@ size_t Dex::estimateMemoryUsage() const { for (const auto &TokenToPostingList : InvertedIndex) Bytes += TokenToPostingList.second.bytes(); Bytes += Refs.getMemorySize(); + Bytes += RevRefs.size() * sizeof(RevRef); Bytes += Relations.getMemorySize(); return Bytes + BackingDataSize; } diff --git a/clang-tools-extra/clangd/index/dex/Dex.h b/clang-tools-extra/clangd/index/dex/Dex.h index 69e161d51135b..20c0503d19b97 100644 --- a/clang-tools-extra/clangd/index/dex/Dex.h +++ b/clang-tools-extra/clangd/index/dex/Dex.h @@ -36,7 +36,8 @@ class Dex : public SymbolIndex { public: // All data must outlive this index. template - Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations) + Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations, + bool SupportContainedRefs) : Corpus(0) { for (auto &&Sym : Symbols) this->Symbols.push_back(&Sym); @@ -46,15 +47,15 @@ class Dex : public SymbolIndex { this->Relations[std::make_pair(Rel.Subject, static_cast(Rel.Predicate))] .push_back(Rel.Object); - buildIndex(); + buildIndex(SupportContainedRefs); } // Symbols and Refs are owned by BackingData, Index takes ownership. template Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations, - Payload &&BackingData, size_t BackingDataSize) + Payload &&BackingData, size_t BackingDataSize, bool SupportContainedRefs) : Dex(std::forward(Symbols), std::forward(Refs), - std::forward(Relations)) { + std::forward(Relations), SupportContainedRefs) { KeepAlive = std::shared_ptr( std::make_shared(std::move(BackingData)), nullptr); this->BackingDataSize = BackingDataSize; @@ -64,16 +65,18 @@ class Dex : public SymbolIndex { typename FileRange, typename Payload> Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations, FileRange &&Files, IndexContents IdxContents, Payload &&BackingData, - size_t BackingDataSize) + size_t BackingDataSize, bool SupportContainedRefs) : Dex(std::forward(Symbols), std::forward(Refs), std::forward(Relations), - std::forward(BackingData), BackingDataSize) { + std::forward(BackingData), BackingDataSize, + SupportContainedRefs) { this->Files = std::forward(Files); this->IdxContents = IdxContents; } /// Builds an index from slabs. The index takes ownership of the slab. 
- static std::unique_ptr build(SymbolSlab, RefSlab, RelationSlab); + static std::unique_ptr build(SymbolSlab, RefSlab, RelationSlab, + bool SupportContainedRefs); bool fuzzyFind(const FuzzyFindRequest &Req, @@ -85,6 +88,10 @@ class Dex : public SymbolIndex { bool refs(const RefsRequest &Req, llvm::function_ref Callback) const override; + bool containedRefs(const ContainedRefsRequest &Req, + llvm::function_ref + Callback) const override; + void relations(const RelationsRequest &Req, llvm::function_ref Callback) const override; @@ -95,7 +102,22 @@ class Dex : public SymbolIndex { size_t estimateMemoryUsage() const override; private: - void buildIndex(); + class RevRef { + const Ref *Reference; + SymbolID Target; + + public: + RevRef(const Ref &Reference, SymbolID Target) + : Reference(&Reference), Target(Target) {} + const Ref &ref() const { return *Reference; } + ContainedRefsResult containedRefsResult() const { + return {ref().Location, ref().Kind, Target}; + } + }; + + void buildIndex(bool EnableOutgoingCalls); + llvm::iterator_range::const_iterator> + lookupRevRefs(const SymbolID &Container) const; std::unique_ptr iterator(const Token &Tok) const; std::unique_ptr createFileProximityIterator(llvm::ArrayRef ProximityPaths) const; @@ -116,6 +138,7 @@ class Dex : public SymbolIndex { llvm::DenseMap InvertedIndex; dex::Corpus Corpus; llvm::DenseMap> Refs; + std::vector RevRefs; // sorted by container ID static_assert(sizeof(RelationKind) == sizeof(uint8_t), "RelationKind should be of same size as a uint8_t"); llvm::DenseMap, std::vector> Relations; diff --git a/clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp b/clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp index cea59ae409914..f185808ae1544 100644 --- a/clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp +++ b/clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp @@ -375,7 +375,8 @@ std::unique_ptr openIndex(llvm::StringRef Index) { return Index.starts_with("remote:") ? 
remote::getClient(Index.drop_front(strlen("remote:")), ProjectRoot) - : loadIndex(Index, SymbolOrigin::Static, /*UseDex=*/true); + : loadIndex(Index, SymbolOrigin::Static, /*UseDex=*/true, + /*SupportContainedRefs=*/true); } bool runCommand(std::string Request, const SymbolIndex &Index) { diff --git a/clang-tools-extra/clangd/index/remote/Client.cpp b/clang-tools-extra/clangd/index/remote/Client.cpp index 391da3916259c..79b827126b4ef 100644 --- a/clang-tools-extra/clangd/index/remote/Client.cpp +++ b/clang-tools-extra/clangd/index/remote/Client.cpp @@ -146,6 +146,13 @@ class IndexClient : public clangd::SymbolIndex { return streamRPC(Request, &remote::v1::SymbolIndex::Stub::Refs, Callback); } + bool containedRefs(const clangd::ContainedRefsRequest &Request, + llvm::function_ref + Callback) const override { + return streamRPC(Request, &remote::v1::SymbolIndex::Stub::ContainedRefs, + Callback); + } + void relations(const clangd::RelationsRequest &Request, llvm::function_ref diff --git a/clang-tools-extra/clangd/index/remote/Index.proto b/clang-tools-extra/clangd/index/remote/Index.proto index 3072299d8f345..689ef9d44ee40 100644 --- a/clang-tools-extra/clangd/index/remote/Index.proto +++ b/clang-tools-extra/clangd/index/remote/Index.proto @@ -131,3 +131,21 @@ message Relation { optional string subject_id = 1; optional Symbol object = 2; } + +message ContainedRefsRequest { + required string id = 1; + optional uint32 limit = 2; +} + +message ContainedRefsReply { + oneof kind { + ContainedRef stream_result = 1; + FinalResult final_result = 2; + } +} + +message ContainedRef { + required SymbolLocation location = 1; + required uint32 kind = 2; + required string symbol = 3; +} diff --git a/clang-tools-extra/clangd/index/remote/Service.proto b/clang-tools-extra/clangd/index/remote/Service.proto index 7c7efa530200d..43023321cb9e1 100644 --- a/clang-tools-extra/clangd/index/remote/Service.proto +++ b/clang-tools-extra/clangd/index/remote/Service.proto @@ -21,5 +21,7 @@ service SymbolIndex { rpc Refs(RefsRequest) returns (stream RefsReply) {} + rpc ContainedRefs(ContainedRefsRequest) returns (stream ContainedRefsReply) {} + rpc Relations(RelationsRequest) returns (stream RelationsReply) {} } diff --git a/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.cpp b/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.cpp index 7e31ada18a657..a80d12347d48d 100644 --- a/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.cpp +++ b/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.cpp @@ -126,6 +126,18 @@ Marshaller::fromProtobuf(const RefsRequest *Message) { return Req; } +llvm::Expected +Marshaller::fromProtobuf(const ContainedRefsRequest *Message) { + clangd::ContainedRefsRequest Req; + auto ID = SymbolID::fromStr(Message->id()); + if (!ID) + return ID.takeError(); + Req.ID = *ID; + if (Message->has_limit()) + Req.Limit = Message->limit(); + return Req; +} + llvm::Expected Marshaller::fromProtobuf(const RelationsRequest *Message) { clangd::RelationsRequest Req; @@ -192,6 +204,21 @@ llvm::Expected Marshaller::fromProtobuf(const Ref &Message) { return Result; } +llvm::Expected +Marshaller::fromProtobuf(const ContainedRef &Message) { + clangd::ContainedRefsResult Result; + auto Location = fromProtobuf(Message.location()); + if (!Location) + return Location.takeError(); + Result.Location = *Location; + Result.Kind = static_cast(Message.kind()); + auto Symbol = SymbolID::fromStr(Message.symbol()); + if (!Symbol) + return Symbol.takeError(); + Result.Symbol = *Symbol; + 
return Result; +} + llvm::Expected> Marshaller::fromProtobuf(const Relation &Message) { auto SubjectID = SymbolID::fromStr(Message.subject_id()); @@ -244,6 +271,15 @@ RefsRequest Marshaller::toProtobuf(const clangd::RefsRequest &From) { return RPCRequest; } +ContainedRefsRequest +Marshaller::toProtobuf(const clangd::ContainedRefsRequest &From) { + ContainedRefsRequest RPCRequest; + RPCRequest.set_id(From.ID.str()); + if (From.Limit) + RPCRequest.set_limit(*From.Limit); + return RPCRequest; +} + RelationsRequest Marshaller::toProtobuf(const clangd::RelationsRequest &From) { RelationsRequest RPCRequest; for (const auto &ID : From.Subjects) @@ -299,6 +335,18 @@ llvm::Expected Marshaller::toProtobuf(const clangd::Ref &From) { return Result; } +llvm::Expected +Marshaller::toProtobuf(const clangd::ContainedRefsResult &From) { + ContainedRef Result; + auto Location = toProtobuf(From.Location); + if (!Location) + return Location.takeError(); + *Result.mutable_location() = *Location; + Result.set_kind(static_cast(From.Kind)); + *Result.mutable_symbol() = From.Symbol.str(); + return Result; +} + llvm::Expected Marshaller::toProtobuf(const clangd::SymbolID &Subject, const clangd::Symbol &Object) { Relation Result; diff --git a/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.h b/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.h index e827b4c155a20..5bee9205aef58 100644 --- a/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.h +++ b/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.h @@ -40,6 +40,8 @@ class Marshaller { llvm::Expected fromProtobuf(const Symbol &Message); llvm::Expected fromProtobuf(const Ref &Message); + llvm::Expected + fromProtobuf(const ContainedRef &Message); llvm::Expected> fromProtobuf(const Relation &Message); @@ -48,6 +50,8 @@ class Marshaller { llvm::Expected fromProtobuf(const FuzzyFindRequest *Message); llvm::Expected fromProtobuf(const RefsRequest *Message); + llvm::Expected + fromProtobuf(const ContainedRefsRequest *Message); llvm::Expected fromProtobuf(const RelationsRequest *Message); @@ -58,10 +62,13 @@ class Marshaller { LookupRequest toProtobuf(const clangd::LookupRequest &From); FuzzyFindRequest toProtobuf(const clangd::FuzzyFindRequest &From); RefsRequest toProtobuf(const clangd::RefsRequest &From); + ContainedRefsRequest toProtobuf(const clangd::ContainedRefsRequest &From); RelationsRequest toProtobuf(const clangd::RelationsRequest &From); llvm::Expected toProtobuf(const clangd::Symbol &From); llvm::Expected toProtobuf(const clangd::Ref &From); + llvm::Expected + toProtobuf(const clangd::ContainedRefsResult &From); llvm::Expected toProtobuf(const clangd::SymbolID &Subject, const clangd::Symbol &Object); diff --git a/clang-tools-extra/clangd/index/remote/server/Server.cpp b/clang-tools-extra/clangd/index/remote/server/Server.cpp index 52fca53260a16..890b6c27ed928 100644 --- a/clang-tools-extra/clangd/index/remote/server/Server.cpp +++ b/clang-tools-extra/clangd/index/remote/server/Server.cpp @@ -258,6 +258,53 @@ class RemoteIndexServer final : public v1::SymbolIndex::Service { return grpc::Status::OK; } + grpc::Status + ContainedRefs(grpc::ServerContext *Context, + const ContainedRefsRequest *Request, + grpc::ServerWriter *Reply) override { + auto StartTime = stopwatch::now(); + WithContextValue WithRequestContext(CurrentRequest, Context); + logRequest(*Request); + trace::Span Tracer("ContainedRefsRequest"); + auto Req = ProtobufMarshaller->fromProtobuf(Request); + if (!Req) { + elog("Can not parse 
ContainedRefsRequest from protobuf: {0}", + Req.takeError()); + return grpc::Status::CANCELLED; + } + if (!Req->Limit || *Req->Limit > LimitResults) { + log("[public] Limiting result size for ContainedRefs request from {0} to " + "{1}.", + Req->Limit, LimitResults); + Req->Limit = LimitResults; + } + unsigned Sent = 0; + unsigned FailedToSend = 0; + bool HasMore = + Index.containedRefs(*Req, [&](const clangd::ContainedRefsResult &Item) { + auto SerializedItem = ProtobufMarshaller->toProtobuf(Item); + if (!SerializedItem) { + elog("Unable to convert ContainedRefsResult to protobuf: {0}", + SerializedItem.takeError()); + ++FailedToSend; + return; + } + ContainedRefsReply NextMessage; + *NextMessage.mutable_stream_result() = *SerializedItem; + logResponse(NextMessage); + Reply->Write(NextMessage); + ++Sent; + }); + ContainedRefsReply LastMessage; + LastMessage.mutable_final_result()->set_has_more(HasMore); + logResponse(LastMessage); + Reply->Write(LastMessage); + SPAN_ATTACH(Tracer, "Sent", Sent); + SPAN_ATTACH(Tracer, "Failed to send", FailedToSend); + logRequestSummary("v1/ContainedRefs", Sent, StartTime); + return grpc::Status::OK; + } + grpc::Status Relations(grpc::ServerContext *Context, const RelationsRequest *Request, grpc::ServerWriter *Reply) override { @@ -396,7 +443,8 @@ void hotReload(clangd::SwapIndex &Index, llvm::StringRef IndexPath, LastStatus.getLastModificationTime(), Status->getLastModificationTime()); LastStatus = *Status; std::unique_ptr NewIndex = - loadIndex(IndexPath, SymbolOrigin::Static); + loadIndex(IndexPath, SymbolOrigin::Static, /*UseDex=*/true, + /*SupportContainedRefs=*/true); if (!NewIndex) { elog("Failed to load new index. Old index will be served."); return; @@ -532,8 +580,9 @@ int main(int argc, char *argv[]) { return Status.getError().value(); } - auto SymIndex = - clang::clangd::loadIndex(IndexPath, clang::clangd::SymbolOrigin::Static); + auto SymIndex = clang::clangd::loadIndex( + IndexPath, clang::clangd::SymbolOrigin::Static, /*UseDex=*/true, + /*SupportContainedRefs=*/true); if (!SymIndex) { llvm::errs() << "Failed to open the index.\n"; return -1; diff --git a/clang-tools-extra/clangd/test/index-serialization/Inputs/sample.idx b/clang-tools-extra/clangd/test/index-serialization/Inputs/sample.idx index 0c04df86ae1c6cd69ea0f802aff99f8057ffff74..6368e7145b1e4d628708f40d684bc8db1ef7f94d 100644 GIT binary patch delta 180 zcmV;l089VY1J(nO6n_)|0047za%qbI000O9004NLy^TQ*gD?yP^AtV+zUSCOC7xjt zx1@6HC^m@p^#%10J&d%P!%nzi4<|8(yXwWYHc4R?@0zzn0}l4Ci}Dm6g((8Ss+C|- zSILlRht~B)$qktI3f2=OMtP2|$~MyB9e*Z+lQ|U0bc{y5AQ+bqdQpK{+IBt|*2XlY i*8W&q!xsc3U)YhWUjZ4D3jsI*8k0l;QUMy1mjOS#p-G4U delta 180 zcmV;l089VY1J(nO6n_%{0047za%qbI000UB004NLy^TQ*!Y~X3^OSy|-FuH5kopXh znl{3Tqu4=(*F(i0IE*x!%Y+NH@MWIERrTbwUSe2^H(h(=fd+f!o5~dKNq7d(twA)B zU{l?Mv1?{zvPYvAM4lN@sBELFij91DqhDF!Y>re`K1Sn~NEp(aJZZsFYIlQCuEJBS i^`E;vd;rDTV4jhyUjYe|3jsI*3X?RZcko diff --git a/clang-tools-extra/clangd/test/type-hierarchy-ext.test b/clang-tools-extra/clangd/test/type-hierarchy-ext.test index ddb9a014be0c7..8d1a5dc31da0f 100644 --- a/clang-tools-extra/clangd/test/type-hierarchy-ext.test +++ b/clang-tools-extra/clangd/test/type-hierarchy-ext.test @@ -12,6 +12,7 @@ # CHECK-NEXT: "data": { # CHECK-NEXT: "symbolID": "A6576FE083F2949A" # CHECK-NEXT: }, +# CHECK-NEXT: "detail": "Child3", # CHECK-NEXT: "kind": 23, # CHECK-NEXT: "name": "Child3", # CHECK-NEXT: "range": { @@ -153,6 +154,7 @@ # CHECK-NEXT: "data": { # CHECK-NEXT: "symbolID": "5705B382DFC77CBC" # CHECK-NEXT: }, +# CHECK-NEXT: "detail": "Child4", # CHECK-NEXT: 
"kind": 23, # CHECK-NEXT: "name": "Child4", # CHECK-NEXT: "range": { diff --git a/clang-tools-extra/clangd/test/type-hierarchy.test b/clang-tools-extra/clangd/test/type-hierarchy.test index 69751000a7c6c..a5f13ab13d0b3 100644 --- a/clang-tools-extra/clangd/test/type-hierarchy.test +++ b/clang-tools-extra/clangd/test/type-hierarchy.test @@ -62,6 +62,7 @@ # CHECK-NEXT: ], # CHECK-NEXT: "symbolID": "ECDC0C46D75120F4" # CHECK-NEXT: }, +# CHECK-NEXT: "detail": "Child1", # CHECK-NEXT: "kind": 23, # CHECK-NEXT: "name": "Child1", # CHECK-NEXT: "range": { @@ -112,6 +113,7 @@ # CHECK-NEXT: ], # CHECK-NEXT: "symbolID": "A6576FE083F2949A" # CHECK-NEXT: }, +# CHECK-NEXT: "detail": "Child3", # CHECK-NEXT: "kind": 23, # CHECK-NEXT: "name": "Child3", # CHECK-NEXT: "range": { diff --git a/clang-tools-extra/clangd/tool/Check.cpp b/clang-tools-extra/clangd/tool/Check.cpp index bc2eaa77a66ee..df8d075e80596 100644 --- a/clang-tools-extra/clangd/tool/Check.cpp +++ b/clang-tools-extra/clangd/tool/Check.cpp @@ -163,7 +163,7 @@ class Checker { unsigned ErrCount = 0; Checker(llvm::StringRef File, const ClangdLSPServer::Options &Opts) - : File(File), Opts(Opts) {} + : File(File), Opts(Opts), Index(/*SupportContainedRefs=*/true) {} // Read compilation database and choose a compile command for the file. bool buildCommand(const ThreadsafeFS &TFS) { diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index cc061e2d93231..80a0653f8f740 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -604,7 +604,7 @@ const char TestScheme::TestDir[] = "/clangd-test"; std::unique_ptr loadExternalIndex(const Config::ExternalIndexSpec &External, - AsyncTaskRunner *Tasks) { + AsyncTaskRunner *Tasks, bool SupportContainedRefs) { static const trace::Metric RemoteIndexUsed("used_remote_index", trace::Metric::Value, "address"); switch (External.Kind) { @@ -620,8 +620,9 @@ loadExternalIndex(const Config::ExternalIndexSpec &External, External.Location); auto NewIndex = std::make_unique(std::make_unique()); auto IndexLoadTask = [File = External.Location, - PlaceHolder = NewIndex.get()] { - if (auto Idx = loadIndex(File, SymbolOrigin::Static, /*UseDex=*/true)) + PlaceHolder = NewIndex.get(), SupportContainedRefs] { + if (auto Idx = loadIndex(File, SymbolOrigin::Static, /*UseDex=*/true, + SupportContainedRefs)) PlaceHolder->reset(std::move(Idx)); }; if (Tasks) { @@ -909,7 +910,12 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var Opts.BackgroundIndexPriority = BackgroundIndexPriority; Opts.ReferencesLimit = ReferencesLimit; Opts.Rename.LimitFiles = RenameFileLimit; - auto PAI = createProjectAwareIndex(loadExternalIndex, Sync); + auto PAI = createProjectAwareIndex( + [SupportContainedRefs = Opts.EnableOutgoingCalls]( + const Config::ExternalIndexSpec &External, AsyncTaskRunner *Tasks) { + return loadExternalIndex(External, Tasks, SupportContainedRefs); + }, + Sync); Opts.StaticIndex = PAI.get(); Opts.AsyncThreadsCount = WorkerThreadsCount; Opts.MemoryCleanup = getMemoryCleanupFunction(); diff --git a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp index e51942462fbdf..ada14c9939318 100644 --- a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp +++ b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp @@ -685,7 +685,8 @@ TEST_F(BackgroundIndexTest, Reindex) { class BackgroundIndexRebuilderTest : public testing::Test 
{ protected: BackgroundIndexRebuilderTest() - : Source(IndexContents::All), Target(std::make_unique()), + : Source(IndexContents::All, /*SupportContainedRefs=*/true), + Target(std::make_unique()), Rebuilder(&Target, &Source, /*Threads=*/10) { // Prepare FileSymbols with TestSymbol in it, for checkRebuild. TestSymbol.ID = SymbolID("foo"); diff --git a/clang-tools-extra/clangd/unittests/CallHierarchyTests.cpp b/clang-tools-extra/clangd/unittests/CallHierarchyTests.cpp index 8821d3aad9c78..316b94305c9ae 100644 --- a/clang-tools-extra/clangd/unittests/CallHierarchyTests.cpp +++ b/clang-tools-extra/clangd/unittests/CallHierarchyTests.cpp @@ -44,17 +44,27 @@ using ::testing::UnorderedElementsAre; // Helpers for matching call hierarchy data structures. MATCHER_P(withName, N, "") { return arg.name == N; } +MATCHER_P(withDetail, N, "") { return arg.detail == N; } MATCHER_P(withSelectionRange, R, "") { return arg.selectionRange == R; } template ::testing::Matcher from(ItemMatcher M) { return Field(&CallHierarchyIncomingCall::from, M); } +template +::testing::Matcher to(ItemMatcher M) { + return Field(&CallHierarchyOutgoingCall::to, M); +} template -::testing::Matcher fromRanges(RangeMatchers... M) { +::testing::Matcher iFromRanges(RangeMatchers... M) { return Field(&CallHierarchyIncomingCall::fromRanges, UnorderedElementsAre(M...)); } +template +::testing::Matcher oFromRanges(RangeMatchers... M) { + return Field(&CallHierarchyOutgoingCall::fromRanges, + UnorderedElementsAre(M...)); +} TEST(CallHierarchy, IncomingOneFileCpp) { Annotations Source(R"cpp( @@ -79,21 +89,24 @@ TEST(CallHierarchy, IncomingOneFileCpp) { prepareCallHierarchy(AST, Source.point(), testPath(TU.Filename)); ASSERT_THAT(Items, ElementsAre(withName("callee"))); auto IncomingLevel1 = incomingCalls(Items[0], Index.get()); - ASSERT_THAT(IncomingLevel1, - ElementsAre(AllOf(from(withName("caller1")), - fromRanges(Source.range("Callee"))))); + ASSERT_THAT( + IncomingLevel1, + ElementsAre(AllOf(from(AllOf(withName("caller1"), withDetail("caller1"))), + iFromRanges(Source.range("Callee"))))); auto IncomingLevel2 = incomingCalls(IncomingLevel1[0].from, Index.get()); - ASSERT_THAT(IncomingLevel2, - ElementsAre(AllOf(from(withName("caller2")), - fromRanges(Source.range("Caller1A"), - Source.range("Caller1B"))), - AllOf(from(withName("caller3")), - fromRanges(Source.range("Caller1C"))))); + ASSERT_THAT( + IncomingLevel2, + ElementsAre(AllOf(from(AllOf(withName("caller2"), withDetail("caller2"))), + iFromRanges(Source.range("Caller1A"), + Source.range("Caller1B"))), + AllOf(from(AllOf(withName("caller3"), withDetail("caller3"))), + iFromRanges(Source.range("Caller1C"))))); auto IncomingLevel3 = incomingCalls(IncomingLevel2[0].from, Index.get()); - ASSERT_THAT(IncomingLevel3, - ElementsAre(AllOf(from(withName("caller3")), - fromRanges(Source.range("Caller2"))))); + ASSERT_THAT( + IncomingLevel3, + ElementsAre(AllOf(from(AllOf(withName("caller3"), withDetail("caller3"))), + iFromRanges(Source.range("Caller2"))))); auto IncomingLevel4 = incomingCalls(IncomingLevel3[0].from, Index.get()); EXPECT_THAT(IncomingLevel4, IsEmpty()); @@ -125,20 +138,24 @@ TEST(CallHierarchy, IncomingOneFileObjC) { ASSERT_THAT(Items, ElementsAre(withName("callee"))); auto IncomingLevel1 = incomingCalls(Items[0], Index.get()); ASSERT_THAT(IncomingLevel1, - ElementsAre(AllOf(from(withName("caller1")), - fromRanges(Source.range("Callee"))))); + ElementsAre(AllOf(from(AllOf(withName("caller1"), + withDetail("MyClass::caller1"))), + iFromRanges(Source.range("Callee"))))); auto 
IncomingLevel2 = incomingCalls(IncomingLevel1[0].from, Index.get()); ASSERT_THAT(IncomingLevel2, - ElementsAre(AllOf(from(withName("caller2")), - fromRanges(Source.range("Caller1A"), - Source.range("Caller1B"))), - AllOf(from(withName("caller3")), - fromRanges(Source.range("Caller1C"))))); + ElementsAre(AllOf(from(AllOf(withName("caller2"), + withDetail("MyClass::caller2"))), + iFromRanges(Source.range("Caller1A"), + Source.range("Caller1B"))), + AllOf(from(AllOf(withName("caller3"), + withDetail("MyClass::caller3"))), + iFromRanges(Source.range("Caller1C"))))); auto IncomingLevel3 = incomingCalls(IncomingLevel2[0].from, Index.get()); ASSERT_THAT(IncomingLevel3, - ElementsAre(AllOf(from(withName("caller3")), - fromRanges(Source.range("Caller2"))))); + ElementsAre(AllOf(from(AllOf(withName("caller3"), + withDetail("MyClass::caller3"))), + iFromRanges(Source.range("Caller2"))))); auto IncomingLevel4 = incomingCalls(IncomingLevel3[0].from, Index.get()); EXPECT_THAT(IncomingLevel4, IsEmpty()); @@ -167,14 +184,16 @@ TEST(CallHierarchy, MainFileOnlyRef) { prepareCallHierarchy(AST, Source.point(), testPath(TU.Filename)); ASSERT_THAT(Items, ElementsAre(withName("callee"))); auto IncomingLevel1 = incomingCalls(Items[0], Index.get()); - ASSERT_THAT(IncomingLevel1, - ElementsAre(AllOf(from(withName("caller1")), - fromRanges(Source.range("Callee"))))); + ASSERT_THAT( + IncomingLevel1, + ElementsAre(AllOf(from(AllOf(withName("caller1"), withDetail("caller1"))), + iFromRanges(Source.range("Callee"))))); auto IncomingLevel2 = incomingCalls(IncomingLevel1[0].from, Index.get()); - EXPECT_THAT(IncomingLevel2, - ElementsAre(AllOf(from(withName("caller2")), - fromRanges(Source.range("Caller1"))))); + EXPECT_THAT( + IncomingLevel2, + ElementsAre(AllOf(from(AllOf(withName("caller2"), withDetail("caller2"))), + iFromRanges(Source.range("Caller1"))))); } TEST(CallHierarchy, IncomingQualified) { @@ -200,14 +219,72 @@ TEST(CallHierarchy, IncomingQualified) { prepareCallHierarchy(AST, Source.point(), testPath(TU.Filename)); ASSERT_THAT(Items, ElementsAre(withName("Waldo::find"))); auto Incoming = incomingCalls(Items[0], Index.get()); - EXPECT_THAT(Incoming, - ElementsAre(AllOf(from(withName("caller1")), - fromRanges(Source.range("Caller1"))), - AllOf(from(withName("caller2")), - fromRanges(Source.range("Caller2"))))); + EXPECT_THAT( + Incoming, + ElementsAre( + AllOf(from(AllOf(withName("caller1"), withDetail("ns::caller1"))), + iFromRanges(Source.range("Caller1"))), + AllOf(from(AllOf(withName("caller2"), withDetail("ns::caller2"))), + iFromRanges(Source.range("Caller2"))))); } -TEST(CallHierarchy, IncomingMultiFileCpp) { +TEST(CallHierarchy, OutgoingOneFile) { + // Test outgoing call on the main file, with namespaces and methods + Annotations Source(R"cpp( + void callee(int); + namespace ns { + struct Foo { + void caller1(); + }; + void Foo::caller1() { + $Callee[[callee]](42); + } + } + namespace { + void caller2(ns::Foo& F) { + F.$Caller1A[[caller1]](); + F.$Caller1B[[caller1]](); + } + } + void call^er3(ns::Foo& F) { + F.$Caller1C[[caller1]](); + $Caller2[[caller2]](F); + } + )cpp"); + TestTU TU = TestTU::withCode(Source.code()); + auto AST = TU.build(); + auto Index = TU.index(); + + std::vector Items = + prepareCallHierarchy(AST, Source.point(), testPath(TU.Filename)); + ASSERT_THAT(Items, ElementsAre(withName("caller3"))); + auto OugoingLevel1 = outgoingCalls(Items[0], Index.get()); + ASSERT_THAT( + OugoingLevel1, + ElementsAre( + AllOf(to(AllOf(withName("caller1"), withDetail("ns::Foo::caller1"))), + 
oFromRanges(Source.range("Caller1C"))), + AllOf(to(AllOf(withName("caller2"), withDetail("caller2"))), + oFromRanges(Source.range("Caller2"))))); + + auto OutgoingLevel2 = outgoingCalls(OugoingLevel1[1].to, Index.get()); + ASSERT_THAT( + OutgoingLevel2, + ElementsAre(AllOf( + to(AllOf(withName("caller1"), withDetail("ns::Foo::caller1"))), + oFromRanges(Source.range("Caller1A"), Source.range("Caller1B"))))); + + auto OutgoingLevel3 = outgoingCalls(OutgoingLevel2[0].to, Index.get()); + ASSERT_THAT( + OutgoingLevel3, + ElementsAre(AllOf(to(AllOf(withName("callee"), withDetail("callee"))), + oFromRanges(Source.range("Callee"))))); + + auto OutgoingLevel4 = outgoingCalls(OutgoingLevel3[0].to, Index.get()); + EXPECT_THAT(OutgoingLevel4, IsEmpty()); +} + +TEST(CallHierarchy, MultiFileCpp) { // The test uses a .hh suffix for header files to get clang // to parse them in C++ mode. .h files are parsed in C mode // by default, which causes problems because e.g. symbol @@ -221,32 +298,47 @@ TEST(CallHierarchy, IncomingMultiFileCpp) { void calle^e(int) {} )cpp"); Annotations Caller1H(R"cpp( - void caller1(); + namespace nsa { + void caller1(); + } )cpp"); Annotations Caller1C(R"cpp( #include "callee.hh" #include "caller1.hh" - void caller1() { - [[calle^e]](42); + namespace nsa { + void caller1() { + [[calle^e]](42); + } } )cpp"); Annotations Caller2H(R"cpp( - void caller2(); + namespace nsb { + void caller2(); + } )cpp"); Annotations Caller2C(R"cpp( #include "caller1.hh" #include "caller2.hh" - void caller2() { - $A[[caller1]](); - $B[[caller1]](); + namespace nsb { + void caller2() { + nsa::$A[[caller1]](); + nsa::$B[[caller1]](); + } + } + )cpp"); + Annotations Caller3H(R"cpp( + namespace nsa { + void call^er3(); } )cpp"); Annotations Caller3C(R"cpp( #include "caller1.hh" #include "caller2.hh" - void caller3() { - $Caller1[[caller1]](); - $Caller2[[caller2]](); + namespace nsa { + void call^er3() { + $Caller1[[caller1]](); + nsb::$Caller2[[caller2]](); + } } )cpp"); @@ -254,6 +346,7 @@ TEST(CallHierarchy, IncomingMultiFileCpp) { Workspace.addSource("callee.hh", CalleeH.code()); Workspace.addSource("caller1.hh", Caller1H.code()); Workspace.addSource("caller2.hh", Caller2H.code()); + Workspace.addSource("caller3.hh", Caller3H.code()); Workspace.addMainFile("callee.cc", CalleeC.code()); Workspace.addMainFile("caller1.cc", Caller1C.code()); Workspace.addMainFile("caller2.cc", Caller2C.code()); @@ -261,46 +354,84 @@ TEST(CallHierarchy, IncomingMultiFileCpp) { auto Index = Workspace.index(); - auto CheckCallHierarchy = [&](ParsedAST &AST, Position Pos, PathRef TUPath) { + auto CheckIncomingCalls = [&](ParsedAST &AST, Position Pos, PathRef TUPath) { std::vector Items = prepareCallHierarchy(AST, Pos, TUPath); ASSERT_THAT(Items, ElementsAre(withName("callee"))); auto IncomingLevel1 = incomingCalls(Items[0], Index.get()); ASSERT_THAT(IncomingLevel1, - ElementsAre(AllOf(from(withName("caller1")), - fromRanges(Caller1C.range())))); + ElementsAre(AllOf(from(AllOf(withName("caller1"), + withDetail("nsa::caller1"))), + iFromRanges(Caller1C.range())))); auto IncomingLevel2 = incomingCalls(IncomingLevel1[0].from, Index.get()); ASSERT_THAT( IncomingLevel2, - ElementsAre(AllOf(from(withName("caller2")), - fromRanges(Caller2C.range("A"), Caller2C.range("B"))), - AllOf(from(withName("caller3")), - fromRanges(Caller3C.range("Caller1"))))); + ElementsAre( + AllOf(from(AllOf(withName("caller2"), withDetail("nsb::caller2"))), + iFromRanges(Caller2C.range("A"), Caller2C.range("B"))), + AllOf(from(AllOf(withName("caller3"), 
withDetail("nsa::caller3"))), + iFromRanges(Caller3C.range("Caller1"))))); auto IncomingLevel3 = incomingCalls(IncomingLevel2[0].from, Index.get()); ASSERT_THAT(IncomingLevel3, - ElementsAre(AllOf(from(withName("caller3")), - fromRanges(Caller3C.range("Caller2"))))); + ElementsAre(AllOf(from(AllOf(withName("caller3"), + withDetail("nsa::caller3"))), + iFromRanges(Caller3C.range("Caller2"))))); auto IncomingLevel4 = incomingCalls(IncomingLevel3[0].from, Index.get()); EXPECT_THAT(IncomingLevel4, IsEmpty()); }; + auto CheckOutgoingCalls = [&](ParsedAST &AST, Position Pos, PathRef TUPath) { + std::vector Items = + prepareCallHierarchy(AST, Pos, TUPath); + ASSERT_THAT(Items, ElementsAre(withName("caller3"))); + auto OutgoingLevel1 = outgoingCalls(Items[0], Index.get()); + ASSERT_THAT( + OutgoingLevel1, + ElementsAre( + AllOf(to(AllOf(withName("caller1"), withDetail("nsa::caller1"))), + oFromRanges(Caller3C.range("Caller1"))), + AllOf(to(AllOf(withName("caller2"), withDetail("nsb::caller2"))), + oFromRanges(Caller3C.range("Caller2"))))); + + auto OutgoingLevel2 = outgoingCalls(OutgoingLevel1[1].to, Index.get()); + ASSERT_THAT(OutgoingLevel2, + ElementsAre(AllOf( + to(AllOf(withName("caller1"), withDetail("nsa::caller1"))), + oFromRanges(Caller2C.range("A"), Caller2C.range("B"))))); + + auto OutgoingLevel3 = outgoingCalls(OutgoingLevel2[0].to, Index.get()); + ASSERT_THAT( + OutgoingLevel3, + ElementsAre(AllOf(to(AllOf(withName("callee"), withDetail("callee"))), + oFromRanges(Caller1C.range())))); + + auto OutgoingLevel4 = outgoingCalls(OutgoingLevel3[0].to, Index.get()); + EXPECT_THAT(OutgoingLevel4, IsEmpty()); + }; + // Check that invoking from a call site works. auto AST = Workspace.openFile("caller1.cc"); ASSERT_TRUE(bool(AST)); - CheckCallHierarchy(*AST, Caller1C.point(), testPath("caller1.cc")); + CheckIncomingCalls(*AST, Caller1C.point(), testPath("caller1.cc")); // Check that invoking from the declaration site works. AST = Workspace.openFile("callee.hh"); ASSERT_TRUE(bool(AST)); - CheckCallHierarchy(*AST, CalleeH.point(), testPath("callee.hh")); + CheckIncomingCalls(*AST, CalleeH.point(), testPath("callee.hh")); + AST = Workspace.openFile("caller3.hh"); + ASSERT_TRUE(bool(AST)); + CheckOutgoingCalls(*AST, Caller3H.point(), testPath("caller3.hh")); // Check that invoking from the definition site works. 
AST = Workspace.openFile("callee.cc"); ASSERT_TRUE(bool(AST)); - CheckCallHierarchy(*AST, CalleeC.point(), testPath("callee.cc")); + CheckIncomingCalls(*AST, CalleeC.point(), testPath("callee.cc")); + AST = Workspace.openFile("caller3.cc"); + ASSERT_TRUE(bool(AST)); + CheckOutgoingCalls(*AST, Caller3C.point(), testPath("caller3.cc")); } TEST(CallHierarchy, IncomingMultiFileObjC) { @@ -377,20 +508,20 @@ TEST(CallHierarchy, IncomingMultiFileObjC) { auto IncomingLevel1 = incomingCalls(Items[0], Index.get()); ASSERT_THAT(IncomingLevel1, ElementsAre(AllOf(from(withName("caller1")), - fromRanges(Caller1C.range())))); + iFromRanges(Caller1C.range())))); auto IncomingLevel2 = incomingCalls(IncomingLevel1[0].from, Index.get()); - ASSERT_THAT( - IncomingLevel2, - ElementsAre(AllOf(from(withName("caller2")), - fromRanges(Caller2C.range("A"), Caller2C.range("B"))), - AllOf(from(withName("caller3")), - fromRanges(Caller3C.range("Caller1"))))); + ASSERT_THAT(IncomingLevel2, + ElementsAre(AllOf(from(withName("caller2")), + iFromRanges(Caller2C.range("A"), + Caller2C.range("B"))), + AllOf(from(withName("caller3")), + iFromRanges(Caller3C.range("Caller1"))))); auto IncomingLevel3 = incomingCalls(IncomingLevel2[0].from, Index.get()); ASSERT_THAT(IncomingLevel3, ElementsAre(AllOf(from(withName("caller3")), - fromRanges(Caller3C.range("Caller2"))))); + iFromRanges(Caller3C.range("Caller2"))))); auto IncomingLevel4 = incomingCalls(IncomingLevel3[0].from, Index.get()); EXPECT_THAT(IncomingLevel4, IsEmpty()); @@ -438,12 +569,12 @@ TEST(CallHierarchy, CallInLocalVarDecl) { ASSERT_THAT(Items, ElementsAre(withName("callee"))); auto Incoming = incomingCalls(Items[0], Index.get()); - ASSERT_THAT( - Incoming, - ElementsAre( - AllOf(from(withName("caller1")), fromRanges(Source.range("call1"))), - AllOf(from(withName("caller2")), fromRanges(Source.range("call2"))), - AllOf(from(withName("caller3")), fromRanges(Source.range("call3"))))); + ASSERT_THAT(Incoming, ElementsAre(AllOf(from(withName("caller1")), + iFromRanges(Source.range("call1"))), + AllOf(from(withName("caller2")), + iFromRanges(Source.range("call2"))), + AllOf(from(withName("caller3")), + iFromRanges(Source.range("call3"))))); } TEST(CallHierarchy, HierarchyOnField) { @@ -467,7 +598,7 @@ TEST(CallHierarchy, HierarchyOnField) { auto IncomingLevel1 = incomingCalls(Items[0], Index.get()); ASSERT_THAT(IncomingLevel1, ElementsAre(AllOf(from(withName("caller")), - fromRanges(Source.range("Callee"))))); + iFromRanges(Source.range("Callee"))))); } TEST(CallHierarchy, HierarchyOnVar) { @@ -488,7 +619,7 @@ TEST(CallHierarchy, HierarchyOnVar) { auto IncomingLevel1 = incomingCalls(Items[0], Index.get()); ASSERT_THAT(IncomingLevel1, ElementsAre(AllOf(from(withName("caller")), - fromRanges(Source.range("Callee"))))); + iFromRanges(Source.range("Callee"))))); } TEST(CallHierarchy, CallInDifferentFileThanCaller) { @@ -517,7 +648,7 @@ TEST(CallHierarchy, CallInDifferentFileThanCaller) { // header. The protocol does not allow us to represent such calls, so we drop // them. (The call hierarchy item itself is kept.) 
EXPECT_THAT(Incoming, - ElementsAre(AllOf(from(withName("caller")), fromRanges()))); + ElementsAre(AllOf(from(withName("caller")), iFromRanges()))); } } // namespace diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp index a89f499736226..3acacf496e77f 100644 --- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp +++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp @@ -1703,6 +1703,12 @@ class IndexRequestCollector : public SymbolIndex { return false; } + bool containedRefs( + const ContainedRefsRequest &, + llvm::function_ref) const override { + return false; + } + void relations(const RelationsRequest &, llvm::function_ref) const override {} diff --git a/clang-tools-extra/clangd/unittests/DexTests.cpp b/clang-tools-extra/clangd/unittests/DexTests.cpp index cafbfd324840c..ca8b81b5cb3c0 100644 --- a/clang-tools-extra/clangd/unittests/DexTests.cpp +++ b/clang-tools-extra/clangd/unittests/DexTests.cpp @@ -476,7 +476,7 @@ TEST(DexSearchTokens, SymbolPath) { TEST(Dex, Lookup) { auto I = Dex::build(generateSymbols({"ns::abc", "ns::xyz"}), RefSlab(), - RelationSlab()); + RelationSlab(), true); EXPECT_THAT(lookup(*I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc")); EXPECT_THAT(lookup(*I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}), UnorderedElementsAre("ns::abc", "ns::xyz")); @@ -489,7 +489,7 @@ TEST(Dex, FuzzyFind) { auto Index = Dex::build(generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC", "other::ABC", "other::A"}), - RefSlab(), RelationSlab()); + RefSlab(), RelationSlab(), true); FuzzyFindRequest Req; Req.Query = "ABC"; Req.Scopes = {"ns::"}; @@ -511,7 +511,8 @@ TEST(Dex, FuzzyFind) { } TEST(DexTest, DexLimitedNumMatches) { - auto I = Dex::build(generateNumSymbols(0, 100), RefSlab(), RelationSlab()); + auto I = + Dex::build(generateNumSymbols(0, 100), RefSlab(), RelationSlab(), true); FuzzyFindRequest Req; Req.Query = "5"; Req.AnyScope = true; @@ -526,7 +527,7 @@ TEST(DexTest, DexLimitedNumMatches) { TEST(DexTest, FuzzyMatch) { auto I = Dex::build( generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}), - RefSlab(), RelationSlab()); + RefSlab(), RelationSlab(), true); FuzzyFindRequest Req; Req.Query = "lol"; Req.AnyScope = true; @@ -537,7 +538,7 @@ TEST(DexTest, FuzzyMatch) { TEST(DexTest, ShortQuery) { auto I = Dex::build(generateSymbols({"_OneTwoFourSix"}), RefSlab(), - RelationSlab()); + RelationSlab(), true); FuzzyFindRequest Req; Req.AnyScope = true; bool Incomplete; @@ -580,7 +581,7 @@ TEST(DexTest, ShortQuery) { TEST(DexTest, MatchQualifiedNamesWithoutSpecificScope) { auto I = Dex::build(generateSymbols({"a::y1", "b::y2", "y3"}), RefSlab(), - RelationSlab()); + RelationSlab(), true); FuzzyFindRequest Req; Req.AnyScope = true; Req.Query = "y"; @@ -589,7 +590,7 @@ TEST(DexTest, MatchQualifiedNamesWithoutSpecificScope) { TEST(DexTest, MatchQualifiedNamesWithGlobalScope) { auto I = Dex::build(generateSymbols({"a::y1", "b::y2", "y3"}), RefSlab(), - RelationSlab()); + RelationSlab(), true); FuzzyFindRequest Req; Req.Query = "y"; Req.Scopes = {""}; @@ -599,7 +600,7 @@ TEST(DexTest, MatchQualifiedNamesWithGlobalScope) { TEST(DexTest, MatchQualifiedNamesWithOneScope) { auto I = Dex::build(generateSymbols({"a::y1", "a::y2", "a::x", "b::y2", "y3"}), - RefSlab(), RelationSlab()); + RefSlab(), RelationSlab(), true); FuzzyFindRequest Req; Req.Query = "y"; Req.Scopes = {"a::"}; @@ -609,7 +610,7 @@ TEST(DexTest, MatchQualifiedNamesWithOneScope) { TEST(DexTest, 
MatchQualifiedNamesWithMultipleScopes) { auto I = Dex::build(generateSymbols({"a::y1", "a::y2", "a::x", "b::y3", "y3"}), - RefSlab(), RelationSlab()); + RefSlab(), RelationSlab(), true); FuzzyFindRequest Req; Req.Query = "y"; Req.Scopes = {"a::", "b::"}; @@ -618,7 +619,7 @@ TEST(DexTest, MatchQualifiedNamesWithMultipleScopes) { TEST(DexTest, NoMatchNestedScopes) { auto I = Dex::build(generateSymbols({"a::y1", "a::b::y2"}), RefSlab(), - RelationSlab()); + RelationSlab(), true); FuzzyFindRequest Req; Req.Query = "y"; Req.Scopes = {"a::"}; @@ -627,7 +628,7 @@ TEST(DexTest, NoMatchNestedScopes) { TEST(DexTest, WildcardScope) { auto I = Dex::build(generateSymbols({"a::y1", "a::b::y2", "c::y3"}), - RefSlab(), RelationSlab()); + RefSlab(), RelationSlab(), true); FuzzyFindRequest Req; Req.AnyScope = true; Req.Query = "y"; @@ -638,7 +639,7 @@ TEST(DexTest, WildcardScope) { TEST(DexTest, IgnoreCases) { auto I = Dex::build(generateSymbols({"ns::ABC", "ns::abc"}), RefSlab(), - RelationSlab()); + RelationSlab(), true); FuzzyFindRequest Req; Req.Query = "AB"; Req.Scopes = {"ns::"}; @@ -648,7 +649,7 @@ TEST(DexTest, IgnoreCases) { TEST(DexTest, UnknownPostingList) { // Regression test: we used to ignore unknown scopes and accept any symbol. auto I = Dex::build(generateSymbols({"ns::ABC", "ns::abc"}), RefSlab(), - RelationSlab()); + RelationSlab(), true); FuzzyFindRequest Req; Req.Scopes = {"ns2::"}; EXPECT_THAT(match(*I, Req), UnorderedElementsAre()); @@ -656,7 +657,7 @@ TEST(DexTest, UnknownPostingList) { TEST(DexTest, Lookup) { auto I = Dex::build(generateSymbols({"ns::abc", "ns::xyz"}), RefSlab(), - RelationSlab()); + RelationSlab(), true); EXPECT_THAT(lookup(*I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc")); EXPECT_THAT(lookup(*I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}), UnorderedElementsAre("ns::abc", "ns::xyz")); @@ -671,7 +672,7 @@ TEST(DexTest, SymbolIndexOptionsFilter) { CodeCompletionSymbol.Flags = Symbol::SymbolFlag::IndexedForCodeCompletion; NonCodeCompletionSymbol.Flags = Symbol::SymbolFlag::None; std::vector Symbols{CodeCompletionSymbol, NonCodeCompletionSymbol}; - Dex I(Symbols, RefSlab(), RelationSlab()); + Dex I(Symbols, RefSlab(), RelationSlab(), true); FuzzyFindRequest Req; Req.AnyScope = true; Req.RestrictForCodeCompletion = false; @@ -687,7 +688,7 @@ TEST(DexTest, ProximityPathsBoosting) { CloseSymbol.CanonicalDeclaration.FileURI = "unittest:///a/b/c/d/e/f/file.h"; std::vector Symbols{CloseSymbol, RootSymbol}; - Dex I(Symbols, RefSlab(), RelationSlab()); + Dex I(Symbols, RefSlab(), RelationSlab(), true); FuzzyFindRequest Req; Req.AnyScope = true; @@ -726,7 +727,7 @@ TEST(DexTests, Refs) { Req.Filter = RefKind::Declaration | RefKind::Definition; std::vector Files; - EXPECT_FALSE(Dex(std::vector{Foo, Bar}, Refs, RelationSlab()) + EXPECT_FALSE(Dex(std::vector{Foo, Bar}, Refs, RelationSlab(), true) .refs(Req, [&](const Ref &R) { Files.push_back(R.Location.FileURI); })); @@ -734,7 +735,7 @@ TEST(DexTests, Refs) { Req.Limit = 1; Files.clear(); - EXPECT_TRUE(Dex(std::vector{Foo, Bar}, Refs, RelationSlab()) + EXPECT_TRUE(Dex(std::vector{Foo, Bar}, Refs, RelationSlab(), true) .refs(Req, [&](const Ref &R) { Files.push_back(R.Location.FileURI); })); @@ -751,7 +752,7 @@ TEST(DexTests, Relations) { std::vector Relations{{Parent.ID, RelationKind::BaseOf, Child1.ID}, {Parent.ID, RelationKind::BaseOf, Child2.ID}}; - Dex I{Symbols, RefSlab(), Relations}; + Dex I{Symbols, RefSlab(), Relations, true}; std::vector Results; RelationsRequest Req; @@ -770,7 +771,7 @@ TEST(DexIndex, 
IndexedFiles) { auto Data = std::make_pair(std::move(Symbols), std::move(Refs)); llvm::StringSet<> Files = {"unittest:///foo.cc", "unittest:///bar.cc"}; Dex I(std::move(Data.first), std::move(Data.second), RelationSlab(), - std::move(Files), IndexContents::All, std::move(Data), Size); + std::move(Files), IndexContents::All, std::move(Data), Size, true); auto ContainsFile = I.indexedFiles(); EXPECT_EQ(ContainsFile("unittest:///foo.cc"), IndexContents::All); EXPECT_EQ(ContainsFile("unittest:///bar.cc"), IndexContents::All); @@ -784,7 +785,7 @@ TEST(DexTest, PreferredTypesBoosting) { Sym2.Type = "T2"; std::vector Symbols{Sym1, Sym2}; - Dex I(Symbols, RefSlab(), RelationSlab()); + Dex I(Symbols, RefSlab(), RelationSlab(), true); FuzzyFindRequest Req; Req.AnyScope = true; @@ -820,7 +821,8 @@ TEST(DexTest, TemplateSpecialization) { index::SymbolProperty::TemplatePartialSpecialization); B.insert(S); - auto I = dex::Dex::build(std::move(B).build(), RefSlab(), RelationSlab()); + auto I = + dex::Dex::build(std::move(B).build(), RefSlab(), RelationSlab(), true); FuzzyFindRequest Req; Req.AnyScope = true; diff --git a/clang-tools-extra/clangd/unittests/FileIndexTests.cpp b/clang-tools-extra/clangd/unittests/FileIndexTests.cpp index 9f713564b2c01..a92142fbcd7c4 100644 --- a/clang-tools-extra/clangd/unittests/FileIndexTests.cpp +++ b/clang-tools-extra/clangd/unittests/FileIndexTests.cpp @@ -104,7 +104,7 @@ std::unique_ptr relSlab(llvm::ArrayRef Rels) { } TEST(FileSymbolsTest, UpdateAndGet) { - FileSymbols FS(IndexContents::All); + FileSymbols FS(IndexContents::All, true); EXPECT_THAT(runFuzzyFind(*FS.buildIndex(IndexType::Light), ""), IsEmpty()); FS.update("f1", numSlab(1, 3), refSlab(SymbolID("1"), "f1.cc"), nullptr, @@ -116,7 +116,7 @@ TEST(FileSymbolsTest, UpdateAndGet) { } TEST(FileSymbolsTest, Overlap) { - FileSymbols FS(IndexContents::All); + FileSymbols FS(IndexContents::All, true); FS.update("f1", numSlab(1, 3), nullptr, nullptr, false); FS.update("f2", numSlab(3, 5), nullptr, nullptr, false); for (auto Type : {IndexType::Light, IndexType::Heavy}) @@ -126,7 +126,7 @@ TEST(FileSymbolsTest, Overlap) { } TEST(FileSymbolsTest, MergeOverlap) { - FileSymbols FS(IndexContents::All); + FileSymbols FS(IndexContents::All, true); auto OneSymboSlab = [](Symbol Sym) { SymbolSlab::Builder S; S.insert(Sym); @@ -147,7 +147,7 @@ TEST(FileSymbolsTest, MergeOverlap) { } TEST(FileSymbolsTest, SnapshotAliveAfterRemove) { - FileSymbols FS(IndexContents::All); + FileSymbols FS(IndexContents::All, true); SymbolID ID("1"); FS.update("f1", numSlab(1, 3), refSlab(ID, "f1.cc"), nullptr, false); @@ -180,14 +180,14 @@ void update(FileIndex &M, llvm::StringRef Basename, llvm::StringRef Code) { } TEST(FileIndexTest, CustomizedURIScheme) { - FileIndex M; + FileIndex M(true); update(M, "f", "class string {};"); EXPECT_THAT(runFuzzyFind(M, ""), ElementsAre(declURI("unittest:///f.h"))); } TEST(FileIndexTest, IndexAST) { - FileIndex M; + FileIndex M(true); update(M, "f1", "namespace ns { void f() {} class X {}; }"); FuzzyFindRequest Req; @@ -198,7 +198,7 @@ TEST(FileIndexTest, IndexAST) { } TEST(FileIndexTest, NoLocal) { - FileIndex M; + FileIndex M(true); update(M, "f1", "namespace ns { void f() { int local = 0; } class X {}; }"); EXPECT_THAT( @@ -207,7 +207,7 @@ TEST(FileIndexTest, NoLocal) { } TEST(FileIndexTest, IndexMultiASTAndDeduplicate) { - FileIndex M; + FileIndex M(true); update(M, "f1", "namespace ns { void f() {} class X {}; }"); update(M, "f2", "namespace ns { void ff() {} class X {}; }"); @@ -219,7 +219,7 @@ 
TEST(FileIndexTest, IndexMultiASTAndDeduplicate) { } TEST(FileIndexTest, ClassMembers) { - FileIndex M; + FileIndex M(true); update(M, "f1", "class X { static int m1; int m2; static void f(); };"); EXPECT_THAT(runFuzzyFind(M, ""), @@ -228,7 +228,7 @@ TEST(FileIndexTest, ClassMembers) { } TEST(FileIndexTest, IncludeCollected) { - FileIndex M; + FileIndex M(true); update( M, "f", "// IWYU pragma: private, include \nclass string {};"); @@ -240,7 +240,7 @@ TEST(FileIndexTest, IncludeCollected) { } TEST(FileIndexTest, IWYUPragmaExport) { - FileIndex M; + FileIndex M(true); TestTU File; File.Code = R"cpp(#pragma once @@ -286,7 +286,7 @@ template vector make_vector(Arg A) {} )cpp"; - FileIndex M; + FileIndex M(true); update(M, "f", Source); auto Symbols = runFuzzyFind(M, ""); @@ -334,7 +334,7 @@ TEST(FileIndexTest, RebuildWithPreamble) { IgnoreDiagnostics IgnoreDiags; auto CI = buildCompilerInvocation(PI, IgnoreDiags); - FileIndex Index; + FileIndex Index(true); bool IndexUpdated = false; buildPreamble( FooCpp, *CI, PI, @@ -374,7 +374,7 @@ TEST(FileIndexTest, Refs) { RefsRequest Request; Request.IDs = {Foo.ID}; - FileIndex Index; + FileIndex Index(true); // Add test.cc TestTU Test; Test.HeaderCode = HeaderCode; @@ -409,7 +409,7 @@ TEST(FileIndexTest, MacroRefs) { } )cpp"); - FileIndex Index; + FileIndex Index(true); // Add test.cc TestTU Test; Test.HeaderCode = std::string(HeaderCode.code()); @@ -432,7 +432,7 @@ TEST(FileIndexTest, MacroRefs) { } TEST(FileIndexTest, CollectMacros) { - FileIndex M; + FileIndex M(true); update(M, "f", "#define CLANGD 1"); EXPECT_THAT(runFuzzyFind(M, ""), Contains(qName("CLANGD"))); } @@ -443,7 +443,7 @@ TEST(FileIndexTest, Relations) { TU.HeaderFilename = "f.h"; TU.HeaderCode = "class A {}; class B : public A {};"; auto AST = TU.build(); - FileIndex Index; + FileIndex Index(true); Index.updatePreamble(testPath(TU.Filename), /*Version=*/"null", AST.getASTContext(), AST.getPreprocessor(), AST.getPragmaIncludes()); @@ -493,7 +493,7 @@ TEST(FileIndexTest, ReferencesInMainFileWithPreamble) { )cpp"); TU.Code = std::string(Main.code()); auto AST = TU.build(); - FileIndex Index; + FileIndex Index(true); Index.updateMain(testPath(TU.Filename), AST); // Expect to see references in main file, references in headers are excluded @@ -510,7 +510,7 @@ TEST(FileIndexTest, MergeMainFileSymbols) { Cpp.HeaderFilename = "foo.h"; Cpp.HeaderCode = CommonHeader; - FileIndex Index; + FileIndex Index(true); auto HeaderAST = Header.build(); auto CppAST = Cpp.build(); Index.updateMain(testPath("foo.h"), HeaderAST); @@ -524,7 +524,7 @@ TEST(FileIndexTest, MergeMainFileSymbols) { } TEST(FileSymbolsTest, CountReferencesNoRefSlabs) { - FileSymbols FS(IndexContents::All); + FileSymbols FS(IndexContents::All, true); FS.update("f1", numSlab(1, 3), nullptr, nullptr, true); FS.update("f2", numSlab(1, 3), nullptr, nullptr, false); EXPECT_THAT( @@ -536,7 +536,7 @@ TEST(FileSymbolsTest, CountReferencesNoRefSlabs) { } TEST(FileSymbolsTest, CountReferencesWithRefSlabs) { - FileSymbols FS(IndexContents::All); + FileSymbols FS(IndexContents::All, true); FS.update("f1cpp", numSlab(1, 3), refSlab(SymbolID("1"), "f1.cpp"), nullptr, true); FS.update("f1h", numSlab(1, 3), refSlab(SymbolID("1"), "f1.h"), nullptr, @@ -558,7 +558,7 @@ TEST(FileSymbolsTest, CountReferencesWithRefSlabs) { } TEST(FileIndexTest, StalePreambleSymbolsDeleted) { - FileIndex M; + FileIndex M(true); TestTU File; File.HeaderFilename = "a.h"; @@ -581,7 +581,7 @@ TEST(FileIndexTest, StalePreambleSymbolsDeleted) { // Verifies that concurrent 
calls to updateMain don't "lose" any updates. TEST(FileIndexTest, Threadsafety) { - FileIndex M; + FileIndex M(true); Notification Go; constexpr int Count = 10; @@ -714,7 +714,7 @@ TEST(FileShardedIndexTest, Sharding) { } TEST(FileIndexTest, Profile) { - FileIndex FI; + FileIndex FI(true); auto FileName = testPath("foo.cpp"); auto AST = TestTU::withHeaderCode("int a;").build(); @@ -738,7 +738,7 @@ TEST(FileIndexTest, Profile) { } TEST(FileSymbolsTest, Profile) { - FileSymbols FS(IndexContents::All); + FileSymbols FS(IndexContents::All, true); FS.update("f1", numSlab(1, 2), nullptr, nullptr, false); FS.update("f2", nullptr, refSlab(SymbolID("1"), "f1"), nullptr, false); FS.update("f3", nullptr, nullptr, @@ -758,7 +758,7 @@ TEST(FileSymbolsTest, Profile) { } TEST(FileIndexTest, MacrosFromMainFile) { - FileIndex Idx; + FileIndex Idx(true); TestTU TU; TU.Code = "#pragma once\n#define FOO"; TU.Filename = "foo.h"; diff --git a/clang-tools-extra/clangd/unittests/IndexTests.cpp b/clang-tools-extra/clangd/unittests/IndexTests.cpp index 658b4e200004e..a66680d39c87d 100644 --- a/clang-tools-extra/clangd/unittests/IndexTests.cpp +++ b/clang-tools-extra/clangd/unittests/IndexTests.cpp @@ -292,7 +292,7 @@ TEST(MergeIndexTest, Lookup) { } TEST(MergeIndexTest, LookupRemovedDefinition) { - FileIndex DynamicIndex, StaticIndex; + FileIndex DynamicIndex(true), StaticIndex(true); MergedIndex Merge(&DynamicIndex, &StaticIndex); const char *HeaderCode = "class Foo;"; @@ -349,7 +349,7 @@ TEST(MergeIndexTest, FuzzyFind) { } TEST(MergeIndexTest, FuzzyFindRemovedSymbol) { - FileIndex DynamicIndex, StaticIndex; + FileIndex DynamicIndex(true), StaticIndex(true); MergedIndex Merge(&DynamicIndex, &StaticIndex); const char *HeaderCode = "class Foo;"; @@ -446,8 +446,8 @@ TEST(MergeTest, PreferSymbolLocationInCodegenFile) { } TEST(MergeIndexTest, Refs) { - FileIndex Dyn; - FileIndex StaticIndex; + FileIndex Dyn(true); + FileIndex StaticIndex(true); MergedIndex Merge(&Dyn, &StaticIndex); const char *HeaderCode = "class Foo;"; diff --git a/clang-tools-extra/clangd/unittests/RenameTests.cpp b/clang-tools-extra/clangd/unittests/RenameTests.cpp index 7d9252110b27d..142ed171d1a1c 100644 --- a/clang-tools-extra/clangd/unittests/RenameTests.cpp +++ b/clang-tools-extra/clangd/unittests/RenameTests.cpp @@ -1548,7 +1548,7 @@ TEST(CrossFileRenameTests, DirtyBuffer) { std::string BarPath = testPath("bar.cc"); // Build the index, the index has "Foo" references from foo.cc and "Bar" // references from bar.cc. 
- FileSymbols FSymbols(IndexContents::All); + FileSymbols FSymbols(IndexContents::All, true); FSymbols.update(FooPath, nullptr, buildRefSlab(FooCode, "Foo", FooPath), nullptr, false); FSymbols.update(BarPath, nullptr, buildRefSlab(BarCode, "Bar", BarPath), @@ -1601,6 +1601,12 @@ TEST(CrossFileRenameTests, DirtyBuffer) { return true; // has more references } + bool containedRefs(const ContainedRefsRequest &Req, + llvm::function_ref + Callback) const override { + return false; + } + bool fuzzyFind( const FuzzyFindRequest &Req, llvm::function_ref Callback) const override { @@ -1652,6 +1658,12 @@ TEST(CrossFileRenameTests, DeduplicateRefsFromIndex) { return false; } + bool containedRefs(const ContainedRefsRequest &Req, + llvm::function_ref + Callback) const override { + return false; + } + bool fuzzyFind(const FuzzyFindRequest &, llvm::function_ref) const override { return false; diff --git a/clang-tools-extra/clangd/unittests/TestTU.cpp b/clang-tools-extra/clangd/unittests/TestTU.cpp index 1f02c04125b1e..3f8990c86f714 100644 --- a/clang-tools-extra/clangd/unittests/TestTU.cpp +++ b/clang-tools-extra/clangd/unittests/TestTU.cpp @@ -174,7 +174,7 @@ RefSlab TestTU::headerRefs() const { std::unique_ptr TestTU::index() const { auto AST = build(); - auto Idx = std::make_unique(); + auto Idx = std::make_unique(/*SupportContainedRefs=*/true); Idx->updatePreamble(testPath(Filename), /*Version=*/"null", AST.getASTContext(), AST.getPreprocessor(), AST.getPragmaIncludes()); diff --git a/clang-tools-extra/clangd/unittests/TestWorkspace.cpp b/clang-tools-extra/clangd/unittests/TestWorkspace.cpp index 2130e7a4c6dd4..e9a50f1e8b63a 100644 --- a/clang-tools-extra/clangd/unittests/TestWorkspace.cpp +++ b/clang-tools-extra/clangd/unittests/TestWorkspace.cpp @@ -17,7 +17,7 @@ namespace clang { namespace clangd { std::unique_ptr TestWorkspace::index() { - auto Index = std::make_unique(); + auto Index = std::make_unique(/*SupportContainedRefs=*/true); for (const auto &Input : Inputs) { if (!Input.second.IsMainFile) continue; From 52aff97f40c19671be7d1f5eecc2985ebf260a49 Mon Sep 17 00:00:00 2001 From: Anutosh Bhat Date: Wed, 4 Dec 2024 11:48:43 +0530 Subject: [PATCH 176/191] [lld][wasm] Clear lazyBitcodeFiles while resetting context (#118440) Hi @sbc100 I was looking into a use case involving the link function (which got my attention to reset). I see that `lazyBitcodeFiles` variable was introduced here https://github.com/llvm/llvm-project/pull/114327 but I don't see it being reset while destroying the context eventually. Hopefully this should be the correct way to address it. --- lld/wasm/Driver.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index 8d01ff839ddfc..37a0156c728f6 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -59,6 +59,7 @@ void Ctx::reset() { stubFiles.clear(); sharedFiles.clear(); bitcodeFiles.clear(); + lazyBitcodeFiles.clear(); syntheticFunctions.clear(); syntheticGlobals.clear(); syntheticTables.clear(); From 1534f456945060e27861000f5f2b16bc1b9e0c1e Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 3 Dec 2024 22:19:30 -0800 Subject: [PATCH 177/191] [lld-link] Replace warn(...) 
with Warn(ctx) --- lld/COFF/DebugTypes.cpp | 8 +++--- lld/COFF/Driver.cpp | 56 +++++++++++++++++++++------------------- lld/COFF/DriverUtils.cpp | 25 +++++++++--------- lld/COFF/InputFiles.cpp | 2 +- lld/COFF/SymbolTable.cpp | 24 +++++++++-------- lld/COFF/Writer.cpp | 34 ++++++++++++------------ 6 files changed, 79 insertions(+), 70 deletions(-) diff --git a/lld/COFF/DebugTypes.cpp b/lld/COFF/DebugTypes.cpp index 7689ad163a657..08f61e0d44621 100644 --- a/lld/COFF/DebugTypes.cpp +++ b/lld/COFF/DebugTypes.cpp @@ -674,8 +674,8 @@ void TpiSource::mergeTypeRecord(TypeIndex curIndex, CVType ty) { funcIdToType.push_back({funcId, funcType}); } else { StringRef fname = file ? file->getName() : ""; - warn("corrupt LF_[M]FUNC_ID record 0x" + utohexstr(curIndex.getIndex()) + - " in " + fname); + Warn(ctx) << "corrupt LF_[M]FUNC_ID record 0x" + << utohexstr(curIndex.getIndex()) << " in " << fname; } } } @@ -836,7 +836,7 @@ void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) { void PrecompSource::loadGHashes() { if (getDebugH(file)) { - warn("ignoring .debug$H section; pch with ghash is not implemented"); + Warn(ctx) << "ignoring .debug$H section; pch with ghash is not implemented"; } uint32_t ghashIdx = 0; @@ -864,7 +864,7 @@ void PrecompSource::loadGHashes() { void UsePrecompSource::loadGHashes() { auto e = findPrecompMap(file, precompDependency); if (!e) { - warn(toString(e.takeError())); + Warn(ctx) << e.takeError(); return; } diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index a4f7e6c76af49..11e13f20c8042 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -809,7 +809,7 @@ StringRef LinkerDriver::findDefaultEntry() { if (findUnderscoreMangle("wmain")) { if (!findUnderscoreMangle("main")) return mangle("wmainCRTStartup"); - warn("found both wmain and main; using latter"); + Warn(ctx) << "found both wmain and main; using latter"; } return mangle("mainCRTStartup"); } @@ -828,9 +828,9 @@ WindowsSubsystem LinkerDriver::inferSubsystem() { bool haveWWinMain = findUnderscoreMangle("wWinMain"); if (haveMain || haveWMain) { if (haveWinMain || haveWWinMain) { - warn(std::string("found ") + (haveMain ? "main" : "wmain") + " and " + - (haveWinMain ? "WinMain" : "wWinMain") + - "; defaulting to /subsystem:console"); + Warn(ctx) << "found " << (haveMain ? "main" : "wmain") << " and " + << (haveWinMain ? 
"WinMain" : "wWinMain") + << "; defaulting to /subsystem:console"; } return IMAGE_SUBSYSTEM_WINDOWS_CUI; } @@ -910,7 +910,8 @@ static std::string createResponseFile(const opt::InputArgList &args, return std::string(data); } -static unsigned parseDebugTypes(const opt::InputArgList &args) { +static unsigned parseDebugTypes(COFFLinkerContext &ctx, + const opt::InputArgList &args) { unsigned debugTypes = static_cast(DebugType::None); if (auto *a = args.getLastArg(OPT_debugtype)) { @@ -925,7 +926,7 @@ static unsigned parseDebugTypes(const opt::InputArgList &args) { .Case("fixup", static_cast(DebugType::Fixup)) .Default(0); if (v == 0) { - warn("/debugtype: unknown option '" + type + "'"); + Warn(ctx) << "/debugtype: unknown option '" << type << "'"; continue; } debugTypes |= v; @@ -1162,7 +1163,8 @@ void LinkerDriver::parseOrderFile(StringRef arg) { if (set.count(s) == 0) { if (ctx.config.warnMissingOrderSymbol) - warn("/order:" + arg + ": missing symbol: " + s + " [LNK4037]"); + Warn(ctx) << "/order:" << arg << ": missing symbol: " << s + << " [LNK4037]"; } else ctx.config.order[s] = INT_MIN + ctx.config.order.size(); } @@ -1189,7 +1191,7 @@ void LinkerDriver::parseCallGraphFile(StringRef path) { Symbol *sym = map.lookup(name); if (!sym) { if (ctx.config.warnMissingOrderSymbol) - warn(path + ": no such symbol: " + name); + Warn(ctx) << path << ": no such symbol: " << name; return nullptr; } @@ -1332,8 +1334,8 @@ void LinkerDriver::parsePDBAltPath() { else if (var.equals_insensitive("%_ext%")) buf.append(binaryExtension); else { - warn("only %_PDB% and %_EXT% supported in /pdbaltpath:, keeping " + var + - " as literal"); + Warn(ctx) << "only %_PDB% and %_EXT% supported in /pdbaltpath:, keeping " + << var << " as literal"; buf.append(var); } @@ -1691,7 +1693,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { addLibSearchPaths(); } else { if (args.hasArg(OPT_vctoolsdir, OPT_winsysroot)) - warn("ignoring /vctoolsdir or /winsysroot flags in MinGW mode"); + Warn(ctx) << "ignoring /vctoolsdir or /winsysroot flags in MinGW mode"; } } @@ -1752,7 +1754,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { StringRef(str).split(vec, ','); for (StringRef s : vec) { if (s == "fastlink") { - warn("/debug:fastlink unsupported; using /debug:full"); + Warn(ctx) << "/debug:fastlink unsupported; using /debug:full"; s = "full"; } if (s == "none") { @@ -1795,7 +1797,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { config->demangle = args.hasFlag(OPT_demangle, OPT_demangle_no, true); // Handle /debugtype - config->debugTypes = parseDebugTypes(args); + config->debugTypes = parseDebugTypes(ctx, args); // Handle /driver[:uponly|:wdm]. 
config->driverUponly = args.hasArg(OPT_driver_uponly) || @@ -1832,7 +1834,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { // Handle /pdbstripped if (args.hasArg(OPT_pdbstripped)) - warn("ignoring /pdbstripped flag, it is not yet supported"); + Warn(ctx) << "ignoring /pdbstripped flag, it is not yet supported"; // Handle /noentry if (args.hasArg(OPT_noentry)) { @@ -2114,7 +2116,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { if (!isPowerOf2_64(config->align)) error("/align: not a power of two: " + StringRef(arg->getValue())); if (!args.hasArg(OPT_driver)) - warn("/align specified without /driver; image may not run"); + Warn(ctx) << "/align specified without /driver; image may not run"; } // Handle /aligncomm @@ -2199,27 +2201,29 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { OPT_lld_allow_duplicate_weak_no, config->mingw); if (args.hasFlag(OPT_inferasanlibs, OPT_inferasanlibs_no, false)) - warn("ignoring '/inferasanlibs', this flag is not supported"); + Warn(ctx) << "ignoring '/inferasanlibs', this flag is not supported"; if (config->incremental && args.hasArg(OPT_profile)) { - warn("ignoring '/incremental' due to '/profile' specification"); + Warn(ctx) << "ignoring '/incremental' due to '/profile' specification"; config->incremental = false; } if (config->incremental && args.hasArg(OPT_order)) { - warn("ignoring '/incremental' due to '/order' specification"); + Warn(ctx) << "ignoring '/incremental' due to '/order' specification"; config->incremental = false; } if (config->incremental && config->doGC) { - warn("ignoring '/incremental' because REF is enabled; use '/opt:noref' to " - "disable"); + Warn(ctx) << "ignoring '/incremental' because REF is enabled; use " + "'/opt:noref' to " + "disable"; config->incremental = false; } if (config->incremental && config->doICF != ICFLevel::None) { - warn("ignoring '/incremental' because ICF is enabled; use '/opt:noicf' to " - "disable"); + Warn(ctx) << "ignoring '/incremental' because ICF is enabled; use " + "'/opt:noicf' to " + "disable"; config->incremental = false; } @@ -2285,7 +2289,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { // We should have inferred a machine type by now from the input files, but if // not we assume x64. if (config->machine == IMAGE_FILE_MACHINE_UNKNOWN) { - warn("/machine is not specified. x64 is assumed"); + Warn(ctx) << "/machine is not specified. 
x64 is assumed"; config->machine = AMD64; addWinSysRootLibSearchPaths(); } @@ -2460,8 +2464,8 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { } if (config->lldmapFile != "" && config->lldmapFile == config->mapFile) { - warn("/lldmap and /map have the same output file '" + config->mapFile + - "'.\n>>> ignoring /lldmap"); + Warn(ctx) << "/lldmap and /map have the same output file '" + << config->mapFile << "'.\n>>> ignoring /lldmap"; config->lldmapFile.clear(); } @@ -2741,7 +2745,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { Symbol *sym = ctx.symtab.find(name); if (!sym) { - warn("/aligncomm symbol " + name + " not found"); + Warn(ctx) << "/aligncomm symbol " << name << " not found"; continue; } diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp index f5862977857df..bb6394aca4984 100644 --- a/lld/COFF/DriverUtils.cpp +++ b/lld/COFF/DriverUtils.cpp @@ -172,7 +172,7 @@ void LinkerDriver::parseMerge(StringRef s) { if (!inserted) { StringRef existing = pair.first->second; if (existing != to) - warn(s + ": already merged into " + existing); + Warn(ctx) << s << ": already merged into " << existing; } } @@ -741,12 +741,12 @@ void LinkerDriver::fixupExports() { continue; } if (existing->source == e.source) { - warn(Twine("duplicate ") + exportSourceName(existing->source) + - " option: " + e.name); + Warn(ctx) << "duplicate " << exportSourceName(existing->source) + << " option: " << e.name; } else { - warn("duplicate export: " + e.name + - Twine(" first seen in " + exportSourceName(existing->source) + - Twine(", now in " + exportSourceName(e.source)))); + Warn(ctx) << "duplicate export: " << e.name << " first seen in " + << exportSourceName(existing->source) << ", now in " + << exportSourceName(e.source); } } ctx.config.exports = std::move(v); @@ -822,7 +822,7 @@ MemoryBufferRef LinkerDriver::convertResToCOFF(ArrayRef mbs, for (const auto &dupeDiag : duplicates) if (ctx.config.forceMultipleRes) - warn(dupeDiag); + Warn(ctx) << dupeDiag; else error(dupeDiag); @@ -945,14 +945,15 @@ opt::InputArgList ArgParser::parse(ArrayRef argv) { for (opt::Arg *arg : args.filtered(OPT_UNKNOWN)) { std::string nearest; if (ctx.optTable.findNearest(arg->getAsString(args), nearest) > 1) - warn("ignoring unknown argument '" + arg->getAsString(args) + "'"); + Warn(ctx) << "ignoring unknown argument '" << arg->getAsString(args) + << "'"; else - warn("ignoring unknown argument '" + arg->getAsString(args) + - "', did you mean '" + nearest + "'"); + Warn(ctx) << "ignoring unknown argument '" << arg->getAsString(args) + << "', did you mean '" << nearest << "'"; } if (args.hasArg(OPT_lib)) - warn("ignoring /lib since it's not the first argument"); + Warn(ctx) << "ignoring /lib since it's not the first argument"; return args; } @@ -994,7 +995,7 @@ ParsedDirectives ArgParser::parseDirectives(StringRef s) { if (missingCount) fatal(Twine(result.args.getArgString(missingIndex)) + ": missing argument"); for (auto *arg : result.args.filtered(OPT_UNKNOWN)) - warn("ignoring unknown argument: " + arg->getAsString(result.args)); + Warn(ctx) << "ignoring unknown argument: " << arg->getAsString(result.args); return result; } diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp index 9e33774d695fa..65def1f509a4d 100644 --- a/lld/COFF/InputFiles.cpp +++ b/lld/COFF/InputFiles.cpp @@ -206,7 +206,7 @@ void ObjFile::initializeECThunks() { case Arm64ECThunkType::GuestExit: break; default: - warn("Ignoring unknown EC thunk type " + Twine(entry->type)); + Warn(ctx) << "Ignoring unknown EC thunk type " << 
entry->type; } } } diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp index 0c6df701284b7..9f41421722286 100644 --- a/lld/COFF/SymbolTable.cpp +++ b/lld/COFF/SymbolTable.cpp @@ -327,7 +327,8 @@ void SymbolTable::loadMinGWSymbols() { // If it's lazy or already defined, hook it up as weak alias. if (l->isLazy() || isa(l)) { if (ctx.config.warnStdcallFixup) - warn("Resolving " + origName + " by linking to " + newName); + Warn(ctx) << "Resolving " << origName << " by linking to " + << newName; else log("Resolving " + origName + " by linking to " + newName); undef->setWeakAlias(l); @@ -379,9 +380,9 @@ bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) { toString(cast(imp)->file)); impSize = sizeof(DefinedRegular); } else { - warn("unable to automatically import " + name + " from " + imp->getName() + - " from " + toString(cast(imp)->file) + - "; unexpected symbol type"); + Warn(ctx) << "unable to automatically import " << name << " from " + << imp->getName() << " from " << cast(imp)->file + << "; unexpected symbol type"; return false; } sym->replaceKeepingName(imp, impSize); @@ -412,7 +413,7 @@ bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) { /// objFiles and bitcodeFiles (if not nullptr) are used to report where /// undefined symbols are referenced. static void reportProblemSymbols( - const COFFLinkerContext &ctx, const SmallPtrSetImpl &undefs, + COFFLinkerContext &ctx, const SmallPtrSetImpl &undefs, const DenseMap *localImports, bool needBitcodeFiles) { // Return early if there is nothing to report (which should be // the common case). @@ -425,8 +426,9 @@ static void reportProblemSymbols( ctx.config.forceUnresolved); if (localImports) if (Symbol *imp = localImports->lookup(b)) - warn(": locally defined symbol imported: " + toString(ctx, *imp) + - " (defined in " + toString(imp->getFile()) + ") [LNK4217]"); + Warn(ctx) << ": locally defined symbol imported: " + << toString(ctx, *imp) << " (defined in " + << toString(imp->getFile()) << ") [LNK4217]"; } std::vector undefDiags; @@ -447,9 +449,9 @@ static void reportProblemSymbols( } if (localImports) if (Symbol *imp = localImports->lookup(sym)) - warn(toString(file) + - ": locally defined symbol imported: " + toString(ctx, *imp) + - " (defined in " + toString(imp->getFile()) + ") [LNK4217]"); + Warn(ctx) << file << ": locally defined symbol imported: " + << toString(ctx, *imp) << " (defined in " << imp->getFile() + << ") [LNK4217]"; } }; @@ -814,7 +816,7 @@ void SymbolTable::reportDuplicate(Symbol *existing, InputFile *newFile, existing->getName()); if (ctx.config.forceMultiple) - warn(msg); + Warn(ctx) << msg; else error(msg); } diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index d3e326378ed2d..3ec8e42f97c8e 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -1313,7 +1313,7 @@ void Writer::createExportTable() { // Allow using a custom built export table from input object files, instead // of having the linker synthesize the tables. if (ctx.config.hadExplicitExports) - warn("literal .edata sections override exports"); + Warn(ctx) << "literal .edata sections override exports"; } else if (!ctx.config.exports.empty()) { for (Chunk *c : edata.chunks) edataSec->addChunk(c); @@ -1325,7 +1325,7 @@ void Writer::createExportTable() { // Warn on exported deleting destructor. 
for (auto e : ctx.config.exports) if (e.sym && e.sym->getName().starts_with("??_G")) - warn("export of deleting dtor: " + toString(ctx, *e.sym)); + Warn(ctx) << "export of deleting dtor: " << toString(ctx, *e.sym); } void Writer::removeUnusedSections() { @@ -1457,9 +1457,10 @@ void Writer::createSymbolAndStringTable() { if ((sec->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) == 0) continue; if (ctx.config.warnLongSectionNames) { - warn("section name " + sec->name + - " is longer than 8 characters and will use a non-standard string " - "table"); + Warn(ctx) + << "section name " << sec->name + << " is longer than 8 characters and will use a non-standard string " + "table"; } sec->setStringTableOff(addEntryToStringTable(sec->name)); } @@ -2086,8 +2087,8 @@ void Writer::getSymbolsFromSections(ObjFile *file, // Validate that the contents look like symbol table indices. ArrayRef data = c->getContents(); if (data.size() % 4 != 0) { - warn("ignoring " + c->getSectionName() + - " symbol table index section in object " + toString(file)); + Warn(ctx) << "ignoring " << c->getSectionName() + << " symbol table index section in object " << file; continue; } @@ -2098,8 +2099,8 @@ void Writer::getSymbolsFromSections(ObjFile *file, ArrayRef objSymbols = file->getSymbols(); for (uint32_t symIndex : symIndices) { if (symIndex >= objSymbols.size()) { - warn("ignoring invalid symbol table index in section " + - c->getSectionName() + " in object " + toString(file)); + Warn(ctx) << "ignoring invalid symbol table index in section " + << c->getSectionName() << " in object " << file; continue; } if (Symbol *s = objSymbols[symIndex]) { @@ -2606,7 +2607,8 @@ void Writer::prepareLoadConfig() { auto *b = cast_if_present(sym); if (!b) { if (ctx.config.guardCF != GuardCFLevel::Off) - warn("Control Flow Guard is enabled but '_load_config_used' is missing"); + Warn(ctx) + << "Control Flow Guard is enabled but '_load_config_used' is missing"; return; } @@ -2616,13 +2618,13 @@ void Writer::prepareLoadConfig() { uint8_t *symBuf = secBuf + (b->getRVA() - sec->getRVA()); uint32_t expectedAlign = ctx.config.is64() ? 8 : 4; if (b->getChunk()->getAlignment() < expectedAlign) - warn("'_load_config_used' is misaligned (expected alignment to be " + - Twine(expectedAlign) + " bytes, got " + - Twine(b->getChunk()->getAlignment()) + " instead)"); + Warn(ctx) << "'_load_config_used' is misaligned (expected alignment to be " + << expectedAlign << " bytes, got " + << b->getChunk()->getAlignment() << " instead)"; else if (!isAligned(Align(expectedAlign), b->getRVA())) - warn("'_load_config_used' is misaligned (RVA is 0x" + - Twine::utohexstr(b->getRVA()) + " not aligned to " + - Twine(expectedAlign) + " bytes)"); + Warn(ctx) << "'_load_config_used' is misaligned (RVA is 0x" + << Twine::utohexstr(b->getRVA()) << " not aligned to " + << expectedAlign << " bytes)"; if (ctx.config.is64()) prepareLoadConfig(reinterpret_cast(symBuf)); From 59bc03cf8e9df134e591c989abb1c068e4201008 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 3 Dec 2024 22:33:36 -0800 Subject: [PATCH 178/191] [lld-link] Simplify warnUnusable. 
NFC --- lld/COFF/PDB.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp index 8553832909522..b4b10ef8913f0 100644 --- a/lld/COFF/PDB.cpp +++ b/lld/COFF/PDB.cpp @@ -1006,11 +1006,10 @@ static void warnUnusable(InputFile *f, Error e, bool shouldWarn) { consumeError(std::move(e)); return; } - auto msg = "Cannot use debug info for '" + toString(f) + "' [LNK4099]"; + auto diag = Warn(f->ctx); + diag << "Cannot use debug info for '" << f << "' [LNK4099]"; if (e) - warn(msg + "\n>>> failed to load reference " + toString(std::move(e))); - else - warn(msg); + diag << "\n>>> failed to load reference " << std::move(e); } // Allocate memory for a .debug$S / .debug$F section and relocate it. From 92ed7e292443de1d89754a59a533ded160d544eb Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Wed, 4 Dec 2024 12:21:33 +0530 Subject: [PATCH 179/191] [CodeGen][PM] Use errs() instead of dbgs() in printer passes (#118469) Printing passes is not exactly a debug activity, it is used in release (and dbgs() is errs() in release) --- .../llvm/Passes/MachinePassRegistry.def | 20 ++-- llvm/lib/Passes/PassRegistry.def | 108 +++++++++--------- 2 files changed, 64 insertions(+), 64 deletions(-) diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 437ec39beb040..614e36cfbd1a0 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -28,7 +28,7 @@ MODULE_PASS("global-merge", GlobalMergePass(TM, GlobalMergeOptions())) MODULE_PASS("jmc-instrumenter", JMCInstrumenterPass()) MODULE_PASS("lower-emutls", LowerEmuTLSPass()) MODULE_PASS("pre-isel-intrinsic-lowering", PreISelIntrinsicLoweringPass()) -MODULE_PASS("print", PhysicalRegisterUsageInfoPrinterPass(dbgs())) +MODULE_PASS("print", PhysicalRegisterUsageInfoPrinterPass(errs())) MODULE_PASS("shadow-stack-gc-lowering", ShadowStackGCLoweringPass()) MODULE_PASS("global-merge-func", GlobalMergeFuncPass()) #undef MODULE_PASS @@ -146,19 +146,19 @@ MACHINE_FUNCTION_PASS("opt-phis", OptimizePHIsPass()) MACHINE_FUNCTION_PASS("peephole-opt", PeepholeOptimizerPass()) MACHINE_FUNCTION_PASS("phi-node-elimination", PHIEliminationPass()) MACHINE_FUNCTION_PASS("print", PrintMIRPass()) -MACHINE_FUNCTION_PASS("print", LiveIntervalsPrinterPass(dbgs())) -MACHINE_FUNCTION_PASS("print", LiveVariablesPrinterPass(dbgs())) +MACHINE_FUNCTION_PASS("print", LiveIntervalsPrinterPass(errs())) +MACHINE_FUNCTION_PASS("print", LiveVariablesPrinterPass(errs())) MACHINE_FUNCTION_PASS("print", - MachineBlockFrequencyPrinterPass(dbgs())) + MachineBlockFrequencyPrinterPass(errs())) MACHINE_FUNCTION_PASS("print", - MachineBranchProbabilityPrinterPass(dbgs())) + MachineBranchProbabilityPrinterPass(errs())) MACHINE_FUNCTION_PASS("print", - MachineDominatorTreePrinterPass(dbgs())) -MACHINE_FUNCTION_PASS("print", MachineLoopPrinterPass(dbgs())) + MachineDominatorTreePrinterPass(errs())) +MACHINE_FUNCTION_PASS("print", MachineLoopPrinterPass(errs())) MACHINE_FUNCTION_PASS("print", - MachinePostDominatorTreePrinterPass(dbgs())) -MACHINE_FUNCTION_PASS("print", SlotIndexesPrinterPass(dbgs())) -MACHINE_FUNCTION_PASS("print", VirtRegMapPrinterPass(dbgs())) + MachinePostDominatorTreePrinterPass(errs())) +MACHINE_FUNCTION_PASS("print", SlotIndexesPrinterPass(errs())) +MACHINE_FUNCTION_PASS("print", VirtRegMapPrinterPass(errs())) MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass()) 
MACHINE_FUNCTION_PASS("reg-usage-propagation", RegUsageInfoPropagationPass()) MACHINE_FUNCTION_PASS("require-all-machine-function-properties", diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 7c3798f6462a4..772ec5fd10e63 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -89,7 +89,7 @@ MODULE_PASS("insert-gcov-profiling", GCOVProfilerPass()) MODULE_PASS("instrorderfile", InstrOrderFilePass()) MODULE_PASS("instrprof", InstrProfilingLoweringPass()) MODULE_PASS("ctx-instr-lower", PGOCtxProfLoweringPass()) -MODULE_PASS("print", CtxProfAnalysisPrinterPass(dbgs())) +MODULE_PASS("print", CtxProfAnalysisPrinterPass(errs())) MODULE_PASS("invalidate", InvalidateAllAnalysesPass()) MODULE_PASS("iroutliner", IROutlinerPass()) MODULE_PASS("jmc-instrumenter", JMCInstrumenterPass()) @@ -116,21 +116,21 @@ MODULE_PASS("pgo-instr-gen", PGOInstrumentationGen()) MODULE_PASS("pgo-instr-use", PGOInstrumentationUse()) MODULE_PASS("poison-checking", PoisonCheckingPass()) MODULE_PASS("pre-isel-intrinsic-lowering", PreISelIntrinsicLoweringPass(TM)) -MODULE_PASS("print", PrintModulePass(dbgs())) -MODULE_PASS("print-callgraph", CallGraphPrinterPass(dbgs())) -MODULE_PASS("print-callgraph-sccs", CallGraphSCCsPrinterPass(dbgs())) -MODULE_PASS("print-ir-similarity", IRSimilarityAnalysisPrinterPass(dbgs())) -MODULE_PASS("print-lcg", LazyCallGraphPrinterPass(dbgs())) -MODULE_PASS("print-lcg-dot", LazyCallGraphDOTPrinterPass(dbgs())) +MODULE_PASS("print", PrintModulePass(errs())) +MODULE_PASS("print-callgraph", CallGraphPrinterPass(errs())) +MODULE_PASS("print-callgraph-sccs", CallGraphSCCsPrinterPass(errs())) +MODULE_PASS("print-ir-similarity", IRSimilarityAnalysisPrinterPass(errs())) +MODULE_PASS("print-lcg", LazyCallGraphPrinterPass(errs())) +MODULE_PASS("print-lcg-dot", LazyCallGraphDOTPrinterPass(errs())) MODULE_PASS("print-must-be-executed-contexts", - MustBeExecutedContextPrinterPass(dbgs())) -MODULE_PASS("print-profile-summary", ProfileSummaryPrinterPass(dbgs())) -MODULE_PASS("print-stack-safety", StackSafetyGlobalPrinterPass(dbgs())) -MODULE_PASS("print", DXILMetadataAnalysisPrinterPass(dbgs())) -MODULE_PASS("print", DXILResourcePrinterPass(dbgs())) -MODULE_PASS("print", InlineAdvisorAnalysisPrinterPass(dbgs())) -MODULE_PASS("print", ModuleDebugInfoPrinterPass(dbgs())) -MODULE_PASS("print", PhysicalRegisterUsageInfoPrinterPass(dbgs())) + MustBeExecutedContextPrinterPass(errs())) +MODULE_PASS("print-profile-summary", ProfileSummaryPrinterPass(errs())) +MODULE_PASS("print-stack-safety", StackSafetyGlobalPrinterPass(errs())) +MODULE_PASS("print", DXILMetadataAnalysisPrinterPass(errs())) +MODULE_PASS("print", DXILResourcePrinterPass(errs())) +MODULE_PASS("print", InlineAdvisorAnalysisPrinterPass(errs())) +MODULE_PASS("print", ModuleDebugInfoPrinterPass(errs())) +MODULE_PASS("print", PhysicalRegisterUsageInfoPrinterPass(errs())) MODULE_PASS("pseudo-probe", SampleProfileProbePass(TM)) MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass()) MODULE_PASS("recompute-globalsaa", RecomputeGlobalsAAPass()) @@ -225,7 +225,7 @@ MODULE_PASS_WITH_PARAMS( MODULE_PASS_WITH_PARAMS( "print", "StructuralHashPrinterPass", [](StructuralHashOptions Options) { - return StructuralHashPrinterPass(dbgs(), Options); + return StructuralHashPrinterPass(errs(), Options); }, parseStructuralHashPrinterPassOptions, "detailed;call-target-ignored") @@ -424,38 +424,38 @@ FUNCTION_PASS("pa-eval", PAEvalPass()) FUNCTION_PASS("partially-inline-libcalls", 
PartiallyInlineLibCallsPass()) FUNCTION_PASS("pgo-memop-opt", PGOMemOPSizeOpt()) FUNCTION_PASS("place-safepoints", PlaceSafepointsPass()) -FUNCTION_PASS("print", PrintFunctionPass(dbgs())) +FUNCTION_PASS("print", PrintFunctionPass(errs())) // TODO: rename to print after NPM switch -FUNCTION_PASS("print-alias-sets", AliasSetsPrinterPass(dbgs())) -FUNCTION_PASS("print-cfg-sccs", CFGSCCPrinterPass(dbgs())) -FUNCTION_PASS("print-memderefs", MemDerefPrinterPass(dbgs())) -FUNCTION_PASS("print-mustexecute", MustExecutePrinterPass(dbgs())) -FUNCTION_PASS("print-predicateinfo", PredicateInfoPrinterPass(dbgs())) -FUNCTION_PASS("print", LoopAccessInfoPrinterPass(dbgs())) -FUNCTION_PASS("print", AssumptionPrinterPass(dbgs())) -FUNCTION_PASS("print", BlockFrequencyPrinterPass(dbgs())) -FUNCTION_PASS("print", BranchProbabilityPrinterPass(dbgs())) -FUNCTION_PASS("print", CostModelPrinterPass(dbgs())) -FUNCTION_PASS("print", CycleInfoPrinterPass(dbgs())) -FUNCTION_PASS("print", DependenceAnalysisPrinterPass(dbgs())) -FUNCTION_PASS("print", DebugAssignmentTrackingPrinterPass(dbgs())) -FUNCTION_PASS("print", DelinearizationPrinterPass(dbgs())) -FUNCTION_PASS("print", DemandedBitsPrinterPass(dbgs())) -FUNCTION_PASS("print", DominanceFrontierPrinterPass(dbgs())) -FUNCTION_PASS("print", DominatorTreePrinterPass(dbgs())) -FUNCTION_PASS("print", FunctionPropertiesPrinterPass(dbgs())) -FUNCTION_PASS("print", InlineCostAnnotationPrinterPass(dbgs())) +FUNCTION_PASS("print-alias-sets", AliasSetsPrinterPass(errs())) +FUNCTION_PASS("print-cfg-sccs", CFGSCCPrinterPass(errs())) +FUNCTION_PASS("print-memderefs", MemDerefPrinterPass(errs())) +FUNCTION_PASS("print-mustexecute", MustExecutePrinterPass(errs())) +FUNCTION_PASS("print-predicateinfo", PredicateInfoPrinterPass(errs())) +FUNCTION_PASS("print", LoopAccessInfoPrinterPass(errs())) +FUNCTION_PASS("print", AssumptionPrinterPass(errs())) +FUNCTION_PASS("print", BlockFrequencyPrinterPass(errs())) +FUNCTION_PASS("print", BranchProbabilityPrinterPass(errs())) +FUNCTION_PASS("print", CostModelPrinterPass(errs())) +FUNCTION_PASS("print", CycleInfoPrinterPass(errs())) +FUNCTION_PASS("print", DependenceAnalysisPrinterPass(errs())) +FUNCTION_PASS("print", DebugAssignmentTrackingPrinterPass(errs())) +FUNCTION_PASS("print", DelinearizationPrinterPass(errs())) +FUNCTION_PASS("print", DemandedBitsPrinterPass(errs())) +FUNCTION_PASS("print", DominanceFrontierPrinterPass(errs())) +FUNCTION_PASS("print", DominatorTreePrinterPass(errs())) +FUNCTION_PASS("print", FunctionPropertiesPrinterPass(errs())) +FUNCTION_PASS("print", InlineCostAnnotationPrinterPass(errs())) FUNCTION_PASS("print", - InlineSizeEstimatorAnalysisPrinterPass(dbgs())) -FUNCTION_PASS("print", LazyValueInfoPrinterPass(dbgs())) -FUNCTION_PASS("print", LoopPrinterPass(dbgs())) -FUNCTION_PASS("print", MemorySSAWalkerPrinterPass(dbgs())) -FUNCTION_PASS("print", PhiValuesPrinterPass(dbgs())) -FUNCTION_PASS("print", PostDominatorTreePrinterPass(dbgs())) -FUNCTION_PASS("print", RegionInfoPrinterPass(dbgs())) -FUNCTION_PASS("print", ScalarEvolutionPrinterPass(dbgs())) -FUNCTION_PASS("print", StackSafetyPrinterPass(dbgs())) -FUNCTION_PASS("print", UniformityInfoPrinterPass(dbgs())) + InlineSizeEstimatorAnalysisPrinterPass(errs())) +FUNCTION_PASS("print", LazyValueInfoPrinterPass(errs())) +FUNCTION_PASS("print", LoopPrinterPass(errs())) +FUNCTION_PASS("print", MemorySSAWalkerPrinterPass(errs())) +FUNCTION_PASS("print", PhiValuesPrinterPass(errs())) +FUNCTION_PASS("print", PostDominatorTreePrinterPass(errs())) 
+FUNCTION_PASS("print", RegionInfoPrinterPass(errs())) +FUNCTION_PASS("print", ScalarEvolutionPrinterPass(errs())) +FUNCTION_PASS("print", StackSafetyPrinterPass(errs())) +FUNCTION_PASS("print", UniformityInfoPrinterPass(errs())) FUNCTION_PASS("reassociate", ReassociatePass()) FUNCTION_PASS("redundant-dbg-inst-elim", RedundantDbgInstEliminationPass()) FUNCTION_PASS("reg2mem", RegToMemPass()) @@ -562,19 +562,19 @@ FUNCTION_PASS_WITH_PARAMS( FUNCTION_PASS_WITH_PARAMS( "print", "DependenceAnalysisPrinterPass", [](bool NormalizeResults) { - return DependenceAnalysisPrinterPass(dbgs(), NormalizeResults); + return DependenceAnalysisPrinterPass(errs(), NormalizeResults); }, parseDependenceAnalysisPrinterOptions, "normalized-results") FUNCTION_PASS_WITH_PARAMS( "print", "MemorySSAPrinterPass", [](bool NoEnsureOptimizedUses) { - return MemorySSAPrinterPass(dbgs(), !NoEnsureOptimizedUses); + return MemorySSAPrinterPass(errs(), !NoEnsureOptimizedUses); }, parseMemorySSAPrinterPassOptions, "no-ensure-optimized-uses") FUNCTION_PASS_WITH_PARAMS( "print", "StackLifetimePrinterPass", [](StackLifetime::LivenessType Type) { - return StackLifetimePrinterPass(dbgs(), Type); + return StackLifetimePrinterPass(errs(), Type); }, parseStackLifetimeOptions, "may;must") FUNCTION_PASS_WITH_PARAMS( @@ -664,11 +664,11 @@ LOOP_PASS("loop-simplifycfg", LoopSimplifyCFGPass()) LOOP_PASS("loop-unroll-full", LoopFullUnrollPass()) LOOP_PASS("loop-versioning-licm", LoopVersioningLICMPass()) LOOP_PASS("no-op-loop", NoOpLoopPass()) -LOOP_PASS("print", PrintLoopPass(dbgs())) -LOOP_PASS("print", DDGAnalysisPrinterPass(dbgs())) -LOOP_PASS("print", IVUsersPrinterPass(dbgs())) -LOOP_PASS("print", LoopCachePrinterPass(dbgs())) -LOOP_PASS("print", LoopNestPrinterPass(dbgs())) +LOOP_PASS("print", PrintLoopPass(errs())) +LOOP_PASS("print", DDGAnalysisPrinterPass(errs())) +LOOP_PASS("print", IVUsersPrinterPass(errs())) +LOOP_PASS("print", LoopCachePrinterPass(errs())) +LOOP_PASS("print", LoopNestPrinterPass(errs())) #undef LOOP_PASS #ifndef LOOP_PASS_WITH_PARAMS From ff281f7d37ead15bdbdbfccb4b82ea93013b1a00 Mon Sep 17 00:00:00 2001 From: ronryvchin <94285266+ronryvchin@users.noreply.github.com> Date: Wed, 4 Dec 2024 08:56:46 +0200 Subject: [PATCH 180/191] [PGO] Add option to always instrumenting loop entries (#116789) This patch extends the PGO infrastructure with an option to prefer the instrumentation of loop entry blocks. This option is a generalization of https://github.com/llvm/llvm-project/commit/19fb5b467bb97f95eace1f3637d2d1041cebd3ce, and helps to cover cases where the loop exit is never executed. An example where this can occur are event handling loops. Note that change does NOT change the default behavior. 
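
As a rough sketch only (the `:instrument_loop_entries` directive is taken from the
InstrProfWriter/TextInstrProfReader changes below; the surrounding header lines are
assumed to match the existing text-profile conventions such as `:ir`), a text-format
profile produced with loop-entry instrumentation enabled would carry the new header
directive alongside the existing ones:

  # IR level Instrumentation Flag
  :ir
  # Always instrument the loop entry blocks
  :instrument_loop_entries
  <per-function counter records follow as usual>

In the binary format the same information is carried by the new
VARIANT_MASK_INSTR_LOOP_ENTRIES version bit. Profiles recorded in this mode cannot be
merged with FunctionEntryOnly profiles; the writer rejects that combination as
unsupported.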
--- compiler-rt/include/profile/InstrProfData.inc | 5 +- llvm/include/llvm/ProfileData/InstrProf.h | 4 +- .../llvm/ProfileData/InstrProfData.inc | 5 +- .../llvm/ProfileData/InstrProfReader.h | 21 ++++ .../llvm/ProfileData/InstrProfWriter.h | 4 +- .../llvm/Transforms/Instrumentation/CFGMST.h | 41 ++++++- llvm/lib/ProfileData/InstrProfReader.cpp | 5 + llvm/lib/ProfileData/InstrProfWriter.cpp | 7 ++ .../Instrumentation/GCOVProfiling.cpp | 3 +- .../Instrumentation/PGOInstrumentation.cpp | 72 ++++++++---- .../Transforms/PGOProfile/loop_entries_gen.ll | 58 ++++++++++ .../Transforms/PGOProfile/loop_entries_use.ll | 106 ++++++++++++++++++ llvm/tools/llvm-profdata/llvm-profdata.cpp | 4 +- 13 files changed, 305 insertions(+), 30 deletions(-) create mode 100644 llvm/test/Transforms/PGOProfile/loop_entries_gen.ll create mode 100644 llvm/test/Transforms/PGOProfile/loop_entries_use.ll diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc index c66b0465a0b54..39613da81ecb4 100644 --- a/compiler-rt/include/profile/InstrProfData.inc +++ b/compiler-rt/include/profile/InstrProfData.inc @@ -730,10 +730,12 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define INSTR_PROF_COVMAP_VERSION 6 /* Profile version is always of type uint64_t. Reserve the upper 32 bits in the - * version for other variants of profile. We set the 8th most significant bit + * version for other variants of profile. We set the 8th most significant bit * (i.e. bit 56) to 1 to indicate if this is an IR-level instrumentation * generated profile, and 0 if this is a Clang FE generated profile. * 1 in bit 57 indicates there are context-sensitive records in the profile. + * The 54th bit indicates whether to always instrument loop entry blocks. + * The 58th bit indicates whether to always instrument function entry blocks. * The 59th bit indicates whether to use debug info to correlate profiles. * The 60th bit indicates single byte coverage instrumentation. * The 61st bit indicates function entry instrumentation only. @@ -742,6 +744,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, */ #define VARIANT_MASKS_ALL 0xffffffff00000000ULL #define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL) +#define VARIANT_MASK_INSTR_LOOP_ENTRIES (0x1ULL << 55) #define VARIANT_MASK_IR_PROF (0x1ULL << 56) #define VARIANT_MASK_CSIR_PROF (0x1ULL << 57) #define VARIANT_MASK_INSTR_ENTRY (0x1ULL << 58) diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index c5f7800097807..7133c0c6a302c 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -344,7 +344,9 @@ enum class InstrProfKind { MemProf = 0x40, // A temporal profile. TemporalProfile = 0x80, - LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/TemporalProfile) + // A profile with loop entry basic blocks instrumentation. + LoopEntriesInstrumentation = 0x100, + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/LoopEntriesInstrumentation) }; const std::error_category &instrprof_category(); diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc index c66b0465a0b54..39613da81ecb4 100644 --- a/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -730,10 +730,12 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define INSTR_PROF_COVMAP_VERSION 6 /* Profile version is always of type uint64_t. 
Reserve the upper 32 bits in the - * version for other variants of profile. We set the 8th most significant bit + * version for other variants of profile. We set the 8th most significant bit * (i.e. bit 56) to 1 to indicate if this is an IR-level instrumentation * generated profile, and 0 if this is a Clang FE generated profile. * 1 in bit 57 indicates there are context-sensitive records in the profile. + * The 54th bit indicates whether to always instrument loop entry blocks. + * The 58th bit indicates whether to always instrument function entry blocks. * The 59th bit indicates whether to use debug info to correlate profiles. * The 60th bit indicates single byte coverage instrumentation. * The 61st bit indicates function entry instrumentation only. @@ -742,6 +744,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, */ #define VARIANT_MASKS_ALL 0xffffffff00000000ULL #define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL) +#define VARIANT_MASK_INSTR_LOOP_ENTRIES (0x1ULL << 55) #define VARIANT_MASK_IR_PROF (0x1ULL << 56) #define VARIANT_MASK_CSIR_PROF (0x1ULL << 57) #define VARIANT_MASK_INSTR_ENTRY (0x1ULL << 58) diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index 1fad2343e2c96..330cf540c099b 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -123,6 +123,9 @@ class InstrProfReader { virtual bool instrEntryBBEnabled() const = 0; + /// Return true if the profile instruments all loop entries. + virtual bool instrLoopEntriesEnabled() const = 0; + /// Return true if the profile has single byte counters representing coverage. virtual bool hasSingleByteCoverage() const = 0; @@ -274,6 +277,11 @@ class TextInstrProfReader : public InstrProfReader { InstrProfKind::FunctionEntryInstrumentation); } + bool instrLoopEntriesEnabled() const override { + return static_cast(ProfileKind & + InstrProfKind::LoopEntriesInstrumentation); + } + bool hasSingleByteCoverage() const override { return static_cast(ProfileKind & InstrProfKind::SingleByteCoverage); } @@ -398,6 +406,10 @@ class RawInstrProfReader : public InstrProfReader { return (Version & VARIANT_MASK_INSTR_ENTRY) != 0; } + bool instrLoopEntriesEnabled() const override { + return (Version & VARIANT_MASK_INSTR_LOOP_ENTRIES) != 0; + } + bool hasSingleByteCoverage() const override { return (Version & VARIANT_MASK_BYTE_COVERAGE) != 0; } @@ -564,6 +576,7 @@ struct InstrProfReaderIndexBase { virtual bool isIRLevelProfile() const = 0; virtual bool hasCSIRLevelProfile() const = 0; virtual bool instrEntryBBEnabled() const = 0; + virtual bool instrLoopEntriesEnabled() const = 0; virtual bool hasSingleByteCoverage() const = 0; virtual bool functionEntryOnly() const = 0; virtual bool hasMemoryProfile() const = 0; @@ -628,6 +641,10 @@ class InstrProfReaderIndex : public InstrProfReaderIndexBase { return (FormatVersion & VARIANT_MASK_INSTR_ENTRY) != 0; } + bool instrLoopEntriesEnabled() const override { + return (FormatVersion & VARIANT_MASK_INSTR_LOOP_ENTRIES) != 0; + } + bool hasSingleByteCoverage() const override { return (FormatVersion & VARIANT_MASK_BYTE_COVERAGE) != 0; } @@ -753,6 +770,10 @@ class IndexedInstrProfReader : public InstrProfReader { return Index->instrEntryBBEnabled(); } + bool instrLoopEntriesEnabled() const override { + return Index->instrLoopEntriesEnabled(); + } + bool hasSingleByteCoverage() const override { return Index->hasSingleByteCoverage(); } diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h 
b/llvm/include/llvm/ProfileData/InstrProfWriter.h index fa30926c66258..fdb51c4ab4218 100644 --- a/llvm/include/llvm/ProfileData/InstrProfWriter.h +++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h @@ -190,7 +190,9 @@ class InstrProfWriter { return make_error(instrprof_error::unsupported_version); } if (testIncompatible(InstrProfKind::FunctionEntryOnly, - InstrProfKind::FunctionEntryInstrumentation)) { + InstrProfKind::FunctionEntryInstrumentation) || + testIncompatible(InstrProfKind::FunctionEntryOnly, + InstrProfKind::LoopEntriesInstrumentation)) { return make_error( instrprof_error::unsupported_version, "cannot merge FunctionEntryOnly profiles and BB profiles together"); diff --git a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h index 35b3d615e3844..f6bf045f7de2c 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h +++ b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h @@ -19,6 +19,7 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/BranchProbability.h" @@ -52,10 +53,14 @@ template class CFGMST { BranchProbabilityInfo *const BPI; BlockFrequencyInfo *const BFI; + LoopInfo *const LI; // If function entry will be always instrumented. const bool InstrumentFuncEntry; + // If true loop entries will be always instrumented. + const bool InstrumentLoopEntries; + // Find the root group of the G and compress the path from G to the root. BBInfo *findAndCompressGroup(BBInfo *G) { if (G->Group != G) @@ -154,6 +159,16 @@ template class CFGMST { } if (BPI != nullptr) Weight = BPI->getEdgeProbability(&BB, TargetBB).scale(scaleFactor); + // If InstrumentLoopEntries is on and the current edge leads to a loop + // (i.e., TargetBB is a loop head and BB is outside its loop), set + // Weight to be minimal, so that the edge won't be chosen for the MST + // and will be instrumented. + if (InstrumentLoopEntries && LI->isLoopHeader(TargetBB)) { + Loop *TargetLoop = LI->getLoopFor(TargetBB); + assert(TargetLoop); + if (!TargetLoop->contains(&BB)) + Weight = 0; + } if (Weight == 0) Weight++; auto *E = &addEdge(&BB, TargetBB, Weight); @@ -252,6 +267,19 @@ template class CFGMST { } } + [[maybe_unused]] bool validateLoopEntryInstrumentation() { + if (!InstrumentLoopEntries) + return true; + for (auto &Ei : AllEdges) { + if (Ei->Removed) + continue; + if (Ei->DestBB && LI->isLoopHeader(Ei->DestBB) && + !LI->getLoopFor(Ei->DestBB)->contains(Ei->SrcBB) && Ei->InMST) + return false; + } + return true; + } + public: // Dump the Debug information about the instrumentation. 
void dumpEdges(raw_ostream &OS, const Twine &Message) const { @@ -291,13 +319,20 @@ template class CFGMST { return *AllEdges.back(); } - CFGMST(Function &Func, bool InstrumentFuncEntry, + CFGMST(Function &Func, bool InstrumentFuncEntry, bool InstrumentLoopEntries, BranchProbabilityInfo *BPI = nullptr, - BlockFrequencyInfo *BFI = nullptr) - : F(Func), BPI(BPI), BFI(BFI), InstrumentFuncEntry(InstrumentFuncEntry) { + BlockFrequencyInfo *BFI = nullptr, LoopInfo *LI = nullptr) + : F(Func), BPI(BPI), BFI(BFI), LI(LI), + InstrumentFuncEntry(InstrumentFuncEntry), + InstrumentLoopEntries(InstrumentLoopEntries) { + assert(!(InstrumentLoopEntries && !LI) && + "expected a LoopInfo to instrumenting loop entries"); buildEdges(); sortEdgesByWeight(); computeMinimumSpanningTree(); + assert(validateLoopEntryInstrumentation() && + "Loop entries should not be in MST when " + "InstrumentLoopEntries is on"); if (AllEdges.size() > 1 && InstrumentFuncEntry) std::iter_swap(std::move(AllEdges.begin()), std::move(AllEdges.begin() + AllEdges.size() - 1)); diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index 7663852236594..dad79b2c1761e 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -52,6 +52,9 @@ static InstrProfKind getProfileKindFromVersion(uint64_t Version) { if (Version & VARIANT_MASK_INSTR_ENTRY) { ProfileKind |= InstrProfKind::FunctionEntryInstrumentation; } + if (Version & VARIANT_MASK_INSTR_LOOP_ENTRIES) { + ProfileKind |= InstrProfKind::LoopEntriesInstrumentation; + } if (Version & VARIANT_MASK_BYTE_COVERAGE) { ProfileKind |= InstrProfKind::SingleByteCoverage; } @@ -262,6 +265,8 @@ Error TextInstrProfReader::readHeader() { ProfileKind |= InstrProfKind::FunctionEntryInstrumentation; else if (Str.equals_insensitive("not_entry_first")) ProfileKind &= ~InstrProfKind::FunctionEntryInstrumentation; + else if (Str.equals_insensitive("instrument_loop_entries")) + ProfileKind |= InstrProfKind::LoopEntriesInstrumentation; else if (Str.equals_insensitive("single_byte_coverage")) ProfileKind |= InstrProfKind::SingleByteCoverage; else if (Str.equals_insensitive("temporal_prof_traces")) { diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index d8ab18d213e3d..64625dee7701e 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -877,6 +877,9 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { if (static_cast(ProfileKind & InstrProfKind::FunctionEntryInstrumentation)) Header.Version |= VARIANT_MASK_INSTR_ENTRY; + if (static_cast(ProfileKind & + InstrProfKind::LoopEntriesInstrumentation)) + Header.Version |= VARIANT_MASK_INSTR_LOOP_ENTRIES; if (static_cast(ProfileKind & InstrProfKind::SingleByteCoverage)) Header.Version |= VARIANT_MASK_BYTE_COVERAGE; if (static_cast(ProfileKind & InstrProfKind::FunctionEntryOnly)) @@ -1120,6 +1123,10 @@ Error InstrProfWriter::writeText(raw_fd_ostream &OS) { if (static_cast(ProfileKind & InstrProfKind::FunctionEntryInstrumentation)) OS << "# Always instrument the function entry block\n:entry_first\n"; + if (static_cast(ProfileKind & + InstrProfKind::LoopEntriesInstrumentation)) + OS << "# Always instrument the loop entry " + "blocks\n:instrument_loop_entries\n"; if (static_cast(ProfileKind & InstrProfKind::SingleByteCoverage)) OS << "# Instrument block coverage\n:single_byte_coverage\n"; InstrProfSymtab Symtab; diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp 
b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 2ea89be40a3d4..f9be7f933d31e 100644 --- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -820,7 +820,8 @@ bool GCOVProfiler::emitProfileNotes( SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); - CFGMST MST(F, /*InstrumentFuncEntry_=*/false, BPI, BFI); + CFGMST MST(F, /*InstrumentFuncEntry=*/false, + /*InstrumentLoopEntries=*/false, BPI, BFI); // getInstrBB can split basic blocks and push elements to AllEdges. for (size_t I : llvm::seq(0, MST.numEdges())) { diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 4d8141431a0c1..471086ce3a751 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -259,6 +259,11 @@ static cl::opt PGOInstrumentEntry( "pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock.")); +static cl::opt + PGOInstrumentLoopEntries("pgo-instrument-loop-entries", cl::init(false), + cl::Hidden, + cl::desc("Force to instrument loop entries.")); + static cl::opt PGOFunctionEntryCoverage( "pgo-function-entry-coverage", cl::Hidden, cl::desc( @@ -359,6 +364,7 @@ class FunctionInstrumenter final { std::unordered_multimap &ComdatMembers; BranchProbabilityInfo *const BPI; BlockFrequencyInfo *const BFI; + LoopInfo *const LI; const PGOInstrumentationType InstrumentationType; @@ -376,14 +382,17 @@ class FunctionInstrumenter final { InstrumentationType == PGOInstrumentationType::CTXPROF; } + bool shouldInstrumentLoopEntries() const { return PGOInstrumentLoopEntries; } + public: FunctionInstrumenter( Module &M, Function &F, TargetLibraryInfo &TLI, std::unordered_multimap &ComdatMembers, BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr, + LoopInfo *LI = nullptr, PGOInstrumentationType InstrumentationType = PGOInstrumentationType::FDO) : M(M), F(F), TLI(TLI), ComdatMembers(ComdatMembers), BPI(BPI), BFI(BFI), - InstrumentationType(InstrumentationType) {} + LI(LI), InstrumentationType(InstrumentationType) {} void instrument(); }; @@ -439,6 +448,8 @@ createIRLevelProfileFlagVar(Module &M, if (PGOInstrumentEntry || InstrumentationType == PGOInstrumentationType::CTXPROF) ProfileVersion |= VARIANT_MASK_INSTR_ENTRY; + if (PGOInstrumentLoopEntries) + ProfileVersion |= VARIANT_MASK_INSTR_LOOP_ENTRIES; if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) ProfileVersion |= VARIANT_MASK_DBG_CORRELATE; if (PGOFunctionEntryCoverage) @@ -625,12 +636,13 @@ template class FuncPGOInstrumentation { Function &Func, TargetLibraryInfo &TLI, std::unordered_multimap &ComdatMembers, bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, - BlockFrequencyInfo *BFI = nullptr, bool IsCS = false, - bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false) + BlockFrequencyInfo *BFI = nullptr, LoopInfo *LI = nullptr, + bool IsCS = false, bool InstrumentFuncEntry = true, + bool InstrumentLoopEntries = false, bool HasSingleByteCoverage = false) : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI), TLI(TLI), ValueSites(IPVK_Last + 1), SIVisitor(Func, HasSingleByteCoverage), - MST(F, InstrumentFuncEntry, BPI, BFI), + MST(F, InstrumentFuncEntry, InstrumentLoopEntries, BPI, BFI, LI), BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) { if (BCI && 
PGOViewBlockCoverageGraph) BCI->viewBlockCoverageGraph(); @@ -916,9 +928,10 @@ void FunctionInstrumenter::instrument() { const bool IsCtxProf = InstrumentationType == PGOInstrumentationType::CTXPROF; FuncPGOInstrumentation FuncInfo( - F, TLI, ComdatMembers, /*CreateGlobalVar=*/!IsCtxProf, BPI, BFI, + F, TLI, ComdatMembers, /*CreateGlobalVar=*/!IsCtxProf, BPI, BFI, LI, InstrumentationType == PGOInstrumentationType::CSFDO, - shouldInstrumentEntryBB(), PGOBlockCoverage); + shouldInstrumentEntryBB(), shouldInstrumentLoopEntries(), + PGOBlockCoverage); auto *const Name = IsCtxProf ? cast(&F) : FuncInfo.FuncNameVar; auto *const CFGHash = @@ -1136,11 +1149,13 @@ class PGOUseFunc { PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI, std::unordered_multimap &ComdatMembers, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, - ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry, + LoopInfo *LI, ProfileSummaryInfo *PSI, bool IsCS, + bool InstrumentFuncEntry, bool InstrumentLoopEntries, bool HasSingleByteCoverage) : F(Func), M(Modu), BFI(BFIin), PSI(PSI), - FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS, - InstrumentFuncEntry, HasSingleByteCoverage), + FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, LI, IsCS, + InstrumentFuncEntry, InstrumentLoopEntries, + HasSingleByteCoverage), FreqAttr(FFA_Normal), IsCS(IsCS), VPC(Func, TLI) {} void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum); @@ -1923,6 +1938,7 @@ static bool InstrumentAllFunctions( Module &M, function_ref LookupTLI, function_ref LookupBPI, function_ref LookupBFI, + function_ref LookupLI, PGOInstrumentationType InstrumentationType) { // For the context-sensitve instrumentation, we should have a separated pass // (before LTO/ThinLTO linking) to create these variables. @@ -1943,10 +1959,11 @@ static bool InstrumentAllFunctions( for (auto &F : M) { if (skipPGOGen(F)) continue; - auto &TLI = LookupTLI(F); - auto *BPI = LookupBPI(F); - auto *BFI = LookupBFI(F); - FunctionInstrumenter FI(M, F, TLI, ComdatMembers, BPI, BFI, + TargetLibraryInfo &TLI = LookupTLI(F); + BranchProbabilityInfo *BPI = LookupBPI(F); + BlockFrequencyInfo *BFI = LookupBFI(F); + LoopInfo *LI = LookupLI(F); + FunctionInstrumenter FI(M, F, TLI, ComdatMembers, BPI, BFI, LI, InstrumentationType); FI.instrument(); } @@ -1980,8 +1997,11 @@ PreservedAnalyses PGOInstrumentationGen::run(Module &M, auto LookupBFI = [&FAM](Function &F) { return &FAM.getResult(F); }; + auto LookupLI = [&FAM](Function &F) { + return &FAM.getResult(F); + }; - if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, + if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, LookupLI, InstrumentationType)) return PreservedAnalyses::all(); @@ -2116,7 +2136,8 @@ static bool annotateAllFunctions( function_ref LookupTLI, function_ref LookupBPI, function_ref LookupBFI, - ProfileSummaryInfo *PSI, bool IsCS) { + function_ref LookupLI, ProfileSummaryInfo *PSI, + bool IsCS) { LLVM_DEBUG(dbgs() << "Read in profile counters: "); auto &Ctx = M.getContext(); // Read the counter array from file. 
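(As a side note on the plumbing above: a minimal, self-contained sketch of how
such per-function analysis lookups are typically obtained under the new pass
manager follows. The proxy and analysis names are the standard ones and are
assumptions for illustration, not quoted from this patch.)

    // Sketch: fetch per-function LoopInfo from a module-level pass context.
    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IR/PassManager.h"

    using namespace llvm;

    static void exampleLookups(Module &M, ModuleAnalysisManager &MAM) {
      FunctionAnalysisManager &FAM =
          MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
      auto LookupLI = [&FAM](Function &F) -> LoopInfo * {
        return &FAM.getResult<LoopAnalysis>(F);
      };
      for (Function &F : M)
        if (!F.isDeclaration())
          (void)LookupLI(F); // would be handed to the instrumentation/use path
    }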
@@ -2181,22 +2202,27 @@ static bool annotateAllFunctions( bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled(); if (PGOInstrumentEntry.getNumOccurrences() > 0) InstrumentFuncEntry = PGOInstrumentEntry; + bool InstrumentLoopEntries = PGOReader->instrLoopEntriesEnabled(); + if (PGOInstrumentLoopEntries.getNumOccurrences() > 0) + InstrumentLoopEntries = PGOInstrumentLoopEntries; bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage(); for (auto &F : M) { if (skipPGOUse(F)) continue; - auto &TLI = LookupTLI(F); - auto *BPI = LookupBPI(F); - auto *BFI = LookupBFI(F); + TargetLibraryInfo &TLI = LookupTLI(F); + BranchProbabilityInfo *BPI = LookupBPI(F); + BlockFrequencyInfo *BFI = LookupBFI(F); + LoopInfo *LI = LookupLI(F); if (!HasSingleByteCoverage) { // Split indirectbr critical edges here before computing the MST rather // than later in getInstrBB() to avoid invalidating it. SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); } - PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS, - InstrumentFuncEntry, HasSingleByteCoverage); + PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, LI, PSI, IsCS, + InstrumentFuncEntry, InstrumentLoopEntries, + HasSingleByteCoverage); if (HasSingleByteCoverage) { Func.populateCoverage(PGOReader.get()); continue; @@ -2335,10 +2361,14 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M, auto LookupBFI = [&FAM](Function &F) { return &FAM.getResult(F); }; + auto LookupLI = [&FAM](Function &F) { + return &FAM.getResult(F); + }; auto *PSI = &MAM.getResult(M); if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS, - LookupTLI, LookupBPI, LookupBFI, PSI, IsCS)) + LookupTLI, LookupBPI, LookupBFI, LookupLI, PSI, + IsCS)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); diff --git a/llvm/test/Transforms/PGOProfile/loop_entries_gen.ll b/llvm/test/Transforms/PGOProfile/loop_entries_gen.ll new file mode 100644 index 0000000000000..ed101271558c6 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/loop_entries_gen.ll @@ -0,0 +1,58 @@ +; RUN: opt %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefixes=CHECK,NOTLOOPENTRIES --implicit-check-not=@llvm.instrprof.increment +; RUN: opt %s -passes=pgo-instr-gen -pgo-instrument-loop-entries -S | FileCheck %s --check-prefixes=CHECK,LOOPENTRIES --implicit-check-not=@llvm.instrprof.increment +; RUN: opt %s -passes=pgo-instr-gen -pgo-instrument-entry -S | FileCheck %s --check-prefixes=CHECK,FUNCTIONENTRY --implicit-check-not=@llvm.instrprof.increment + +; CHECK: $__llvm_profile_raw_version = comdat any +; CHECK: @__llvm_profile_raw_version = hidden constant i64 {{[0-9]+}}, comdat +; CHECK: @__profn_test_simple_for_with_bypass = private constant [27 x i8] c"test_simple_for_with_bypass" + +define i32 @test_simple_for_with_bypass(i32 %n) { +entry: +; CHECK: entry: +; NOTLOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 1) +; LOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 1) +; FUNCTIONENTRY: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 0) + %mask = and i32 %n, 65535 + %skip = icmp eq i32 %mask, 0 + br i1 %skip, label %end, label %for.entry + +for.entry: +; CHECK: for.entry: +; LOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 2) + br label %for.cond + +for.cond: +; CHECK: 
for.cond: + %i = phi i32 [ 0, %for.entry ], [ %inc1, %for.inc ] + %sum = phi i32 [ 1, %for.entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i, %n + br i1 %cmp, label %for.body, label %for.end, !prof !1 + +for.body: +; CHECK: for.body: + %inc = add nsw i32 %sum, 1 + br label %for.inc + +for.inc: +; CHECK: for.inc: +; NOTLOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 0) +; LOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 0) +; FUNCTIONENTRY: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 1) + %inc1 = add nsw i32 %i, 1 + br label %for.cond + +for.end: +; CHECK: for.end: +; NOTLOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 2) +; FUNCTIONENTRY: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 2) + br label %end + +end: +; CHECK: end: + %final_sum = phi i32 [ %sum, %for.end ], [ 0, %entry ] + ret i32 %final_sum +} + +; CHECK: declare void @llvm.instrprof.increment(ptr, i64, i32, i32) #0 + +!1 = !{!"branch_weights", i32 100000, i32 80} diff --git a/llvm/test/Transforms/PGOProfile/loop_entries_use.ll b/llvm/test/Transforms/PGOProfile/loop_entries_use.ll new file mode 100644 index 0000000000000..616ecbaf439c3 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/loop_entries_use.ll @@ -0,0 +1,106 @@ +; RUN: rm -rf %t && split-file %s %t + +; RUN: llvm-profdata merge %t/default.proftext -o %t/default.profdata +; RUN: opt %t/main.ll -passes=pgo-instr-use -pgo-test-profile-file=%t/default.profdata -S | FileCheck %s +; RUN: llvm-profdata merge %t/loop_entries.proftext -o %t/loop_entries.profdata +; RUN: opt %t/main.ll -passes=pgo-instr-use -pgo-test-profile-file=%t/loop_entries.profdata -S | FileCheck %s +; RUN: llvm-profdata merge %t/function_entry.proftext -o %t/function_entry.profdata +; RUN: opt %t/main.ll -passes=pgo-instr-use -pgo-test-profile-file=%t/function_entry.profdata -S | FileCheck %s + +;--- main.ll + +define i32 @test_simple_for_with_bypass(i32 %n) { +; CHECK: define i32 @test_simple_for_with_bypass(i32 %n) +; CHECK-SAME: !prof ![[ENTRY_COUNT:[0-9]*]] +entry: +; CHECK: entry: + %mask = and i32 %n, 65535 + %skip = icmp eq i32 %mask, 0 + br i1 %skip, label %end, label %for.entry +; CHECK: br i1 %skip, label %end, label %for.entry +; CHECK-SAME: !prof ![[BW_FOR_BYPASS:[0-9]+]] + +for.entry: +; CHECK: for.entry: + br label %for.cond + +for.cond: +; CHECK: for.cond: + %i = phi i32 [ 0, %for.entry ], [ %inc1, %for.inc ] + %sum = phi i32 [ 1, %for.entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i, %n + br i1 %cmp, label %for.body, label %for.end, !prof !1 +; CHECK: br i1 %cmp, label %for.body, label %for.end +; CHECK-SAME: !prof ![[BW_FOR_COND:[0-9]+]] + +for.body: +; CHECK: for.body: + %inc = add nsw i32 %sum, 1 + br label %for.inc + +for.inc: +; CHECK: for.inc: + %inc1 = add nsw i32 %i, 1 + br label %for.cond + +for.end: +; CHECK: for.end: + br label %end + +end: +; CHECK: end: + %final_sum = phi i32 [ %sum, %for.end ], [ 0, %entry ] + ret i32 %final_sum +} + +!1 = !{!"branch_weights", i32 100000, i32 80} + +; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 12} +; CHECK: ![[BW_FOR_BYPASS]] = !{!"branch_weights", i32 4, i32 8} +; CHECK: ![[BW_FOR_COND]] = !{!"branch_weights", i32 123456, i32 8} + +;--- default.proftext + +# :ir is the flag to indicate this is IR 
level profile. +:ir +test_simple_for_with_bypass +# Func Hash: +536873292337293370 +# Num Counters: +3 +# Counter Values: +123456 +12 +8 + +;--- loop_entries.proftext + +# :ir is the flag to indicate this is IR level profile. +:ir +# Always instrument the loop entry blocks +:instrument_loop_entries +test_simple_for_with_bypass +# Func Hash: +536873292337293370 +# Num Counters: +3 +# Counter Values: +123456 +12 +8 + +;--- function_entry.proftext + +# :ir is the flag to indicate this is IR level profile. +:ir +# Always instrument the function entry block +:entry_first +test_simple_for_with_bypass +# Func Hash: +536873292337293370 +# Num Counters: +3 +# Counter Values: +12 +123456 +8 diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index 2acf1cc34b2d8..1d9d7bcf76549 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -2967,8 +2967,10 @@ static int showInstrProfile(ShowFormat SFormat, raw_fd_ostream &OS) { std::unique_ptr PS(Builder.getSummary()); bool IsIR = Reader->isIRLevelProfile(); OS << "Instrumentation level: " << (IsIR ? "IR" : "Front-end"); - if (IsIR) + if (IsIR) { OS << " entry_first = " << Reader->instrEntryBBEnabled(); + OS << " instrument_loop_entries = " << Reader->instrLoopEntriesEnabled(); + } OS << "\n"; if (ShowAllFunctions || !FuncNameFilter.empty()) OS << "Functions shown: " << ShownFunctions << "\n"; From 5cd3e9736266a86f730f6cfbdb83226b6c78b149 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 4 Dec 2024 15:10:57 +0800 Subject: [PATCH 181/191] [RISCV] Mark vmvNr.v as implicitly using vtype (#118414) This was pointed out in https://github.com/llvm/llvm-project/pull/118283#issuecomment-2512895919. We cannot move these between vtype definitions as they depend on SEW and require vill to be clear. --- llvm/lib/Target/RISCV/RISCVInstrInfoV.td | 2 +- llvm/test/MachineVerifier/RISCV/subreg-liveness.mir | 4 ++-- llvm/test/tools/llvm-mca/RISCV/SiFiveP400/vmv.s | 6 +++--- llvm/test/tools/llvm-mca/RISCV/SiFiveP600/vmv.s | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 8e0c4826ac00d..6506b6746b151 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -1726,7 +1726,7 @@ foreach n = [1, 2, 4, 8] in { def VMV#n#R_V : RVInstV<0b100111, !add(n, -1), OPIVI, (outs vrc:$vd), (ins vrc:$vs2), "vmv" # n # "r.v", "$vd, $vs2">, VMVRSched { - let Uses = []; + let Uses = [VTYPE]; let vm = 1; } } diff --git a/llvm/test/MachineVerifier/RISCV/subreg-liveness.mir b/llvm/test/MachineVerifier/RISCV/subreg-liveness.mir index cb73f500ddc21..3c7b70efe7199 100644 --- a/llvm/test/MachineVerifier/RISCV/subreg-liveness.mir +++ b/llvm/test/MachineVerifier/RISCV/subreg-liveness.mir @@ -19,8 +19,8 @@ body: | ; CHECK: liveins: $v0, $v8, $v9, $v10, $v11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $v16m2 = PseudoVMV_V_I_M2 undef renamable $v16m2, 0, -1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype - ; CHECK-NEXT: $v20m2 = VMV2R_V $v14m2, implicit $v12_v13_v14_v15_v16 + ; CHECK-NEXT: $v20m2 = VMV2R_V $v14m2, implicit $v12_v13_v14_v15_v16, implicit $vtype renamable $v16m2 = PseudoVMV_V_I_M2 undef renamable $v16m2, 0, -1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype - $v20m2 = VMV2R_V $v14m2, implicit $v12_v13_v14_v15_v16 + $v20m2 = VMV2R_V $v14m2, implicit $v12_v13_v14_v15_v16, implicit $vtype ... 
diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP400/vmv.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP400/vmv.s index 31178e8e238f1..e69b7fb38295e 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP400/vmv.s +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP400/vmv.s @@ -293,12 +293,12 @@ vfmv.f.s f7, v16 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 280 -# CHECK-NEXT: Total Cycles: 523 +# CHECK-NEXT: Total Cycles: 524 # CHECK-NEXT: Total uOps: 280 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.54 -# CHECK-NEXT: IPC: 0.54 +# CHECK-NEXT: uOps Per Cycle: 0.53 +# CHECK-NEXT: IPC: 0.53 # CHECK-NEXT: Block RThroughput: 512.0 # CHECK: Instruction Info: diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/vmv.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/vmv.s index 3e9dcff4e1c0a..99b72b1fabbae 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/vmv.s +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/vmv.s @@ -260,12 +260,12 @@ vmv8r.v v8, v16 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 256 -# CHECK-NEXT: Total Cycles: 237 +# CHECK-NEXT: Total Cycles: 255 # CHECK-NEXT: Total uOps: 256 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.08 -# CHECK-NEXT: IPC: 1.08 +# CHECK-NEXT: uOps Per Cycle: 1.00 +# CHECK-NEXT: IPC: 1.00 # CHECK-NEXT: Block RThroughput: 240.0 # CHECK: Instruction Info: From 94d6b1cce5bb9449971a027e352385e72b059a3a Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 3 Dec 2024 23:17:54 -0800 Subject: [PATCH 182/191] [clangd] Fix warnings This patch fixes: clang-tools-extra/clangd/XRefs.cpp:2360:11: error: unused type alias 'SK' [-Werror,-Wunused-local-typedef] clang-tools-extra/clangd/XRefs.cpp:2361:10: error: unused variable 'Kind' [-Werror,-Wunused-variable] --- clang-tools-extra/clangd/XRefs.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp index d237d95b3eb65..8295b9d541f7e 100644 --- a/clang-tools-extra/clangd/XRefs.cpp +++ b/clang-tools-extra/clangd/XRefs.cpp @@ -2363,6 +2363,8 @@ outgoingCalls(const CallHierarchyItem &Item, const SymbolIndex *Index) { Kind == SK::ClassMethod || Kind == SK::StaticMethod || Kind == SK::Constructor || Kind == SK::Destructor || Kind == SK::ConversionFunction); + (void)Kind; + (void)SK::Function; auto It = CallsOut.find(Callee.ID); assert(It != CallsOut.end()); From 154c7c0bf272adc67d3ffc541a01fe8025843fd2 Mon Sep 17 00:00:00 2001 From: Younan Zhang Date: Wed, 4 Dec 2024 15:31:15 +0800 Subject: [PATCH 183/191] [Clang] Don't add top-level const qualifiers to captured function types (#118050) This aligns with the logic in `TreeTransform::RebuildQualifiedType()` where we refrain from adding const qualifiers to function types. Previously, we seemed to overlook this edge case when copy-capturing a variable that is of function type within a const-qualified lambda. This issue also reveals other related problems as in incorrect type printout and a suspicious implementation in DeduceTemplateArguments. I decide to leave them in follow-up work. 
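For illustration, a reduced form of the affected pattern (this is essentially
the new test added below; exact spellings there may differ slightly):

    template <typename T> void g(const T &t) {}

    template <typename T> void f(const T &t) {
      // T deduces to the function type void(); the copy-captured entity is a
      // reference to a function, so the closure member must be an lvalue
      // reference to that function type. Previously the non-mutable (const)
      // lambda also added a top-level const to the captured type, which caused
      // the inner call g(t) to be rejected (GH84961).
      [t] { g(t); }();
    }

    void h() { f(h); }

With this change the capture keeps the plain function type and the inner call
deduces as expected.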
Fixes #84961 --- clang/docs/ReleaseNotes.rst | 1 + clang/lib/Sema/SemaExpr.cpp | 13 +++++++++++-- .../test/SemaCXX/lambda-capture-type-deduction.cpp | 14 ++++++++++++++ 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 02284225fb4fa..755418e9550cf 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -761,6 +761,7 @@ Bug Fixes to C++ Support - Name independent data members were not correctly initialized from default member initializers. (#GH114069) - Fixed expression transformation for ``[[assume(...)]]``, allowing using pack indexing expressions within the assumption if they also occur inside of a dependent lambda. (#GH114787) +- Lambdas now capture function types without considering top-level const qualifiers. (#GH84961) - Clang now uses valid deduced type locations when diagnosing functions with trailing return type missing placeholder return type. (#GH78694) - Fixed a bug where bounds of partially expanded pack indexing expressions were checked too early. (#GH116105) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 1cb07368e8308..adad14cc0f1f6 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -18431,7 +18431,11 @@ static bool isVariableAlreadyCapturedInScopeInfo(CapturingScopeInfo *CSI, // are mutable in the sense that user can change their value - they are // private instances of the captured declarations. const Capture &Cap = CSI->getCapture(Var); - if (Cap.isCopyCapture() && + // C++ [expr.prim.lambda]p10: + // The type of such a data member is [...] an lvalue reference to the + // referenced function type if the entity is a reference to a function. + // [...] + if (Cap.isCopyCapture() && !DeclRefType->isFunctionType() && !(isa(CSI) && !cast(CSI)->lambdaCaptureShouldBeConst()) && !(isa(CSI) && @@ -18741,7 +18745,12 @@ static bool captureInLambda(LambdaScopeInfo *LSI, ValueDecl *Var, // parameter-declaration-clause is not followed by mutable. DeclRefType = CaptureType.getNonReferenceType(); bool Const = LSI->lambdaCaptureShouldBeConst(); - if (Const && !CaptureType->isReferenceType()) + // C++ [expr.prim.lambda]p10: + // The type of such a data member is [...] an lvalue reference to the + // referenced function type if the entity is a reference to a function. + // [...] 
+ if (Const && !CaptureType->isReferenceType() && + !DeclRefType->isFunctionType()) DeclRefType.addConst(); } diff --git a/clang/test/SemaCXX/lambda-capture-type-deduction.cpp b/clang/test/SemaCXX/lambda-capture-type-deduction.cpp index 234cb6806f041..b7a3d77cfc2f4 100644 --- a/clang/test/SemaCXX/lambda-capture-type-deduction.cpp +++ b/clang/test/SemaCXX/lambda-capture-type-deduction.cpp @@ -335,3 +335,17 @@ constexpr void foo() { } } // namespace GH47400 + +namespace GH84961 { + +template void g(const T &t) {} + +template void f(const T &t) { + [t] { g(t); }(); +} + +void h() { + f(h); +} + +} // namespace GH84961 From b1a48af56a62b8c0d5636c9404251700264fcd70 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 4 Dec 2024 07:37:01 +0000 Subject: [PATCH 184/191] [DAG] SimplifyDemandedVectorElts - add handling for INT<->FP conversions (#117884) --- .../CodeGen/SelectionDAG/TargetLowering.cpp | 9 ++ llvm/test/CodeGen/PowerPC/pr38087.ll | 4 +- .../CodeGen/X86/avx10_2fptosi_satcvtds.ll | 3 +- .../X86/fold-int-pow2-with-fmul-or-fdiv.ll | 107 +++++++++--------- llvm/test/CodeGen/X86/fpclamptosat_vec.ll | 12 +- llvm/test/CodeGen/X86/freeze-vector.ll | 11 +- .../CodeGen/X86/vector-half-conversions.ll | 8 +- llvm/test/CodeGen/X86/widen_conv-3.ll | 4 +- 8 files changed, 78 insertions(+), 80 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 68fbd36cf6e62..eeba4b7d20f9c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3736,6 +3736,15 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownUndef.clearAllBits(); } break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef, + KnownZero, TLO, Depth + 1)) + return true; + // Don't fall through to generic undef -> undef handling. 
+ return false; default: { if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef, diff --git a/llvm/test/CodeGen/PowerPC/pr38087.ll b/llvm/test/CodeGen/PowerPC/pr38087.ll index 1216fa9cf8f26..933bf12cddaa6 100644 --- a/llvm/test/CodeGen/PowerPC/pr38087.ll +++ b/llvm/test/CodeGen/PowerPC/pr38087.ll @@ -11,9 +11,9 @@ declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #0 define void @draw_llvm_vs_variant0(<4 x float> %x) { ; CHECK-LABEL: draw_llvm_vs_variant0: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxsd v3, 0(r3) -; CHECK-NEXT: vmrghh v3, v3, v3 +; CHECK-NEXT: lxsihzx v3, 0, r3 ; CHECK-NEXT: vextsh2w v3, v3 +; CHECK-NEXT: xxmrghw v3, v3, v3 ; CHECK-NEXT: xvcvsxwsp vs0, v3 ; CHECK-NEXT: xxspltw vs0, vs0, 2 ; CHECK-NEXT: xvmaddasp vs0, v2, v2 diff --git a/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll b/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll index 4a6556bdc4a91..494e4bc8e068e 100644 --- a/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll +++ b/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll @@ -85,8 +85,7 @@ define i64 @test_signed_i64_f64(double %f) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; X86-NEXT: vcvttpd2qq %xmm1, %xmm1 +; X86-NEXT: vcvttpd2qq %xmm0, %xmm1 ; X86-NEXT: vmovd %xmm1, %esi ; X86-NEXT: xorl %ecx, %ecx ; X86-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 diff --git a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll index 332fbf7188af8..2163121410553 100644 --- a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll +++ b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll @@ -141,56 +141,61 @@ declare <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half>, <8 x i16>) define <8 x half> @fmul_pow2_8xhalf(<8 x i16> %i) { ; CHECK-SSE-LABEL: fmul_pow2_8xhalf: ; CHECK-SSE: # %bb.0: -; CHECK-SSE-NEXT: subq $88, %rsp -; CHECK-SSE-NEXT: .cfi_def_cfa_offset 96 +; CHECK-SSE-NEXT: subq $104, %rsp +; CHECK-SSE-NEXT: .cfi_def_cfa_offset 112 ; CHECK-SSE-NEXT: movdqa %xmm0, %xmm1 ; CHECK-SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] ; CHECK-SSE-NEXT: pslld $23, %xmm1 ; CHECK-SSE-NEXT: movdqa {{.*#+}} xmm2 = [1065353216,1065353216,1065353216,1065353216] ; CHECK-SSE-NEXT: paddd %xmm2, %xmm1 ; CHECK-SSE-NEXT: cvttps2dq %xmm1, %xmm1 -; CHECK-SSE-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill +; CHECK-SSE-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-SSE-NEXT: pslld $16, %xmm1 +; CHECK-SSE-NEXT: movdqa %xmm1, (%rsp) # 16-byte Spill ; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; CHECK-SSE-NEXT: pslld $23, %xmm0 ; CHECK-SSE-NEXT: paddd %xmm2, %xmm0 ; CHECK-SSE-NEXT: cvttps2dq %xmm0, %xmm0 +; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: pslld $16, %xmm0 -; CHECK-SSE-NEXT: psrld $16, %xmm0 ; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] +; CHECK-SSE-NEXT: psrld $16, %xmm0 ; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE-NEXT: cvtdq2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-SSE-NEXT: psrlq $48, %xmm0 +; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0 ; CHECK-SSE-NEXT: 
callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload -; CHECK-SSE-NEXT: # xmm0 = mem[2,3,2,3] +; CHECK-SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE-NEXT: pshufd $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload -; CHECK-SSE-NEXT: # xmm0 = mem[3,3,3,3] -; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0 +; CHECK-SSE-NEXT: xorps %xmm0, %xmm0 +; CHECK-SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; CHECK-SSE-NEXT: cvtdq2ps %xmm1, %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; CHECK-SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill -; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] +; CHECK-SSE-NEXT: psrld $16, %xmm0 ; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE-NEXT: cvtdq2ps (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload +; CHECK-SSE-NEXT: psrlq $48, %xmm0 +; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE-NEXT: pshufd $238, (%rsp), %xmm0 # 16-byte Folded Reload -; CHECK-SSE-NEXT: # xmm0 = mem[2,3,2,3] +; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload +; CHECK-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE-NEXT: pshufd $255, (%rsp), %xmm0 # 16-byte Folded Reload -; CHECK-SSE-NEXT: # xmm0 = mem[3,3,3,3] +; CHECK-SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] ; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: callq __extendhfsf2@PLT @@ -202,39 +207,39 @@ define <8 x half> @fmul_pow2_8xhalf(<8 x i16> %i) { ; CHECK-SSE-NEXT: callq __extendhfsf2@PLT ; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT -; CHECK-SSE-NEXT: punpcklwd (%rsp), %xmm0 # 16-byte Folded Reload -; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] -; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-SSE-NEXT: movdqa (%rsp), %xmm1 # 16-byte Reload +; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; CHECK-SSE-NEXT: movdqa %xmm1, (%rsp) # 16-byte Spill ; CHECK-SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; CHECK-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-SSE-NEXT: callq __extendhfsf2@PLT ; 
CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT -; CHECK-SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; CHECK-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-SSE-NEXT: callq __extendhfsf2@PLT ; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT -; CHECK-SSE-NEXT: movdqa (%rsp), %xmm1 # 16-byte Reload -; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; CHECK-SSE-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload -; CHECK-SSE-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] -; CHECK-SSE-NEXT: movdqa %xmm1, (%rsp) # 16-byte Spill +; CHECK-SSE-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; CHECK-SSE-NEXT: punpckldq (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; CHECK-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-SSE-NEXT: callq __extendhfsf2@PLT ; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT -; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill ; CHECK-SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; CHECK-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-SSE-NEXT: callq __extendhfsf2@PLT ; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT -; CHECK-SSE-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload -; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] -; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-SSE-NEXT: movdqa (%rsp), %xmm1 # 16-byte Reload +; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; CHECK-SSE-NEXT: movdqa %xmm1, (%rsp) # 16-byte Spill ; CHECK-SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; CHECK-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-SSE-NEXT: callq __extendhfsf2@PLT @@ -246,14 +251,13 @@ define <8 x half> @fmul_pow2_8xhalf(<8 x i16> %i) { ; CHECK-SSE-NEXT: callq __extendhfsf2@PLT ; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT -; CHECK-SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload -; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; CHECK-SSE-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload -; CHECK-SSE-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] -; CHECK-SSE-NEXT: punpcklqdq (%rsp), %xmm1 # 16-byte Folded Reload -; CHECK-SSE-NEXT: # xmm1 = xmm1[0],mem[0] -; CHECK-SSE-NEXT: movdqa %xmm1, %xmm0 -; CHECK-SSE-NEXT: addq $88, %rsp +; CHECK-SSE-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; CHECK-SSE-NEXT: punpckldq (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-SSE-NEXT: # xmm0 = 
xmm0[0],mem[0],xmm0[1],mem[1] +; CHECK-SSE-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0] +; CHECK-SSE-NEXT: addq $104, %rsp ; CHECK-SSE-NEXT: .cfi_def_cfa_offset 8 ; CHECK-SSE-NEXT: retq ; @@ -1028,17 +1032,17 @@ define <2 x half> @fmul_pow_shl_cnt_vec_fail_to_large(<2 x i16> %cnt) nounwind { ; CHECK-SSE-NEXT: pslld $23, %xmm0 ; CHECK-SSE-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: cvttps2dq %xmm0, %xmm0 -; CHECK-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7] -; CHECK-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2,2,u,u,u,u,u,u] -; CHECK-SSE-NEXT: pxor %xmm0, %xmm0 -; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; CHECK-SSE-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-SSE-NEXT: cvtdq2ps %xmm1, %xmm0 +; CHECK-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; CHECK-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,2,u,u,u,u,u,u] +; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-SSE-NEXT: psrld $16, %xmm0 +; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE-NEXT: pshufd $85, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload -; CHECK-SSE-NEXT: # xmm0 = mem[1,1,1,1] -; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0 +; CHECK-SSE-NEXT: xorps %xmm0, %xmm0 +; CHECK-SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; CHECK-SSE-NEXT: cvtdq2ps %xmm1, %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: callq __extendhfsf2@PLT ; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 @@ -1049,8 +1053,9 @@ define <2 x half> @fmul_pow_shl_cnt_vec_fail_to_large(<2 x i16> %cnt) nounwind { ; CHECK-SSE-NEXT: callq __extendhfsf2@PLT ; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT -; CHECK-SSE-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload -; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; CHECK-SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; CHECK-SSE-NEXT: movdqa %xmm1, %xmm0 ; CHECK-SSE-NEXT: addq $40, %rsp ; CHECK-SSE-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll index 6aad4c2ebba1d..2dedb10d42fb4 100644 --- a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll @@ -731,7 +731,7 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) nounwind { ; ; AVX512-LABEL: stest_f16i32: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0 +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512-NEXT: vcvttps2qq %ymm0, %zmm0 ; AVX512-NEXT: vpmovsqd %ymm0, %xmm0 ; AVX512-NEXT: vzeroupper @@ -894,7 +894,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind { ; ; AVX512-LABEL: utesth_f16i32: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0 +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512-NEXT: vcvttps2uqq %ymm0, %zmm0 ; AVX512-NEXT: vpmovusqd %ymm0, %xmm0 ; AVX512-NEXT: vzeroupper @@ -1031,7 +1031,7 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) 
nounwind { ; ; AVX512-LABEL: ustest_f16i32: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0 +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512-NEXT: vcvttps2qq %ymm0, %zmm0 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 @@ -3343,7 +3343,7 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) nounwind { ; ; AVX512-LABEL: stest_f16i32_mm: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0 +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512-NEXT: vcvttps2qq %ymm0, %zmm0 ; AVX512-NEXT: vpmovsqd %ymm0, %xmm0 ; AVX512-NEXT: vzeroupper @@ -3504,7 +3504,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind { ; ; AVX512-LABEL: utesth_f16i32_mm: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0 +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512-NEXT: vcvttps2uqq %ymm0, %zmm0 ; AVX512-NEXT: vpmovusqd %ymm0, %xmm0 ; AVX512-NEXT: vzeroupper @@ -3640,7 +3640,7 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) nounwind { ; ; AVX512-LABEL: ustest_f16i32_mm: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0 +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512-NEXT: vcvttps2qq %ymm0, %zmm0 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 diff --git a/llvm/test/CodeGen/X86/freeze-vector.ll b/llvm/test/CodeGen/X86/freeze-vector.ll index fe240286462e9..362b3b945f962 100644 --- a/llvm/test/CodeGen/X86/freeze-vector.ll +++ b/llvm/test/CodeGen/X86/freeze-vector.ll @@ -630,13 +630,8 @@ define void @pr59677(i32 %x, ptr %out) nounwind { ; X86: # %bb.0: ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: vmovd %eax, %xmm0 -; X86-NEXT: orl $1, %eax -; X86-NEXT: vmovd %eax, %xmm1 -; X86-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; X86-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero +; X86-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; X86-NEXT: vcvtdq2ps %xmm0, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) @@ -651,10 +646,6 @@ define void @pr59677(i32 %x, ptr %out) nounwind { ; X64-NEXT: pushq %rbx ; X64-NEXT: movq %rsi, %rbx ; X64-NEXT: vmovd %edi, %xmm0 -; X64-NEXT: orl $1, %edi -; X64-NEXT: vmovd %edi, %xmm1 -; X64-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; X64-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; X64-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; X64-NEXT: vcvtdq2ps %xmm0, %xmm0 ; X64-NEXT: callq sinf@PLT diff --git a/llvm/test/CodeGen/X86/vector-half-conversions.ll b/llvm/test/CodeGen/X86/vector-half-conversions.ll index 5148d1566c629..62ee0b298ba91 100644 --- a/llvm/test/CodeGen/X86/vector-half-conversions.ll +++ b/llvm/test/CodeGen/X86/vector-half-conversions.ll @@ -4966,8 +4966,6 @@ define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) nounwind { ; ; F16C-LABEL: fptosi_2f16_to_4i32: ; F16C: # %bb.0: -; F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: vcvttps2dq %xmm0, %xmm0 ; F16C-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero @@ -4975,8 +4973,6 @@ define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) nounwind { ; ; AVX512-LABEL: fptosi_2f16_to_4i32: ; AVX512: # %bb.0: -; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0 ; AVX512-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero @@ -5104,8 +5100,6 @@ define <4 x 
i32> @fptoui_2f16_to_4i32(<2 x half> %a) nounwind { ; ; AVX512-FASTLANE-LABEL: fptoui_2f16_to_4i32: ; AVX512-FASTLANE: # %bb.0: -; AVX512-FASTLANE-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX512-FASTLANE-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; AVX512-FASTLANE-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512-FASTLANE-NEXT: vcvttps2udq %xmm0, %xmm0 ; AVX512-FASTLANE-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero @@ -5212,7 +5206,7 @@ define <4 x i32> @fptoui_4f16_to_4i32(<4 x half> %a) nounwind { ; ; AVX512F-LABEL: fptoui_4f16_to_4i32: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vcvtph2ps %xmm0, %ymm0 +; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512F-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/widen_conv-3.ll b/llvm/test/CodeGen/X86/widen_conv-3.ll index 5887834f9af2f..f9b588b8b8915 100644 --- a/llvm/test/CodeGen/X86/widen_conv-3.ll +++ b/llvm/test/CodeGen/X86/widen_conv-3.ll @@ -10,7 +10,7 @@ define void @convert_v2i16_to_v2f32(ptr %dst.addr, <2 x i16> %src) nounwind { ; X86-SSE2-LABEL: convert_v2i16_to_v2f32: ; X86-SSE2: # %bb.0: # %entry ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] ; X86-SSE2-NEXT: psrad $16, %xmm0 ; X86-SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 ; X86-SSE2-NEXT: movlps %xmm0, (%eax) @@ -26,7 +26,7 @@ define void @convert_v2i16_to_v2f32(ptr %dst.addr, <2 x i16> %src) nounwind { ; ; X64-SSE2-LABEL: convert_v2i16_to_v2f32: ; X64-SSE2: # %bb.0: # %entry -; X64-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; X64-SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] ; X64-SSE2-NEXT: psrad $16, %xmm0 ; X64-SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 ; X64-SSE2-NEXT: movlps %xmm0, (%rdi) From 455b4fd01ae9b2a78be98bcd26db2b700709c545 Mon Sep 17 00:00:00 2001 From: Mariusz Sikora Date: Wed, 4 Dec 2024 08:42:04 +0100 Subject: [PATCH 185/191] [AMDGPU] Emit amdgcn.if.break in the same BB as amdgcn.loop (#118081) Before this change if.break was placed in wrong loop level which resulted in accumulating values only from last iteration of the inner loop. --- .../Target/AMDGPU/SIAnnotateControlFlow.cpp | 7 +- llvm/test/CodeGen/AMDGPU/multilevel-break.ll | 26 +-- .../CodeGen/AMDGPU/nested-loop-conditions.ll | 150 +++++++++--------- 3 files changed, 97 insertions(+), 86 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp index fa39df9ae5ba8..4ff6fc32b642d 100644 --- a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp @@ -224,8 +224,13 @@ Value *SIAnnotateControlFlow::handleLoopCondition( if (Instruction *Inst = dyn_cast(Cond)) { BasicBlock *Parent = Inst->getParent(); Instruction *Insert; - if (L->contains(Inst)) { + if (LI->getLoopFor(Parent) == L) { + // Insert IfBreak in the same BB as Cond, which can help + // SILowerControlFlow to know that it does not have to insert an + // AND with EXEC. 
Insert = Parent->getTerminator(); + } else if (L->contains(Inst)) { + Insert = Term; } else { Insert = L->getHeader()->getFirstNonPHIOrDbgOrLifetime(); } diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll index 65c44768d3d88..6c62f3f225cd9 100644 --- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll +++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll @@ -27,10 +27,10 @@ define amdgpu_vs void @multi_else_break(<4 x float> %vec, i32 %ub, i32 %cont) { ; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]]) ; OPT-NEXT: [[TMP6]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP5]], i64 [[PHI_BROKEN]]) ; OPT-NEXT: [[TMP7:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP6]]) -; OPT-NEXT: [[TMP8]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP4]], i64 [[PHI_BROKEN2]]) ; OPT-NEXT: br i1 [[TMP7]], label [[FLOW1]], label [[LOOP]] ; OPT: Flow1: ; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP6]]) +; OPT-NEXT: [[TMP8]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP4]], i64 [[PHI_BROKEN2]]) ; OPT-NEXT: [[TMP9:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP8]]) ; OPT-NEXT: br i1 [[TMP9]], label [[IF:%.*]], label [[LOOP_OUTER]] ; OPT: IF: @@ -57,33 +57,37 @@ define amdgpu_vs void @multi_else_break(<4 x float> %vec, i32 %ub, i32 %cont) { ; GCN-NEXT: .LBB0_2: ; %LOOP.outer ; GCN-NEXT: ; =>This Loop Header: Depth=1 ; GCN-NEXT: ; Child Loop BB0_4 Depth 2 -; GCN-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GCN-NEXT: ; implicit-def: $sgpr2_sgpr3 +; GCN-NEXT: ; implicit-def: $sgpr8_sgpr9 +; GCN-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GCN-NEXT: s_mov_b64 s[4:5], 0 ; GCN-NEXT: s_branch .LBB0_4 ; GCN-NEXT: .LBB0_3: ; %Flow ; GCN-NEXT: ; in Loop: Header=BB0_4 Depth=2 -; GCN-NEXT: s_or_b64 exec, exec, s[8:9] -; GCN-NEXT: s_and_b64 s[8:9], exec, s[6:7] -; GCN-NEXT: s_or_b64 s[4:5], s[8:9], s[4:5] +; GCN-NEXT: s_or_b64 exec, exec, s[10:11] +; GCN-NEXT: s_and_b64 s[10:11], exec, s[8:9] +; GCN-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5] +; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], exec +; GCN-NEXT: s_and_b64 s[10:11], s[6:7], exec +; GCN-NEXT: s_or_b64 s[2:3], s[2:3], s[10:11] ; GCN-NEXT: s_andn2_b64 exec, exec, s[4:5] ; GCN-NEXT: s_cbranch_execz .LBB0_1 ; GCN-NEXT: .LBB0_4: ; %LOOP ; GCN-NEXT: ; Parent Loop BB0_2 Depth=1 ; GCN-NEXT: ; => This Inner Loop Header: Depth=2 ; GCN-NEXT: v_cmp_lt_i32_e32 vcc, v0, v4 -; GCN-NEXT: s_or_b64 s[2:3], s[2:3], exec ; GCN-NEXT: s_or_b64 s[6:7], s[6:7], exec -; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GCN-NEXT: s_or_b64 s[8:9], s[8:9], exec +; GCN-NEXT: s_and_saveexec_b64 s[10:11], vcc ; GCN-NEXT: s_cbranch_execz .LBB0_3 ; GCN-NEXT: ; %bb.5: ; %ENDIF ; GCN-NEXT: ; in Loop: Header=BB0_4 Depth=2 ; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v0 -; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], exec -; GCN-NEXT: v_cmp_ne_u32_e32 vcc, v5, v0 ; GCN-NEXT: s_andn2_b64 s[6:7], s[6:7], exec -; GCN-NEXT: s_and_b64 s[10:11], vcc, exec -; GCN-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, v5, v0 +; GCN-NEXT: s_andn2_b64 s[8:9], s[8:9], exec +; GCN-NEXT: s_and_b64 s[12:13], vcc, exec +; GCN-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13] ; GCN-NEXT: s_branch .LBB0_3 ; GCN-NEXT: .LBB0_6: ; %IF ; GCN-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll index a52d9ff526c2a..bd6ef9e088b12 100644 --- a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll +++ b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=IR %s ; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s @@ -46,51 +46,52 @@ define amdgpu_kernel void @reduced_nested_loop_conditions(ptr addrspace(3) nocap ; GCN-NEXT: s_cbranch_vccz .LBB0_6 ; GCN-NEXT: .LBB0_7: ; %DummyReturnBlock ; GCN-NEXT: s_endpgm -; IR-LABEL: @reduced_nested_loop_conditions( -; IR-NEXT: bb: +; IR-LABEL: define amdgpu_kernel void @reduced_nested_loop_conditions( +; IR-SAME: ptr addrspace(3) nocapture [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; IR-NEXT: [[BB:.*]]: ; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR4:[0-9]+]] -; IR-NEXT: [[MY_TMP1:%.*]] = getelementptr inbounds i64, ptr addrspace(3) [[ARG:%.*]], i32 [[MY_TMP]] +; IR-NEXT: [[MY_TMP1:%.*]] = getelementptr inbounds i64, ptr addrspace(3) [[ARG]], i32 [[MY_TMP]] ; IR-NEXT: [[MY_TMP2:%.*]] = load volatile i64, ptr addrspace(3) [[MY_TMP1]], align 8 -; IR-NEXT: br label [[BB5:%.*]] -; IR: bb3: -; IR-NEXT: br i1 true, label [[BB4:%.*]], label [[BB13:%.*]] -; IR: bb4: -; IR-NEXT: br label [[FLOW:%.*]] -; IR: bb5: -; IR-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP6:%.*]], [[BB10:%.*]] ], [ 0, [[BB:%.*]] ] -; IR-NEXT: [[MY_TMP6:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP5:%.*]], [[BB10]] ] +; IR-NEXT: br label %[[BB5:.*]] +; IR: [[BB3:.*]]: +; IR-NEXT: br i1 true, label %[[BB4:.*]], label %[[BB13:.*]] +; IR: [[BB4]]: +; IR-NEXT: br label %[[FLOW:.*]] +; IR: [[BB5]]: +; IR-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP6:%.*]], %[[BB10:.*]] ], [ 0, %[[BB]] ] +; IR-NEXT: [[MY_TMP6:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP5:%.*]], %[[BB10]] ] ; IR-NEXT: [[MY_TMP7:%.*]] = icmp eq i32 [[MY_TMP6]], 1 ; IR-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[MY_TMP7]]) ; IR-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0 ; IR-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1 -; IR-NEXT: br i1 [[TMP1]], label [[BB8:%.*]], label [[FLOW]] -; IR: bb8: -; IR-NEXT: br label [[BB13]] -; IR: bb9: -; IR-NEXT: br i1 false, label [[BB3:%.*]], label [[BB9:%.*]] -; IR: bb10: +; IR-NEXT: br i1 [[TMP1]], label %[[BB8:.*]], label %[[FLOW]] +; IR: [[BB8]]: +; IR-NEXT: br label %[[BB13]] +; IR: [[BB9:.*]]: +; IR-NEXT: br i1 false, label %[[BB3]], label %[[BB9]] +; IR: [[BB10]]: ; IR-NEXT: [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP6]]) -; IR-NEXT: br i1 [[TMP3]], label [[BB23:%.*]], label [[BB5]] -; IR: Flow: -; IR-NEXT: [[TMP4:%.*]] = phi i1 [ [[MY_TMP22:%.*]], [[BB4]] ], [ true, [[BB5]] ] -; IR-NEXT: [[TMP5]] = phi i32 [ [[MY_TMP21:%.*]], [[BB4]] ], [ undef, [[BB5]] ] +; IR-NEXT: br i1 [[TMP3]], label %[[BB23:.*]], label %[[BB5]] +; IR: [[FLOW]]: +; IR-NEXT: [[TMP4:%.*]] = phi i1 [ [[MY_TMP22:%.*]], %[[BB4]] ], [ true, %[[BB5]] ] +; IR-NEXT: [[TMP5]] = phi i32 [ [[MY_TMP21:%.*]], %[[BB4]] ], [ undef, %[[BB5]] ] ; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]]) ; IR-NEXT: [[TMP6]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP4]], i64 [[PHI_BROKEN]]) -; IR-NEXT: br label [[BB10]] -; IR: bb13: -; IR-NEXT: [[MY_TMP14:%.*]] = phi i1 [ [[MY_TMP22]], [[BB3]] ], [ true, [[BB8]] ] +; IR-NEXT: br label %[[BB10]] +; IR: [[BB13]]: +; IR-NEXT: [[MY_TMP14:%.*]] = phi i1 [ [[MY_TMP22]], %[[BB3]] ], [ true, %[[BB8]] ] ; IR-NEXT: [[MY_TMP15:%.*]] = bitcast i64 [[MY_TMP2]] to <2 x i32> -; IR-NEXT: br i1 [[MY_TMP14]], 
label [[BB16:%.*]], label [[BB20:%.*]] -; IR: bb16: +; IR-NEXT: br i1 [[MY_TMP14]], label %[[BB16:.*]], label %[[BB20:.*]] +; IR: [[BB16]]: ; IR-NEXT: [[MY_TMP17:%.*]] = extractelement <2 x i32> [[MY_TMP15]], i64 1 ; IR-NEXT: [[MY_TMP18:%.*]] = getelementptr inbounds i32, ptr addrspace(3) undef, i32 [[MY_TMP17]] ; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[MY_TMP18]], align 4 -; IR-NEXT: br label [[BB20]] -; IR: bb20: -; IR-NEXT: [[MY_TMP21]] = phi i32 [ [[MY_TMP19]], [[BB16]] ], [ 0, [[BB13]] ] -; IR-NEXT: [[MY_TMP22]] = phi i1 [ false, [[BB16]] ], [ [[MY_TMP14]], [[BB13]] ] -; IR-NEXT: br label [[BB9]] -; IR: bb23: +; IR-NEXT: br label %[[BB20]] +; IR: [[BB20]]: +; IR-NEXT: [[MY_TMP21]] = phi i32 [ [[MY_TMP19]], %[[BB16]] ], [ 0, %[[BB13]] ] +; IR-NEXT: [[MY_TMP22]] = phi i1 [ false, %[[BB16]] ], [ [[MY_TMP14]], %[[BB13]] ] +; IR-NEXT: br label %[[BB9]] +; IR: [[BB23]]: ; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP6]]) ; IR-NEXT: ret void bb: @@ -188,66 +189,67 @@ define amdgpu_kernel void @nested_loop_conditions(ptr addrspace(1) nocapture %ar ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_endpgm -; IR-LABEL: @nested_loop_conditions( -; IR-NEXT: bb: +; IR-LABEL: define amdgpu_kernel void @nested_loop_conditions( +; IR-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) #[[ATTR0]] { +; IR-NEXT: [[BB:.*]]: ; IR-NEXT: [[MY_TMP1134:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4 ; IR-NEXT: [[MY_TMP1235:%.*]] = icmp slt i32 [[MY_TMP1134]], 9 -; IR-NEXT: br i1 [[MY_TMP1235]], label [[BB14_LR_PH:%.*]], label [[FLOW:%.*]] -; IR: bb14.lr.ph: +; IR-NEXT: br i1 [[MY_TMP1235]], label %[[BB14_LR_PH:.*]], label %[[FLOW:.*]] +; IR: [[BB14_LR_PH]]: ; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR4]] ; IR-NEXT: [[MY_TMP1:%.*]] = zext i32 [[MY_TMP]] to i64 -; IR-NEXT: [[MY_TMP2:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[ARG:%.*]], i64 [[MY_TMP1]] +; IR-NEXT: [[MY_TMP2:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[ARG]], i64 [[MY_TMP1]] ; IR-NEXT: [[MY_TMP3:%.*]] = load i64, ptr addrspace(1) [[MY_TMP2]], align 16 ; IR-NEXT: [[MY_TMP932:%.*]] = load <4 x i32>, ptr addrspace(1) undef, align 16 ; IR-NEXT: [[MY_TMP1033:%.*]] = extractelement <4 x i32> [[MY_TMP932]], i64 0 -; IR-NEXT: br label [[BB14:%.*]] -; IR: Flow3: +; IR-NEXT: br label %[[BB14:.*]] +; IR: [[FLOW3:.*]]: ; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP20:%.*]]) ; IR-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP14:%.*]]) ; IR-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0 ; IR-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1 -; IR-NEXT: br i1 [[TMP1]], label [[BB4_BB13_CRIT_EDGE:%.*]], label [[FLOW4:%.*]] -; IR: bb4.bb13_crit_edge: -; IR-NEXT: br label [[FLOW4]] -; IR: Flow4: -; IR-NEXT: [[TMP3:%.*]] = phi i1 [ true, [[BB4_BB13_CRIT_EDGE]] ], [ false, [[FLOW3:%.*]] ] +; IR-NEXT: br i1 [[TMP1]], label %[[BB4_BB13_CRIT_EDGE:.*]], label %[[FLOW4:.*]] +; IR: [[BB4_BB13_CRIT_EDGE]]: +; IR-NEXT: br label %[[FLOW4]] +; IR: [[FLOW4]]: +; IR-NEXT: [[TMP3:%.*]] = phi i1 [ true, %[[BB4_BB13_CRIT_EDGE]] ], [ false, %[[FLOW3]] ] ; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]]) -; IR-NEXT: br label [[FLOW]] -; IR: bb13: -; IR-NEXT: br label [[BB31:%.*]] -; IR: Flow: -; IR-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP3]], [[FLOW4]] ], [ true, [[BB:%.*]] ] +; IR-NEXT: br label %[[FLOW]] +; IR: [[BB13:.*]]: +; IR-NEXT: br label %[[BB31:.*]] +; IR: [[FLOW]]: +; IR-NEXT: 
[[TMP4:%.*]] = phi i1 [ [[TMP3]], %[[FLOW4]] ], [ true, %[[BB]] ] ; IR-NEXT: [[TMP5:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP4]]) ; IR-NEXT: [[TMP6:%.*]] = extractvalue { i1, i64 } [[TMP5]], 0 ; IR-NEXT: [[TMP7:%.*]] = extractvalue { i1, i64 } [[TMP5]], 1 -; IR-NEXT: br i1 [[TMP6]], label [[BB13:%.*]], label [[BB31]] -; IR: bb14: -; IR-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP16:%.*]], [[FLOW1:%.*]] ], [ 0, [[BB14_LR_PH]] ] -; IR-NEXT: [[MY_TMP1037:%.*]] = phi i32 [ [[MY_TMP1033]], [[BB14_LR_PH]] ], [ [[TMP12:%.*]], [[FLOW1]] ] -; IR-NEXT: [[MY_TMP936:%.*]] = phi <4 x i32> [ [[MY_TMP932]], [[BB14_LR_PH]] ], [ [[TMP11:%.*]], [[FLOW1]] ] +; IR-NEXT: br i1 [[TMP6]], label %[[BB13]], label %[[BB31]] +; IR: [[BB14]]: +; IR-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP16:%.*]], %[[FLOW1:.*]] ], [ 0, %[[BB14_LR_PH]] ] +; IR-NEXT: [[MY_TMP1037:%.*]] = phi i32 [ [[MY_TMP1033]], %[[BB14_LR_PH]] ], [ [[TMP12:%.*]], %[[FLOW1]] ] +; IR-NEXT: [[MY_TMP936:%.*]] = phi <4 x i32> [ [[MY_TMP932]], %[[BB14_LR_PH]] ], [ [[TMP11:%.*]], %[[FLOW1]] ] ; IR-NEXT: [[MY_TMP15:%.*]] = icmp eq i32 [[MY_TMP1037]], 1 ; IR-NEXT: [[TMP8:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[MY_TMP15]]) ; IR-NEXT: [[TMP9:%.*]] = extractvalue { i1, i64 } [[TMP8]], 0 ; IR-NEXT: [[TMP10:%.*]] = extractvalue { i1, i64 } [[TMP8]], 1 -; IR-NEXT: br i1 [[TMP9]], label [[BB16:%.*]], label [[FLOW1]] -; IR: bb16: +; IR-NEXT: br i1 [[TMP9]], label %[[BB16:.*]], label %[[FLOW1]] +; IR: [[BB16]]: ; IR-NEXT: [[MY_TMP17:%.*]] = bitcast i64 [[MY_TMP3]] to <2 x i32> -; IR-NEXT: br label [[BB18:%.*]] -; IR: Flow1: -; IR-NEXT: [[TMP11]] = phi <4 x i32> [ [[MY_TMP9:%.*]], [[BB21:%.*]] ], [ undef, [[BB14]] ] -; IR-NEXT: [[TMP12]] = phi i32 [ [[MY_TMP10:%.*]], [[BB21]] ], [ undef, [[BB14]] ] -; IR-NEXT: [[TMP13:%.*]] = phi i1 [ [[MY_TMP12:%.*]], [[BB21]] ], [ true, [[BB14]] ] -; IR-NEXT: [[TMP14]] = phi i1 [ [[MY_TMP12]], [[BB21]] ], [ false, [[BB14]] ] -; IR-NEXT: [[TMP15:%.*]] = phi i1 [ false, [[BB21]] ], [ true, [[BB14]] ] +; IR-NEXT: br label %[[BB18:.*]] +; IR: [[FLOW1]]: +; IR-NEXT: [[TMP11]] = phi <4 x i32> [ [[MY_TMP9:%.*]], %[[BB21:.*]] ], [ undef, %[[BB14]] ] +; IR-NEXT: [[TMP12]] = phi i32 [ [[MY_TMP10:%.*]], %[[BB21]] ], [ undef, %[[BB14]] ] +; IR-NEXT: [[TMP13:%.*]] = phi i1 [ [[MY_TMP12:%.*]], %[[BB21]] ], [ true, %[[BB14]] ] +; IR-NEXT: [[TMP14]] = phi i1 [ [[MY_TMP12]], %[[BB21]] ], [ false, %[[BB14]] ] +; IR-NEXT: [[TMP15:%.*]] = phi i1 [ false, %[[BB21]] ], [ true, %[[BB14]] ] ; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP10]]) ; IR-NEXT: [[TMP16]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP13]], i64 [[PHI_BROKEN]]) ; IR-NEXT: [[TMP17:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP16]]) -; IR-NEXT: br i1 [[TMP17]], label [[FLOW2:%.*]], label [[BB14]] -; IR: bb18: +; IR-NEXT: br i1 [[TMP17]], label %[[FLOW2:.*]], label %[[BB14]] +; IR: [[BB18]]: ; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4 ; IR-NEXT: [[MY_TMP20:%.*]] = icmp slt i32 [[MY_TMP19]], 9 -; IR-NEXT: br i1 [[MY_TMP20]], label [[BB21]], label [[BB18]] -; IR: bb21: +; IR-NEXT: br i1 [[MY_TMP20]], label %[[BB21]], label %[[BB18]] +; IR: [[BB21]]: ; IR-NEXT: [[MY_TMP22:%.*]] = extractelement <2 x i32> [[MY_TMP17]], i64 1 ; IR-NEXT: [[MY_TMP23:%.*]] = lshr i32 [[MY_TMP22]], 16 ; IR-NEXT: [[MY_TMP24:%.*]] = select i1 undef, i32 undef, i32 [[MY_TMP23]] @@ -263,16 +265,16 @@ define amdgpu_kernel void @nested_loop_conditions(ptr addrspace(1) nocapture %ar ; IR-NEXT: [[MY_TMP10]] = extractelement <4 x i32> [[MY_TMP9]], i64 0 ; 
IR-NEXT: [[MY_TMP11:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
 ; IR-NEXT: [[MY_TMP12]] = icmp sge i32 [[MY_TMP11]], 9
-; IR-NEXT: br label [[FLOW1]]
-; IR: Flow2:
+; IR-NEXT: br label %[[FLOW1]]
+; IR: [[FLOW2]]:
 ; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP16]])
 ; IR-NEXT: [[TMP18:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP15]])
 ; IR-NEXT: [[TMP19:%.*]] = extractvalue { i1, i64 } [[TMP18]], 0
 ; IR-NEXT: [[TMP20]] = extractvalue { i1, i64 } [[TMP18]], 1
-; IR-NEXT: br i1 [[TMP19]], label [[BB31_LOOPEXIT:%.*]], label [[FLOW3]]
-; IR: bb31.loopexit:
-; IR-NEXT: br label [[FLOW3]]
-; IR: bb31:
+; IR-NEXT: br i1 [[TMP19]], label %[[BB31_LOOPEXIT:.*]], label %[[FLOW3]]
+; IR: [[BB31_LOOPEXIT]]:
+; IR-NEXT: br label %[[FLOW3]]
+; IR: [[BB31]]:
 ; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP7]])
 ; IR-NEXT: store volatile i32 0, ptr addrspace(1) undef, align 4
 ; IR-NEXT: ret void

From 2a30bfcef368667247ebbe30be84f73b92dbe800 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Markus=20B=C3=B6ck?=
Date: Wed, 4 Dec 2024 09:36:05 +0100
Subject: [PATCH 186/191] [mlir] Improve error message when number of operands and types differ (#118488)

If using a variadic operand, the error message given when the number of types and operands do not match would be along the lines of:

```
3 operands present, but expected 2
```

This error message is confusing for multiple reasons, particularly for beginners:
* If the intention is to have 3 operands, it does not point out why it expects 2. The user may actually just want to add a type to the type list.
* It reads like a verifier error rather than a parser error, giving the impression the Op only supports 2 operands.

This PR attempts to improve the error message by first noting the issue ("number of operands and types do not match") and mentioning how many operands and types it received.
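For illustration only (not part of this change): the sketch below is a hypothetical hand-written op parser, with the op syntax and function name invented for the example, showing where the reworded diagnostic surfaces. `resolveOperands` is the entry point modified in this patch.

```
#include "mlir/IR/OpImplementation.h"

using namespace mlir;

// Hypothetical parser for an op written as:  my.op %a, %b, %c : i32, i32
// Funneling the operand and type lists through resolveOperands() is what
// emits the new diagnostic when the two counts differ.
static ParseResult parseHypotheticalOp(OpAsmParser &parser,
                                       OperationState &result) {
  SmallVector<OpAsmParser::UnresolvedOperand> operands;
  SmallVector<Type> types;
  if (parser.parseOperandList(operands) || parser.parseColonTypeList(types))
    return failure();
  // Three operands but two types now reports:
  //   "number of operands and types do not match: got 3 operands and 2 types"
  // instead of "3 operands present, but expected 2".
  return parser.resolveOperands(operands, types, parser.getNameLoc(),
                                result.operands);
}
```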
--- mlir/include/mlir/IR/OpImplementation.h | 3 ++- mlir/test/Dialect/LLVMIR/invalid.mlir | 4 ++-- mlir/test/Dialect/Linalg/transform-ops-invalid.mlir | 2 +- mlir/test/Dialect/SCF/invalid.mlir | 4 ++-- mlir/test/Dialect/SPIRV/IR/memory-ops.mlir | 6 +++--- mlir/test/Dialect/Tensor/invalid.mlir | 2 +- mlir/test/Dialect/Vector/invalid.mlir | 2 +- 7 files changed, 12 insertions(+), 11 deletions(-) diff --git a/mlir/include/mlir/IR/OpImplementation.h b/mlir/include/mlir/IR/OpImplementation.h index a7222794f320b..6c1ff4d0e5e6b 100644 --- a/mlir/include/mlir/IR/OpImplementation.h +++ b/mlir/include/mlir/IR/OpImplementation.h @@ -1604,7 +1604,8 @@ class OpAsmParser : public AsmParser { size_t typeSize = llvm::range_size(types); if (operandSize != typeSize) return emitError(loc) - << operandSize << " operands present, but expected " << typeSize; + << "number of operands and types do not match: got " << operandSize + << " operands and " << typeSize << " types"; for (auto [operand, type] : llvm::zip_equal(operands, types)) if (resolveOperand(operand, type, result)) diff --git a/mlir/test/Dialect/LLVMIR/invalid.mlir b/mlir/test/Dialect/LLVMIR/invalid.mlir index 5677d7ff41202..25806d9d0edd7 100644 --- a/mlir/test/Dialect/LLVMIR/invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/invalid.mlir @@ -91,14 +91,14 @@ func.func @alloca_non_integer_alignment() { // ----- func.func @gep_missing_input_result_type(%pos : i64, %base : !llvm.ptr) { - // expected-error@+1 {{2 operands present, but expected 0}} + // expected-error@+1 {{number of operands and types do not match: got 2 operands and 0 types}} llvm.getelementptr %base[%pos] : () -> (), i64 } // ----- func.func @gep_missing_input_type(%pos : i64, %base : !llvm.ptr) { - // expected-error@+1 {{2 operands present, but expected 0}} + // expected-error@+1 {{number of operands and types do not match: got 2 operands and 0 types}} llvm.getelementptr %base[%pos] : () -> (!llvm.ptr), i64 } diff --git a/mlir/test/Dialect/Linalg/transform-ops-invalid.mlir b/mlir/test/Dialect/Linalg/transform-ops-invalid.mlir index fbebb97a11983..6584596cdfdb2 100644 --- a/mlir/test/Dialect/Linalg/transform-ops-invalid.mlir +++ b/mlir/test/Dialect/Linalg/transform-ops-invalid.mlir @@ -77,7 +77,7 @@ transform.sequence failures(propagate) { transform.sequence failures(propagate) { ^bb0(%arg0: !transform.any_op): %0 = transform.param.constant 2 : i64 -> !transform.param - // expected-error@below {{custom op 'transform.structured.vectorize' 1 operands present, but expected 2}} + // expected-error@+1 {{custom op 'transform.structured.vectorize' number of operands and types do not match: got 1 operands and 2 types}} transform.structured.vectorize %arg0 vector_sizes [%0, 2] : !transform.any_op, !transform.param, !transform.param } diff --git a/mlir/test/Dialect/SCF/invalid.mlir b/mlir/test/Dialect/SCF/invalid.mlir index 337eb9eeb8fa5..80576be880127 100644 --- a/mlir/test/Dialect/SCF/invalid.mlir +++ b/mlir/test/Dialect/SCF/invalid.mlir @@ -247,7 +247,7 @@ func.func @parallel_more_results_than_reduces( func.func @parallel_more_results_than_initial_values( %arg0 : index, %arg1: index, %arg2: index) { - // expected-error@+1 {{'scf.parallel' 0 operands present, but expected 1}} + // expected-error@+1 {{'scf.parallel' number of operands and types do not match: got 0 operands and 1 types}} %res = scf.parallel (%i0) = (%arg0) to (%arg1) step (%arg2) -> f32 { scf.reduce(%arg0 : index) { ^bb0(%lhs: index, %rhs: index): @@ -609,7 +609,7 @@ func.func @wrong_num_results(%in: tensor<100xf32>, %out: 
tensor<100xf32>) { %c1 = arith.constant 1 : index %num_threads = arith.constant 100 : index - // expected-error @+1 {{1 operands present, but expected 2}} + // expected-error@+1 {{number of operands and types do not match: got 1 operands and 2 types}} %result:2 = scf.forall (%thread_idx) in (%num_threads) shared_outs(%o = %out) -> (tensor<100xf32>, tensor<100xf32>) { %1 = tensor.extract_slice %in[%thread_idx][1][1] : tensor<100xf32> to tensor<1xf32> scf.forall.in_parallel { diff --git a/mlir/test/Dialect/SPIRV/IR/memory-ops.mlir b/mlir/test/Dialect/SPIRV/IR/memory-ops.mlir index 5aef6135afd97..57ff94762ff68 100644 --- a/mlir/test/Dialect/SPIRV/IR/memory-ops.mlir +++ b/mlir/test/Dialect/SPIRV/IR/memory-ops.mlir @@ -57,7 +57,7 @@ func.func @access_chain_non_composite() -> () { func.func @access_chain_no_indices(%index0 : i32) -> () { %0 = spirv.Variable : !spirv.ptr>, Function> - // expected-error @+1 {{custom op 'spirv.AccessChain' 0 operands present, but expected 1}} + // expected-error @+1 {{custom op 'spirv.AccessChain' number of operands and types do not match: got 0 operands and 1 types}} %1 = spirv.AccessChain %0[] : !spirv.ptr>, Function>, i32 -> !spirv.ptr return } @@ -75,7 +75,7 @@ func.func @access_chain_missing_comma(%index0 : i32) -> () { func.func @access_chain_invalid_indices_types_count(%index0 : i32) -> () { %0 = spirv.Variable : !spirv.ptr>, Function> - // expected-error @+1 {{custom op 'spirv.AccessChain' 1 operands present, but expected 2}} + // expected-error @+1 {{custom op 'spirv.AccessChain' number of operands and types do not match: got 1 operands and 2 types}} %1 = spirv.AccessChain %0[%index0] : !spirv.ptr>, Function>, i32, i32 -> !spirv.ptr, Function> return } @@ -84,7 +84,7 @@ func.func @access_chain_invalid_indices_types_count(%index0 : i32) -> () { func.func @access_chain_missing_indices_type(%index0 : i32) -> () { %0 = spirv.Variable : !spirv.ptr>, Function> - // expected-error @+1 {{custom op 'spirv.AccessChain' 2 operands present, but expected 1}} + // expected-error @+1 {{custom op 'spirv.AccessChain' number of operands and types do not match: got 2 operands and 1 types}} %1 = spirv.AccessChain %0[%index0, %index0] : !spirv.ptr>, Function>, i32 -> !spirv.ptr return } diff --git a/mlir/test/Dialect/Tensor/invalid.mlir b/mlir/test/Dialect/Tensor/invalid.mlir index 77cae1cc5f242..83cb4b9d4ab24 100644 --- a/mlir/test/Dialect/Tensor/invalid.mlir +++ b/mlir/test/Dialect/Tensor/invalid.mlir @@ -90,7 +90,7 @@ func.func @tensor.from_elements_wrong_result_type() { // ----- func.func @tensor.from_elements_wrong_elements_count() { - // expected-error@+2 {{1 operands present, but expected 2}} + // expected-error@+2 {{number of operands and types do not match: got 1 operands and 2 types}} %c0 = arith.constant 0 : index %0 = tensor.from_elements %c0 : tensor<2xindex> return diff --git a/mlir/test/Dialect/Vector/invalid.mlir b/mlir/test/Dialect/Vector/invalid.mlir index 9f7efa15ed520..1a70791fae125 100644 --- a/mlir/test/Dialect/Vector/invalid.mlir +++ b/mlir/test/Dialect/Vector/invalid.mlir @@ -1803,7 +1803,7 @@ func.func @deinterleave_scalable_rank_fail(%vec : vector<2x[4]xf32>) { // ----- func.func @invalid_from_elements(%a: f32) { - // expected-error @+1 {{'vector.from_elements' 1 operands present, but expected 2}} + // expected-error @+1 {{'vector.from_elements' number of operands and types do not match: got 1 operands and 2 types}} vector.from_elements %a : vector<2xf32> return } From 4df18ab7da17953277c7879782a37f4ed8e39beb Mon Sep 17 00:00:00 2001 From: Konrad 
Kleine Date: Wed, 4 Dec 2024 09:50:56 +0100 Subject: [PATCH 187/191] [mlir] Specify deps via `LLVM_LINK_COMPONENTS` (#118542) This specifies the dependencies to link against with `LLVM_LINK_COMPONENTS` for the `mlir/test/Target/LLVM/MLIRTargetLLVMTests` binary. Before, the dependencies where directly added to the `target_link_libraries()` call which caused the problems I describe next. When doing a build of LLVM with MLIR I want to link against `libLLVM.so` instead of statically linking `libLLVMSupport.a`. MLIR on the other side seems to statically link against `libLLVMSupport.a` because when I link to the shared library `libLLVM.so` I get: ``` CommandLine Error: Option 'aarch64-ptrauth-auth-checks' registered more than once! ``` This error indicates that the `Support` library is linked twice in the `MLIRTargetLLVMTest` binary. Here's the creation of the `MLIRTargetLLVMTest` binary before (Notice the `libLLVMSupport.a`): ``` [6535/6847] : && /usr/bin/clang++ -O2 -flto=thin -ffat-lto-objects -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-U_FORTIFY_SOURCE,-D_FORTIFY_SOURCE=3 -Wp,-D_GLIBCXX_ASSERTIONS --config=/usr/lib/rpm/redhat/redhat-hardened-clang.cfg -fstack-protector-strong -mbranch-protection=standard -fasynchronous-unwind-tables -D_DEFAULT_SOURCE -Dasm=__asm__ -fPIC -fno-semantic-interposition -fvisibility-inlines-hidden -Werror=date-time -Werror=unguarded-availability-new -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wmissing-field-initializers -pedantic -Wno-long-long -Wc++98-compat-extra-semi -Wimplicit-fallthrough -Wcovered-switch-default -Wno-noexcept-type -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wsuggest-override -Wstring-conversion -Wmisleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -ffunction-sections -fdata-sections -Wundef -Werror=mismatched-tags -O2 -g -DNDEBUG -Wl,-z,relro -Wl,--as-needed -Wl,-z,pack-relative-relocs -Wl,-z,now --config=/usr/lib/rpm/redhat/redhat-hardened-clang-ld.cfg -flto=thin -ffat-lto-objects -Wl,--build-id=sha1 -Wl,--gc-sections -fno-lto tools/mlir/unittests/Target/LLVM/CMakeFiles/MLIRTargetLLVMTests.dir/SerializeNVVMTarget.cpp.o tools/mlir/unittests/Target/LLVM/CMakeFiles/MLIRTargetLLVMTests.dir/SerializeROCDLTarget.cpp.o tools/mlir/unittests/Target/LLVM/CMakeFiles/MLIRTargetLLVMTests.dir/SerializeToLLVMBitcode.cpp.o -o tools/mlir/unittests/Target/LLVM/MLIRTargetLLVMTests -Wl,-rpath,/builddir/build/BUILD/llvm-19.1.3-build/llvm-project-19.1.3.src/llvm/redhat-linux-build/lib64 lib64/libllvm_gtest_main.a lib64/libllvm_gtest.a lib64/libMLIRTargetLLVM.a lib64/libMLIRNVVMTarget.a lib64/libMLIRROCDLTarget.a lib64/libMLIRGPUDialect.a lib64/libMLIRNVVMDialect.a lib64/libMLIRLLVMDialect.a lib64/libMLIRLLVMToLLVMIRTranslation.a lib64/libMLIRBuiltinToLLVMIRTranslation.a lib64/libMLIRNVVMToLLVMIRTranslation.a lib64/libMLIRROCDLToLLVMIRTranslation.a lib64/libMLIRGPUToLLVMIRTranslation.a lib64/libLLVMAArch64CodeGen.a lib64/libLLVMAArch64Desc.a lib64/libLLVMAArch64Info.a -lpthread lib64/libMLIRTargetLLVM.a lib64/libMLIRROCDLDialect.a lib64/libMLIRExecutionEngineUtils.a lib64/libMLIRGPUDialect.a lib64/libMLIRMemRefDialect.a lib64/libMLIRArithUtils.a lib64/libMLIRDialectUtils.a lib64/libMLIRComplexDialect.a lib64/libMLIRArithAttrToLLVMConversion.a lib64/libMLIRArithDialect.a lib64/libMLIRCastInterfaces.a lib64/libMLIRDialect.a lib64/libMLIRInferIntRangeCommon.a lib64/libMLIRUBDialect.a lib64/libMLIRShapedOpInterfaces.a lib64/libMLIRTargetLLVMIRExport.a lib64/libMLIRDLTIDialect.a 
lib64/libMLIRLLVMIRTransforms.a lib64/libMLIRNVVMDialect.a lib64/libMLIRLLVMDialect.a lib64/libMLIRFuncDialect.a lib64/libMLIRTransforms.a lib64/libMLIRMemorySlotInterfaces.a lib64/libMLIRCopyOpInterface.a lib64/libMLIRRuntimeVerifiableOpInterface.a lib64/libMLIRTranslateLib.a lib64/libMLIRParser.a lib64/libMLIRBytecodeReader.a lib64/libMLIRAsmParser.a lib64/libMLIRTransformUtils.a lib64/libMLIRSubsetOpInterface.a lib64/libMLIRValueBoundsOpInterface.a lib64/libMLIRDestinationStyleOpInterface.a lib64/libMLIRRewrite.a lib64/libMLIRRewritePDL.a lib64/libMLIRPDLToPDLInterp.a lib64/libMLIRPass.a lib64/libMLIRAnalysis.a lib64/libMLIRControlFlowInterfaces.a lib64/libMLIRInferIntRangeInterface.a lib64/libMLIRCallInterfaces.a lib64/libMLIRDataLayoutInterfaces.a lib64/libMLIRViewLikeInterface.a lib64/libMLIRLoopLikeInterface.a lib64/libMLIRPresburger.a lib64/libMLIRPDLInterpDialect.a lib64/libMLIRFunctionInterfaces.a lib64/libMLIRPDLDialect.a lib64/libMLIRSideEffectInterfaces.a lib64/libMLIRInferTypeOpInterface.a lib64/libMLIRIR.a lib64/libMLIRSupport.a lib64/libLLVM.so.19.1 lib64/libLLVMAArch64Utils.a lib64/libLLVMAsmPrinter.a lib64/libLLVMCFGuard.a lib64/libLLVMGlobalISel.a lib64/libLLVMSelectionDAG.a lib64/libLLVMCodeGen.a lib64/libLLVMScalarOpts.a lib64/libLLVMAggressiveInstCombine.a lib64/libLLVMInstCombine.a lib64/libLLVMBitWriter.a lib64/libLLVMObjCARCOpts.a lib64/libLLVMCodeGenTypes.a lib64/libLLVMTarget.a lib64/libLLVMVectorize.a lib64/libLLVMTransformUtils.a lib64/libLLVMAnalysis.a lib64/libLLVMProfileData.a lib64/libLLVMSymbolize.a lib64/libLLVMDebugInfoDWARF.a lib64/libLLVMDebugInfoPDB.a lib64/libLLVMObject.a lib64/libLLVMMCParser.a lib64/libLLVMMC.a lib64/libLLVMIRReader.a lib64/libLLVMBitReader.a lib64/libLLVMAsmParser.a lib64/libLLVMTextAPI.a lib64/libLLVMDebugInfoCodeView.a lib64/libLLVMDebugInfoMSF.a lib64/libLLVMDebugInfoBTF.a lib64/libLLVMCore.a lib64/libLLVMBinaryFormat.a lib64/libLLVMRemarks.a lib64/libLLVMBitstreamReader.a lib64/libLLVMTargetParser.a lib64/libLLVMSupport.a lib64/libLLVMDemangle.a -lrt -ldl -lm /usr/lib64/libz.so /usr/lib64/libzstd.so && : ``` Here's the full error: ``` [24/25] cd /builddir/build/BUILD/llvm-19.1.3-build/llvm-project-19.1.3.src/llvm/redhat-linux-build/tools/mlir/test && /usr/bin/python3 /builddir/build/BUILD/llvm-19.1.3-build/llvm-project-19.1.3.src/llvm/redhat-linux-build/./bin/llvm-lit -vv /builddir/build/BUILD/llvm-19.1.3-build/llvm-project-19.1.3.src/llvm/redhat-linux-build/tools/mlir/test : CommandLine Error: Option 'aarch64-ptrauth-auth-checks' registered more than once! LLVM ERROR: inconsistency in registered CommandLine options llvm-lit: /builddir/build/BUILD/llvm-19.1.3-build/llvm-project-19.1.3.src/llvm/utils/lit/lit/formats/googletest.py:38: warning: unable to discover google-tests in '/builddir/build/BUILD/llvm-19.1.3-build/llvm-project-19.1.3.src/llvm/redhat-linux-build/tools/mlir/unittests/Target/LLVM/./MLIRTargetLLVMTests': Command '['/builddir/build/BUILD/llvm-19.1.3-build/llvm-project-19.1.3.src/llvm/redhat-linux-build/tools/mlir/unittests/Target/LLVM/./MLIRTargetLLVMTests', '--gtest_list_tests', '--gtest_filter=-*DISABLED_*']' died with .. Process output: b'' error: filter did not match any tests (of 2704 discovered). Use '--allow-empty-runs' to suppress this error. FAILED: tools/mlir/test/CMakeFiles/check-mlir /builddir/build/BUILD/llvm-19.1.3-build/llvm-project-19.1.3.src/llvm/redhat-linux-build/tools/mlir/test/CMakeFiles/check-mlir ``` Here's the CMake invocation: ``` /usr/bin/cmake -S . 
-B redhat-linux-build -DCMAKE_C_FLAGS_RELEASE:STRING=-DNDEBUG -DCMAKE_CXX_FLAGS_RELEASE:STRING=-DNDEBUG -DCMAKE_Fortran_FLAGS_RELEASE:STRING=-DNDEBUG -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DCMAKE_INSTALL_DO_STRIP:BOOL=OFF -DCMAKE_INSTALL_PREFIX:PATH=/usr -DINCLUDE_INSTALL_DIR:PATH=/usr/include -DLIB_INSTALL_DIR:PATH=/usr/lib64 -DSYSCONF_INSTALL_DIR:PATH=/etc -DSHARE_INSTALL_PREFIX:PATH=/usr/share -DLIB_SUFFIX=64 -DBUILD_SHARED_LIBS:BOOL=ON -G Ninja '' -DCLANG_BUILD_EXAMPLES:BOOL=OFF -DCLANG_CONFIG_FILE_SYSTEM_DIR=/etc/clang/ -DCLANG_DEFAULT_PIE_ON_LINUX=OFF -DCLANG_DEFAULT_UNWINDLIB=libgcc -DCLANG_ENABLE_ARCMT:BOOL=ON -DCLANG_ENABLE_STATIC_ANALYZER:BOOL=ON -DCLANG_INCLUDE_DOCS:BOOL=ON -DCLANG_INCLUDE_TESTS:BOOL=ON -DCLANG_LINK_CLANG_DYLIB=ON -DCLANG_PLUGIN_SUPPORT:BOOL=ON '-DCLANG_REPOSITORY_STRING=Fedora 19.1.3-5.fc42' -DLLVM_EXTERNAL_CLANG_TOOLS_EXTRA_SOURCE_DIR=../clang-tools-extra -DCLANG_RESOURCE_DIR=../lib/clang/19 -DCOMPILER_RT_INCLUDE_TESTS:BOOL=OFF -DCOMPILER_RT_INSTALL_PATH=/usr/lib/clang/19 -DLLVM_ENABLE_DOXYGEN:BOOL=OFF -DLLVM_ENABLE_SPHINX:BOOL=ON -DLLVM_BUILD_DOCS:BOOL=ON -DSPHINX_EXECUTABLE=/usr/bin/sphinx-build-3 -DSPHINX_OUTPUT_HTML:BOOL=OFF -DSPHINX_OUTPUT_MAN:BOOL=ON -DSPHINX_WARNINGS_AS_ERRORS=OFF -DLLDB_DISABLE_CURSES:BOOL=OFF -DLLDB_DISABLE_LIBEDIT:BOOL=OFF -DLLDB_DISABLE_PYTHON:BOOL=OFF -DLLDB_ENFORCE_STRICT_TEST_REQUIREMENTS:BOOL=ON -DLLVM_APPEND_VC_REV:BOOL=OFF -DLLVM_BUILD_EXAMPLES:BOOL=OFF -DLLVM_BUILD_EXTERNAL_COMPILER_RT:BOOL=ON -DLLVM_BUILD_LLVM_DYLIB:BOOL=ON -DLLVM_BUILD_RUNTIME:BOOL=ON -DLLVM_BUILD_TOOLS:BOOL=ON -DLLVM_BUILD_UTILS:BOOL=ON -DLLVM_COMMON_CMAKE_UTILS=/usr/share/llvm/cmake -DLLVM_DEFAULT_TARGET_TRIPLE=aarch64-redhat-linux-gnu -DLLVM_DYLIB_COMPONENTS=all -DLLVM_ENABLE_EH=ON -DLLVM_ENABLE_FFI:BOOL=ON -DLLVM_ENABLE_LIBCXX:BOOL=OFF -DLLVM_ENABLE_PER_TARGET_RUNTIME_DIR=ON '-DLLVM_ENABLE_PROJECTS=clang;clang-tools-extra;lld;lldb;mlir' -DLLVM_ENABLE_RTTI:BOOL=ON '-DLLVM_ENABLE_RUNTIMES=compiler-rt;openmp;offload' -DLLVM_ENABLE_ZLIB:BOOL=ON -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=AVR -DLLVM_INCLUDE_BENCHMARKS=OFF -DLLVM_INCLUDE_EXAMPLES:BOOL=ON -DLLVM_INCLUDE_TOOLS:BOOL=ON -DLLVM_INCLUDE_UTILS:BOOL=ON -DLLVM_INSTALL_TOOLCHAIN_ONLY:BOOL=OFF -DLLVM_INSTALL_UTILS:BOOL=ON -DLLVM_LINK_LLVM_DYLIB:BOOL=ON -DLLVM_PARALLEL_LINK_JOBS=1 -DLLVM_TARGETS_TO_BUILD=all -DLLVM_TOOLS_INSTALL_DIR:PATH=bin -DLLVM_UNREACHABLE_OPTIMIZE:BOOL=OFF -DLLVM_USE_PERF:BOOL=ON -DLLVM_UTILS_INSTALL_DIR:PATH=bin -DMLIR_INCLUDE_DOCS:BOOL=ON -DMLIR_INCLUDE_TESTS:BOOL=ON -DMLIR_INCLUDE_INTEGRATION_TESTS:BOOL=OFF -DMLIR_INSTALL_AGGREGATE_OBJECTS=OFF -DMLIR_BUILD_MLIR_C_DYLIB=ON -DMLIR_ENABLE_BINDINGS_PYTHON:BOOL=ON -DOPENMP_INSTALL_LIBDIR=lib64 -DLIBOMP_INSTALL_ALIASES=OFF -DLLVM_BUILD_TESTS:BOOL=ON -DLLVM_INCLUDE_TESTS:BOOL=ON -DLLVM_INSTALL_GTEST:BOOL=ON -DLLVM_LIT_ARGS=-vv -DLLVM_UNITTEST_LINK_FLAGS=-fno-lto -DBUILD_SHARED_LIBS:BOOL=OFF -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX=/usr -DENABLE_LINKER_BUILD_ID:BOOL=ON -DOFFLOAD_INSTALL_LIBDIR=lib64 -DPython3_EXECUTABLE=/usr/bin/python3 -DCMAKE_SKIP_INSTALL_RPATH:BOOL=ON -DPPC_LINUX_DEFAULT_IEEELONGDOUBLE=ON -DLLVM_LIBDIR_SUFFIX=64 -DLLVM_BINUTILS_INCDIR=/usr/include -DLLVM_VERSION_SUFFIX= ``` --- mlir/unittests/Target/LLVM/CMakeLists.txt | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mlir/unittests/Target/LLVM/CMakeLists.txt b/mlir/unittests/Target/LLVM/CMakeLists.txt index 6d612548a94c0..0c61d222dedf4 100644 --- a/mlir/unittests/Target/LLVM/CMakeLists.txt +++ b/mlir/unittests/Target/LLVM/CMakeLists.txt @@ 
-1,11 +1,11 @@ +set(LLVM_LINK_COMPONENTS nativecodegen) + add_mlir_unittest(MLIRTargetLLVMTests SerializeNVVMTarget.cpp SerializeROCDLTarget.cpp SerializeToLLVMBitcode.cpp ) -llvm_map_components_to_libnames(llvm_libs nativecodegen) - target_link_libraries(MLIRTargetLLVMTests PRIVATE MLIRTargetLLVM @@ -19,7 +19,6 @@ target_link_libraries(MLIRTargetLLVMTests MLIRNVVMToLLVMIRTranslation MLIRROCDLToLLVMIRTranslation MLIRGPUToLLVMIRTranslation - ${llvm_libs} ) if (DEFINED LLVM_NATIVE_TARGET) From 3b0cb8979624bc052587712650bfd52f77eb69d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A9sz=C3=A1ros=20Gergely?= Date: Wed, 4 Dec 2024 09:54:21 +0100 Subject: [PATCH 188/191] [clang-repl][CMake][MSVC] Use LINKER: instead of `-Wl` (#118518) This should be more portable, and avoids passing the option to `clang-cl` when linking, because `clang-cl` accepts any `-W` flags (normally warning flags) during linking (#118516). --- clang/tools/clang-repl/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/tools/clang-repl/CMakeLists.txt b/clang/tools/clang-repl/CMakeLists.txt index 7aebbe7a19436..f9a911b0ae8e2 100644 --- a/clang/tools/clang-repl/CMakeLists.txt +++ b/clang/tools/clang-repl/CMakeLists.txt @@ -66,7 +66,7 @@ clang_target_link_libraries(clang-repl PRIVATE # start to exceed this limit, e.g. when linking for arm-linux-gnueabihf with # gold. This flag tells the linker to build a PLT for the full address range. # Linkers without this flag are assumed to support proper PLTs by default. -set(flag_long_plt "-Wl,--long-plt") +set(flag_long_plt "LINKER:--long-plt") check_linker_flag(CXX ${flag_long_plt} HAVE_LINKER_FLAG_LONG_PLT) if(HAVE_LINKER_FLAG_LONG_PLT) target_link_options(clang-repl PRIVATE ${flag_long_plt}) From d9b4bdbff597d0ed98dd82674e456ac4c751a6a0 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Wed, 4 Dec 2024 14:31:34 +0530 Subject: [PATCH 189/191] [CodeGen][NewPM] Port LiveDebugVariables to NPM (#115468) The existing analysis was already a pimpl wrapper. I have extracted legacy pass logic to a LDVImpl wrapper named `LiveDebugVariables` which is the analysis::Result now. This controls whether to activate the LDV (depending on `-live-debug-variables` and DIsubprogram) itself. The legacy and new analysis only construct the LiveDebugVariables. VirtRegRewriter will test this. 
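As a usage sketch (not part of this patch): the fragment below shows how a new-pass-manager consumer would obtain the result after this change. The pass itself is hypothetical; `LiveDebugVariablesAnalysis`, the legacy wrapper's `getLDV()`, and `print()` are the entry points added or kept by the diff below.

```
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/MachinePassManager.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Hypothetical new-PM machine pass that consumes the analysis result.
struct HypotheticalLDVConsumerPass
    : PassInfoMixin<HypotheticalLDVConsumerPass> {
  PreservedAnalyses run(MachineFunction &MF,
                        MachineFunctionAnalysisManager &MFAM) {
    // The result is the LiveDebugVariables object itself, not a wrapper.
    LiveDebugVariables &DebugVars =
        MFAM.getResult<LiveDebugVariablesAnalysis>(MF);
    DebugVars.print(errs());
    return PreservedAnalyses::all();
  }
};

// Legacy-PM consumers go through the wrapper instead, as RegAllocGreedy and
// VirtRegRewriter do in this patch:
//   DebugVars = &getAnalysis<LiveDebugVariablesWrapperLegacy>().getLDV();
```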
--- .../include/llvm/CodeGen/LiveDebugVariables.h | 68 ++++++- llvm/include/llvm/InitializePasses.h | 2 +- .../llvm/Passes/MachinePassRegistry.def | 2 + llvm/lib/CodeGen/CodeGen.cpp | 2 +- llvm/lib/CodeGen/LiveDebugVariables.cpp | 172 +++++++++++------- llvm/lib/CodeGen/RegAllocBasic.cpp | 6 +- llvm/lib/CodeGen/RegAllocGreedy.cpp | 8 +- llvm/lib/CodeGen/RegAllocGreedy.h | 3 +- llvm/lib/CodeGen/StackSlotColoring.cpp | 2 +- llvm/lib/CodeGen/VirtRegMap.cpp | 8 +- llvm/lib/Passes/PassBuilder.cpp | 1 + .../LoongArchDeadRegisterDefinitions.cpp | 2 +- .../RISCV/RISCVDeadRegisterDefinitions.cpp | 2 +- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 2 +- 14 files changed, 195 insertions(+), 85 deletions(-) diff --git a/llvm/include/llvm/CodeGen/LiveDebugVariables.h b/llvm/include/llvm/CodeGen/LiveDebugVariables.h index a4b5a87fd3887..2cb95f2c71ccd 100644 --- a/llvm/include/llvm/CodeGen/LiveDebugVariables.h +++ b/llvm/include/llvm/CodeGen/LiveDebugVariables.h @@ -21,7 +21,10 @@ #define LLVM_CODEGEN_LIVEDEBUGVARIABLES_H #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/IR/PassManager.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" +#include namespace llvm { @@ -29,15 +32,15 @@ template class ArrayRef; class LiveIntervals; class VirtRegMap; -class LiveDebugVariables : public MachineFunctionPass { - void *pImpl = nullptr; +class LiveDebugVariables { public: - static char ID; // Pass identification, replacement for typeid - + class LDVImpl; LiveDebugVariables(); - ~LiveDebugVariables() override; + ~LiveDebugVariables(); + LiveDebugVariables(LiveDebugVariables &&); + void analyze(MachineFunction &MF, LiveIntervals *LIS); /// splitRegister - Move any user variables in OldReg to the live ranges in /// NewRegs where they are live. Mark the values as unavailable where no new /// register is live. @@ -49,12 +52,39 @@ class LiveDebugVariables : public MachineFunctionPass { /// @param VRM Rename virtual registers according to map. void emitDebugValues(VirtRegMap *VRM); +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// dump - Print data structures to dbgs(). 
void dump() const; +#endif + + void print(raw_ostream &OS) const; + + void releaseMemory(); + + bool invalidate(MachineFunction &MF, const PreservedAnalyses &PA, + MachineFunctionAnalysisManager::Invalidator &Inv); private: + std::unique_ptr PImpl; +}; + +class LiveDebugVariablesWrapperLegacy : public MachineFunctionPass { + std::unique_ptr Impl; + +public: + static char ID; // Pass identification, replacement for typeid + + LiveDebugVariablesWrapperLegacy(); + bool runOnMachineFunction(MachineFunction &) override; - void releaseMemory() override; + + LiveDebugVariables &getLDV() { return *Impl; } + const LiveDebugVariables &getLDV() const { return *Impl; } + + void releaseMemory() override { + if (Impl) + Impl->releaseMemory(); + } void getAnalysisUsage(AnalysisUsage &) const override; MachineFunctionProperties getSetProperties() const override { @@ -63,6 +93,32 @@ class LiveDebugVariables : public MachineFunctionPass { } }; +class LiveDebugVariablesAnalysis + : public AnalysisInfoMixin { + friend AnalysisInfoMixin; + static AnalysisKey Key; + +public: + using Result = LiveDebugVariables; + + MachineFunctionProperties getSetProperties() const { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::TracksDebugUserValues); + } + + Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM); +}; + +class LiveDebugVariablesPrinterPass + : public PassInfoMixin { + raw_ostream &OS; + +public: + LiveDebugVariablesPrinterPass(raw_ostream &OS) : OS(OS) {} + + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); +}; } // end namespace llvm #endif // LLVM_CODEGEN_LIVEDEBUGVARIABLES_H diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 88bca2c75c949..7b81c9a8e143a 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -154,7 +154,7 @@ void initializeLegalizerPass(PassRegistry &); void initializeGISelCSEAnalysisWrapperPassPass(PassRegistry &); void initializeGISelKnownBitsAnalysisPass(PassRegistry &); void initializeLiveDebugValuesPass(PassRegistry &); -void initializeLiveDebugVariablesPass(PassRegistry &); +void initializeLiveDebugVariablesWrapperLegacyPass(PassRegistry &); void initializeLiveIntervalsWrapperPassPass(PassRegistry &); void initializeLiveRangeShrinkPass(PassRegistry &); void initializeLiveRegMatrixWrapperLegacyPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 614e36cfbd1a0..e65bd58dae96b 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -98,6 +98,7 @@ LOOP_PASS("loop-term-fold", LoopTermFoldPass()) // computed. (We still either need to regenerate kill flags after regalloc, or // preferably fix the scavenger to not depend on them). 
MACHINE_FUNCTION_ANALYSIS("edge-bundles", EdgeBundlesAnalysis()) +MACHINE_FUNCTION_ANALYSIS("livedebugvars", LiveDebugVariablesAnalysis()) MACHINE_FUNCTION_ANALYSIS("live-intervals", LiveIntervalsAnalysis()) MACHINE_FUNCTION_ANALYSIS("live-reg-matrix", LiveRegMatrixAnalysis()) MACHINE_FUNCTION_ANALYSIS("live-vars", LiveVariablesAnalysis()) @@ -146,6 +147,7 @@ MACHINE_FUNCTION_PASS("opt-phis", OptimizePHIsPass()) MACHINE_FUNCTION_PASS("peephole-opt", PeepholeOptimizerPass()) MACHINE_FUNCTION_PASS("phi-node-elimination", PHIEliminationPass()) MACHINE_FUNCTION_PASS("print", PrintMIRPass()) +MACHINE_FUNCTION_PASS("print", LiveDebugVariablesPrinterPass(errs())) MACHINE_FUNCTION_PASS("print", LiveIntervalsPrinterPass(errs())) MACHINE_FUNCTION_PASS("print", LiveVariablesPrinterPass(errs())) MACHINE_FUNCTION_PASS("print", diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 408395fefc298..59428818c1ee7 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -59,7 +59,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeInterleavedAccessPass(Registry); initializeJMCInstrumenterPass(Registry); initializeLiveDebugValuesPass(Registry); - initializeLiveDebugVariablesPass(Registry); + initializeLiveDebugVariablesWrapperLegacyPass(Registry); initializeLiveIntervalsWrapperPassPass(Registry); initializeLiveRangeShrinkPass(Registry); initializeLiveStacksPass(Registry); diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp index 2ff346d3fd022..317d3401f000a 100644 --- a/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -38,6 +38,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachinePassManager.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -74,24 +75,27 @@ EnableLDV("live-debug-variables", cl::init(true), STATISTIC(NumInsertedDebugValues, "Number of DBG_VALUEs inserted"); STATISTIC(NumInsertedDebugLabels, "Number of DBG_LABELs inserted"); -char LiveDebugVariables::ID = 0; +char LiveDebugVariablesWrapperLegacy::ID = 0; -INITIALIZE_PASS_BEGIN(LiveDebugVariables, DEBUG_TYPE, - "Debug Variable Analysis", false, false) +INITIALIZE_PASS_BEGIN(LiveDebugVariablesWrapperLegacy, DEBUG_TYPE, + "Debug Variable Analysis", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) -INITIALIZE_PASS_END(LiveDebugVariables, DEBUG_TYPE, - "Debug Variable Analysis", false, false) +INITIALIZE_PASS_END(LiveDebugVariablesWrapperLegacy, DEBUG_TYPE, + "Debug Variable Analysis", false, false) -void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const { +void LiveDebugVariablesWrapperLegacy::getAnalysisUsage( + AnalysisUsage &AU) const { AU.addRequired(); AU.addRequiredTransitive(); AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } -LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID) { - initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); +LiveDebugVariablesWrapperLegacy::LiveDebugVariablesWrapperLegacy() + : MachineFunctionPass(ID) { + initializeLiveDebugVariablesWrapperLegacyPass( + *PassRegistry::getPassRegistry()); } enum : unsigned { UndefLocNo = ~0U }; @@ -274,8 +278,6 @@ using BlockSkipInstsMap = namespace { -class LDVImpl; - /// A user value is a part of a debug info 
user variable. /// /// A DBG_VALUE instruction notes that (a sub-register of) a virtual register @@ -285,6 +287,8 @@ class LDVImpl; /// user values are related if they are held by the same virtual register. The /// equivalence class is the transitive closure of that relation. class UserValue { + using LDVImpl = LiveDebugVariables::LDVImpl; + const DILocalVariable *Variable; ///< The debug info variable we are part of. /// The part of the variable we describe. const std::optional Fragment; @@ -528,9 +532,17 @@ class UserLabel { void print(raw_ostream &, const TargetRegisterInfo *); }; +} // end anonymous namespace + +namespace llvm { + /// Implementation of the LiveDebugVariables pass. -class LDVImpl { - LiveDebugVariables &pass; + +LiveDebugVariables::LiveDebugVariables() = default; +LiveDebugVariables::~LiveDebugVariables() = default; +LiveDebugVariables::LiveDebugVariables(LiveDebugVariables &&) = default; + +class LiveDebugVariables::LDVImpl { LocMap::Allocator allocator; MachineFunction *MF = nullptr; LiveIntervals *LIS; @@ -634,7 +646,7 @@ class LDVImpl { void computeIntervals(); public: - LDVImpl(LiveDebugVariables *ps) : pass(*ps) {} + LDVImpl(LiveIntervals *LIS) : LIS(LIS) {} bool runOnMachineFunction(MachineFunction &mf, bool InstrRef); @@ -671,9 +683,8 @@ class LDVImpl { void print(raw_ostream&); }; -} // end anonymous namespace +} // namespace llvm -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) static void printDebugLoc(const DebugLoc &DL, raw_ostream &CommentOS, const LLVMContext &Ctx) { if (!DL) @@ -753,7 +764,7 @@ void UserLabel::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { OS << '\n'; } -void LDVImpl::print(raw_ostream &OS) { +void LiveDebugVariables::LDVImpl::print(raw_ostream &OS) { OS << "********** DEBUG VARIABLES **********\n"; for (auto &userValue : userValues) userValue->print(OS, TRI); @@ -761,18 +772,16 @@ void LDVImpl::print(raw_ostream &OS) { for (auto &userLabel : userLabels) userLabel->print(OS, TRI); } -#endif -void UserValue::mapVirtRegs(LDVImpl *LDV) { +void UserValue::mapVirtRegs(LiveDebugVariables::LDVImpl *LDV) { for (const MachineOperand &MO : locations) if (MO.isReg() && MO.getReg().isVirtual()) LDV->mapVirtReg(MO.getReg(), this); } -UserValue * -LDVImpl::getUserValue(const DILocalVariable *Var, - std::optional Fragment, - const DebugLoc &DL) { +UserValue *LiveDebugVariables::LDVImpl::getUserValue( + const DILocalVariable *Var, + std::optional Fragment, const DebugLoc &DL) { // FIXME: Handle partially overlapping fragments. See // https://reviews.llvm.org/D70121#1849741. DebugVariable ID(Var, Fragment, DL->getInlinedAt()); @@ -785,19 +794,20 @@ LDVImpl::getUserValue(const DILocalVariable *Var, return UV; } -void LDVImpl::mapVirtReg(Register VirtReg, UserValue *EC) { +void LiveDebugVariables::LDVImpl::mapVirtReg(Register VirtReg, UserValue *EC) { assert(VirtReg.isVirtual() && "Only map VirtRegs"); UserValue *&Leader = virtRegToEqClass[VirtReg]; Leader = UserValue::merge(Leader, EC); } -UserValue *LDVImpl::lookupVirtReg(Register VirtReg) { +UserValue *LiveDebugVariables::LDVImpl::lookupVirtReg(Register VirtReg) { if (UserValue *UV = virtRegToEqClass.lookup(VirtReg)) return UV->getLeader(); return nullptr; } -bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) { +bool LiveDebugVariables::LDVImpl::handleDebugValue(MachineInstr &MI, + SlotIndex Idx) { // DBG_VALUE loc, offset, variable, expr // DBG_VALUE_LIST variable, expr, locs... 
if (!MI.isDebugValue()) { @@ -873,8 +883,8 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) { return true; } -MachineBasicBlock::iterator LDVImpl::handleDebugInstr(MachineInstr &MI, - SlotIndex Idx) { +MachineBasicBlock::iterator +LiveDebugVariables::LDVImpl::handleDebugInstr(MachineInstr &MI, SlotIndex Idx) { assert(MI.isDebugValueLike() || MI.isDebugPHI()); // In instruction referencing mode, there should be no DBG_VALUE instructions @@ -894,7 +904,8 @@ MachineBasicBlock::iterator LDVImpl::handleDebugInstr(MachineInstr &MI, return NextInst; } -bool LDVImpl::handleDebugLabel(MachineInstr &MI, SlotIndex Idx) { +bool LiveDebugVariables::LDVImpl::handleDebugLabel(MachineInstr &MI, + SlotIndex Idx) { // DBG_LABEL label if (MI.getNumOperands() != 1 || !MI.getOperand(0).isMetadata()) { LLVM_DEBUG(dbgs() << "Can't handle " << MI); @@ -917,7 +928,8 @@ bool LDVImpl::handleDebugLabel(MachineInstr &MI, SlotIndex Idx) { return true; } -bool LDVImpl::collectDebugValues(MachineFunction &mf, bool InstrRef) { +bool LiveDebugVariables::LDVImpl::collectDebugValues(MachineFunction &mf, + bool InstrRef) { bool Changed = false; for (MachineBasicBlock &MBB : mf) { for (MachineBasicBlock::iterator MBBI = MBB.begin(), MBBE = MBB.end(); @@ -1250,7 +1262,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, I.setStopUnchecked(PrevEnd); } -void LDVImpl::computeIntervals() { +void LiveDebugVariables::LDVImpl::computeIntervals() { LexicalScopes LS; LS.initialize(*MF); @@ -1260,10 +1272,10 @@ void LDVImpl::computeIntervals() { } } -bool LDVImpl::runOnMachineFunction(MachineFunction &mf, bool InstrRef) { +bool LiveDebugVariables::LDVImpl::runOnMachineFunction(MachineFunction &mf, + bool InstrRef) { clear(); MF = &mf; - LIS = &pass.getAnalysis().getLIS(); TRI = mf.getSubtarget().getRegisterInfo(); LLVM_DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: " << mf.getName() << " **********\n"); @@ -1298,31 +1310,65 @@ static void removeDebugInstrs(MachineFunction &mf) { } } -bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) { - if (!EnableLDV) - return false; - if (!mf.getFunction().getSubprogram()) { - removeDebugInstrs(mf); - return false; - } +bool LiveDebugVariablesWrapperLegacy::runOnMachineFunction( + MachineFunction &mf) { + auto *LIS = &getAnalysis().getLIS(); - // Have we been asked to track variable locations using instruction - // referencing? 
- bool InstrRef = mf.useDebugInstrRef(); + Impl = std::make_unique(); + Impl->analyze(mf, LIS); + return false; +} - if (!pImpl) - pImpl = new LDVImpl(this); - return static_cast(pImpl)->runOnMachineFunction(mf, InstrRef); +AnalysisKey LiveDebugVariablesAnalysis::Key; + +LiveDebugVariables +LiveDebugVariablesAnalysis::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + MFPropsModifier _(*this, MF); + + auto *LIS = &MFAM.getResult(MF); + LiveDebugVariables LDV; + LDV.analyze(MF, LIS); + return LDV; +} + +PreservedAnalyses +LiveDebugVariablesPrinterPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + auto &LDV = MFAM.getResult(MF); + LDV.print(OS); + return PreservedAnalyses::all(); } void LiveDebugVariables::releaseMemory() { - if (pImpl) - static_cast(pImpl)->clear(); + if (PImpl) + PImpl->clear(); } -LiveDebugVariables::~LiveDebugVariables() { - if (pImpl) - delete static_cast(pImpl); +bool LiveDebugVariables::invalidate( + MachineFunction &, const PreservedAnalyses &PA, + MachineFunctionAnalysisManager::Invalidator &) { + auto PAC = PA.getChecker(); + // Some architectures split the register allocation into multiple phases based + // on register classes. This requires preserving analyses between the phases + // by default. + return !PAC.preservedWhenStateless(); +} + +void LiveDebugVariables::analyze(MachineFunction &MF, LiveIntervals *LIS) { + if (!EnableLDV) + return; + if (!MF.getFunction().getSubprogram()) { + removeDebugInstrs(MF); + return; + } + + PImpl.reset(new LDVImpl(LIS)); + + // Have we been asked to track variable locations using instruction + // referencing? + bool InstrRef = MF.useDebugInstrRef(); + PImpl->runOnMachineFunction(MF, InstrRef); } //===----------------------------------------------------------------------===// @@ -1445,7 +1491,8 @@ UserValue::splitRegister(Register OldReg, ArrayRef NewRegs, return DidChange; } -void LDVImpl::splitPHIRegister(Register OldReg, ArrayRef NewRegs) { +void LiveDebugVariables::LDVImpl::splitPHIRegister(Register OldReg, + ArrayRef NewRegs) { auto RegIt = RegToPHIIdx.find(OldReg); if (RegIt == RegToPHIIdx.end()) return; @@ -1483,7 +1530,8 @@ void LDVImpl::splitPHIRegister(Register OldReg, ArrayRef NewRegs) { RegToPHIIdx[RegAndInstr.first].push_back(RegAndInstr.second); } -void LDVImpl::splitRegister(Register OldReg, ArrayRef NewRegs) { +void LiveDebugVariables::LDVImpl::splitRegister(Register OldReg, + ArrayRef NewRegs) { // Consider whether this split range affects any PHI locations. 
splitPHIRegister(OldReg, NewRegs); @@ -1504,8 +1552,8 @@ void LDVImpl::splitRegister(Register OldReg, ArrayRef NewRegs) { void LiveDebugVariables:: splitRegister(Register OldReg, ArrayRef NewRegs, LiveIntervals &LIS) { - if (pImpl) - static_cast(pImpl)->splitRegister(OldReg, NewRegs); + if (PImpl) + PImpl->splitRegister(OldReg, NewRegs); } void UserValue::rewriteLocations(VirtRegMap &VRM, const MachineFunction &MF, @@ -1807,7 +1855,7 @@ void UserLabel::emitDebugLabel(LiveIntervals &LIS, const TargetInstrInfo &TII, LLVM_DEBUG(dbgs() << '\n'); } -void LDVImpl::emitDebugValues(VirtRegMap *VRM) { +void LiveDebugVariables::LDVImpl::emitDebugValues(VirtRegMap *VRM) { LLVM_DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n"); if (!MF) return; @@ -1956,13 +2004,15 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) { } void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) { - if (pImpl) - static_cast(pImpl)->emitDebugValues(VRM); + if (PImpl) + PImpl->emitDebugValues(VRM); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void LiveDebugVariables::dump() const { - if (pImpl) - static_cast(pImpl)->print(dbgs()); -} +LLVM_DUMP_METHOD void LiveDebugVariables::dump() const { print(dbgs()); } #endif + +void LiveDebugVariables::print(raw_ostream &OS) const { + if (PImpl) + PImpl->print(OS); +} diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp index 55d806e768b91..7ee24c960dbe0 100644 --- a/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -130,7 +130,7 @@ char &llvm::RABasicID = RABasic::ID; INITIALIZE_PASS_BEGIN(RABasic, "regallocbasic", "Basic Register Allocator", false, false) -INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) +INITIALIZE_PASS_DEPENDENCY(LiveDebugVariablesWrapperLegacy) INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer) @@ -180,8 +180,8 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addPreserved(); AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index d0d2c585f0b54..8564fd8ca96da 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -151,7 +151,7 @@ char &llvm::RAGreedyID = RAGreedy::ID; INITIALIZE_PASS_BEGIN(RAGreedy, "greedy", "Greedy Register Allocator", false, false) -INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) +INITIALIZE_PASS_DEPENDENCY(LiveDebugVariablesWrapperLegacy) INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer) @@ -204,8 +204,8 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); @@ -2732,7 +2732,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { Loops = &getAnalysis().getLI(); Bundles = &getAnalysis().getEdgeBundles(); SpillPlacer = &getAnalysis().getResult(); - DebugVars = &getAnalysis(); + DebugVars = &getAnalysis().getLDV(); initializeCSRCost(); diff --git a/llvm/lib/CodeGen/RegAllocGreedy.h b/llvm/lib/CodeGen/RegAllocGreedy.h index 9578b8d3bef87..594c481826cf0 100644 --- 
a/llvm/lib/CodeGen/RegAllocGreedy.h +++ b/llvm/lib/CodeGen/RegAllocGreedy.h @@ -24,6 +24,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/LiveDebugVariables.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/MachineFunction.h" @@ -42,7 +43,7 @@ namespace llvm { class AllocationOrder; class AnalysisUsage; class EdgeBundles; -class LiveDebugVariables; +class LiveDebugVariablesWrapperLegacy; class LiveIntervals; class LiveRegMatrix; class MachineBasicBlock; diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp index a1fa266354a52..cdc530621de62 100644 --- a/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -159,7 +159,7 @@ namespace { // may be invoked multiple times requiring it to save these analyses to be // used by RA later. AU.addPreserved(); - AU.addPreserved(); + AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp index 26a12512c87be..2084e68c16e29 100644 --- a/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/llvm/lib/CodeGen/VirtRegMap.cpp @@ -251,7 +251,7 @@ INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter", "Virtual Register Rewriter", false, false) INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) +INITIALIZE_PASS_DEPENDENCY(LiveDebugVariablesWrapperLegacy) INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy) INITIALIZE_PASS_DEPENDENCY(LiveStacks) INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy) @@ -264,14 +264,14 @@ void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); AU.addRequired(); if (!ClearVirtRegs) - AU.addPreserved(); + AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -285,7 +285,7 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) { LIS = &getAnalysis().getLIS(); LRM = &getAnalysis().getLRM(); VRM = &getAnalysis().getVRM(); - DebugVars = &getAnalysis(); + DebugVars = &getAnalysis().getLDV(); LLVM_DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n" << "********** Function: " << MF->getName() << '\n'); LLVM_DEBUG(VRM->dump()); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index ba52a37df9c25..cc9f59727c604 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -98,6 +98,7 @@ #include "llvm/CodeGen/InterleavedAccess.h" #include "llvm/CodeGen/InterleavedLoadCombine.h" #include "llvm/CodeGen/JMCInstrumenter.h" +#include "llvm/CodeGen/LiveDebugVariables.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveRegMatrix.h" #include "llvm/CodeGen/LiveVariables.h" diff --git a/llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp b/llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp index f0e6837d49a97..d682b7dbe3ce2 100644 --- a/llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp @@ -37,7 +37,7 @@ class LoongArchDeadRegisterDefinitions : public MachineFunctionPass { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); - AU.addPreserved(); + AU.addPreserved(); AU.addPreserved(); 
MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp b/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp index 4b35f3bb0a524..7bcf3397df97e 100644 --- a/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp +++ b/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp @@ -37,7 +37,7 @@ class RISCVDeadRegisterDefinitions : public MachineFunctionPass { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); - AU.addPreserved(); + AU.addPreserved(); AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 421150a370199..870e393b40411 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -889,7 +889,7 @@ class RISCVInsertVSETVLI : public MachineFunctionPass { AU.addUsedIfAvailable(); AU.addPreserved(); AU.addPreserved(); - AU.addPreserved(); + AU.addPreserved(); AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); From 2137ded301adb430e0616cd835da9838e4fd79ce Mon Sep 17 00:00:00 2001 From: David Spickett Date: Wed, 4 Dec 2024 09:03:40 +0000 Subject: [PATCH 190/191] [OpenMP] Add Maintainers.md file --- llvm/Maintainers.md | 10 ++-------- openmp/Maintainers.md | 13 +++++++++++++ 2 files changed, 15 insertions(+), 8 deletions(-) create mode 100644 openmp/Maintainers.md diff --git a/llvm/Maintainers.md b/llvm/Maintainers.md index e69fe62ac175b..63bdfd42528db 100644 --- a/llvm/Maintainers.md +++ b/llvm/Maintainers.md @@ -438,6 +438,8 @@ Others only have a lead maintainer listed here. [LLDB maintainers](https://github.com/llvm/llvm-project/blob/main/lldb/Maintainers.rst) +[LLVM OpenMP Library maintainers](https://github.com/llvm/llvm-project/blob/main/openmp/Maintainers.md) + #### libc++ Louis Dionne \ @@ -448,14 +450,6 @@ ldionne.2@gmail.com (email), [ldionne](https://github.com/ldionne) (GitHub) Tom Stellard \ tstellar@redhat.com (email), [tstellar](https://github.com/tstellar) (GitHub) -#### OpenMP (runtime library) - -Michael Klemm \ -michael.klemm@amd.com (email), [mjklemm](https://github.com/mjklemm) (GitHub) - -Terry Wilmarth \ -terry.l.wilmarth@intel.com (email), [TerryLWilmarth](https://github.com/TerryLWilmarth) (GitHub) - #### Polly Tobias Grosser \ diff --git a/openmp/Maintainers.md b/openmp/Maintainers.md new file mode 100644 index 0000000000000..1892577374e6f --- /dev/null +++ b/openmp/Maintainers.md @@ -0,0 +1,13 @@ +# LLVM OpenMP Library Maintainers + +This file is a list of the +[maintainers](https://llvm.org/docs/DeveloperPolicy.html#maintainers) for +the LLVM OpenMP library. + +# Current Maintainers + +Michael Klemm \ +michael.klemm@amd.com (email), [mjklemm](https://github.com/mjklemm) (GitHub) + +Terry Wilmarth \ +terry.l.wilmarth@intel.com (email), [TerryLWilmarth](https://github.com/TerryLWilmarth) (GitHub) From bba2507c19ff678c5d7b18e0b220406be87451fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A9sz=C3=A1ros=20Gergely?= Date: Wed, 4 Dec 2024 10:09:29 +0100 Subject: [PATCH 191/191] [compiler-rt][MSVC][CMake] Wrap Linker flags for ICX (#118496) RFC: https://discourse.llvm.org/t/rfc-cmake-linker-flags-need-wl-equivalent-for-intel-c-icx-on-windows/82446 My previous pass missed some flags because I used `-Werror=unknown-argument`, but `/D`, `/I` and `/O` are accepted by clang (even when only linking), but mean different things than intended for `link.exe`. 
--- compiler-rt/CMakeLists.txt | 2 +- compiler-rt/lib/asan/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt index 20054c6e85a40..80d5aaabfd8c3 100644 --- a/compiler-rt/CMakeLists.txt +++ b/compiler-rt/CMakeLists.txt @@ -768,7 +768,7 @@ if (CMAKE_LINKER MATCHES "link.exe$") # it, but CMake doesn't seem to have a way to set linker flags for # individual static libraries, so we enable the suppression flag for # the whole compiler-rt project. - set(CMAKE_STATIC_LINKER_FLAGS "${CMAKE_STATIC_LINKER_FLAGS} /IGNORE:4221") + set(CMAKE_STATIC_LINKER_FLAGS "${CMAKE_STATIC_LINKER_FLAGS} ${CMAKE_CXX_LINKER_WRAPPER_FLAG}/IGNORE:4221") endif() add_subdirectory(include) diff --git a/compiler-rt/lib/asan/CMakeLists.txt b/compiler-rt/lib/asan/CMakeLists.txt index fb3d74283a61e..5ec995ae159b7 100644 --- a/compiler-rt/lib/asan/CMakeLists.txt +++ b/compiler-rt/lib/asan/CMakeLists.txt @@ -141,7 +141,7 @@ append_list_if(COMPILER_RT_HAS_FTLS_MODEL_INITIAL_EXEC # LLVM turns /OPT:ICF back on when LLVM_ENABLE_PDBs is set # we _REALLY_ need to turn it back off for ASAN, because the way # asan emulates weak functions from DLLs requires NOICF -append_list_if(MSVC "/DEBUG;/OPT:NOICF" ASAN_DYNAMIC_LINK_FLAGS) +append_list_if(MSVC "LINKER:/DEBUG;LINKER:/OPT:NOICF" ASAN_DYNAMIC_LINK_FLAGS) set(ASAN_DYNAMIC_LIBS ${COMPILER_RT_UNWINDER_LINK_LIBS}